In [None]:
!pip install pymysql
import pymysql
import boto3
import json
import pandas as pd
from IPython.display import display

def get_secret_name_from_ssm(ssm_param_name: str):
    """Retrieve the value stored in an SSM Parameter Store parameter.

    The notebook uses this to look up the identifier of the database secret,
    which is then passed to ``get_secret``.

    Args:
        ssm_param_name: Name of the SSM parameter to read.

    Returns:
        The ``Value`` field of the parameter returned by ``get_parameter``.
    """
    # The SSM client is created without an explicit region, so boto3's
    # default region resolution applies.
    ssm = boto3.client('ssm')
    parameter = ssm.get_parameter(Name=ssm_param_name)['Parameter']
    return parameter['Value']

def get_secret(secret_name, region_name):
    """Fetch a secret from AWS Secrets Manager and return it parsed from JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used to create the Secrets Manager client.

    Returns:
        The decoded ``SecretString`` payload. Callers in this notebook read
        the ``"username"`` and ``"password"`` keys from it.

    Raises:
        Exception: Any error raised while fetching or decoding the secret is
            printed and then re-raised unchanged.
    """
    secrets_manager = boto3.client('secretsmanager', region_name=region_name)

    try:
        # The secret is stored as a JSON document inside SecretString.
        payload = secrets_manager.get_secret_value(SecretId=secret_name)['SecretString']
        return json.loads(payload)
    except Exception as err:
        # Log for the notebook reader, then let the caller see the failure.
        print(f"Error retrieving secret: {err}")
        raise

In [None]:
def get_rds_endpoint(rds_instance_identifier):
    """Look up the endpoint of an RDS DB cluster.

    Despite the parameter name, the lookup goes through
    ``describe_db_clusters``, so the value is sent as a DB *cluster*
    identifier.

    Args:
        rds_instance_identifier: Identifier passed as ``DBClusterIdentifier``.

    Returns:
        The ``Endpoint`` field of the first cluster in the response.
    """
    clusters = boto3.client('rds').describe_db_clusters(
        DBClusterIdentifier=rds_instance_identifier
    )['DBClusters']
    # Only the first returned cluster is used.
    return clusters[0]['Endpoint']

In [None]:
# AWS RDS connection details
rds_instance_identifier = 'survaasdefault-db'
rds_endpoint = get_rds_endpoint(rds_instance_identifier)
database_name = 'SurvaasDefaultDb'
ssm_param_name = 'SurvaasDefaultDbSecretArn'
region = boto3.session.Session().region_name

# Resolve the secret once and read both fields from the same payload. Fetching
# it separately for each field doubles the SSM and Secrets Manager round trips
# and could pair a username and password from different secret versions.
db_secret = get_secret(get_secret_name_from_ssm(ssm_param_name), region)
username = db_secret["username"]
password = db_secret["password"]

table_name = 'customer_sample_data'

In [None]:
# Read the survey CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's working directory.
file_path = "extended_survey_dataset.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Open a PyMySQL connection to the database behind the RDS endpoint.
connection = pymysql.connect(
    # Where to connect
    host=rds_endpoint,
    port=3306,
    database=database_name,
    # Credentials read from the secret
    user=username,
    password=password,
    # Session behaviour: utf8mb4 text, rows returned as dictionaries
    charset="utf8mb4",
    cursorclass=pymysql.cursors.DictCursor,
)

In [None]:
# DDL for the destination table. IF NOT EXISTS lets this statement run when
# the table is already present. The table name is interpolated from the
# module-level `table_name`. RespondentID is the primary key, and the columns
# are declared in the same order as the column list of the INSERT statement
# used when loading the data.
create_table_query = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
    RespondentID INT PRIMARY KEY,
    Age INT,
    Gender TINYINT,
    LikesProductA INT,
    LikesProductB INT,
    PrefersOnlineShopping INT,
    ValuesDiscounts INT,
    BrandLoyalty INT,
    WillRecommend INT,
    OverallSatisfaction INT,
    TechSavviness INT,
    EnvironmentalConcern INT
);
"""

In [None]:
# Create the table if needed, then bulk-load every DataFrame row into it.
# The connection is always closed afterwards, so the connect cell must be
# re-run before this cell can be executed again.
try:
    with connection.cursor() as cursor:
        # Create table
        cursor.execute(create_table_query)
        connection.commit()

        # Values are bound positionally, so the DataFrame columns must be in
        # the same order as the column list below.
        insert_query = f"""
        INSERT INTO {table_name} (
            RespondentID, Age, Gender, LikesProductA, LikesProductB,
            PrefersOnlineShopping, ValuesDiscounts, BrandLoyalty,
            WillRecommend, OverallSatisfaction, TechSavviness, EnvironmentalConcern
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """

        # itertuples keeps each column's own dtype instead of coercing a whole
        # row to one dtype the way iterrows does. Missing values are mapped to
        # None so the driver sends SQL NULL rather than a float NaN.
        rows = [
            tuple(None if pd.isna(value) else value for value in record)
            for record in data.itertuples(index=False, name=None)
        ]

        # One executemany call replaces a separate execute() per row.
        cursor.executemany(insert_query, rows)
        connection.commit()
        print("Data successfully inserted into the RDS database.")
finally:
    connection.close()