In [None]:
from pprint import pprint
import json

import pandas as pd

from aws_client.aws_client import AWS
from aws_client import utils

In [None]:
# Load the `sql` IPython extension; it provides the %sql / %%sql magics that
# the later cells use to talk to the database.
%load_ext sql

In [None]:
# Read the warehouse settings from the project config file and obtain the AWS
# credentials through the project helper, so no secret is written in this notebook.
configs = utils.parse_configs('../config/dwh.cfg')
secrets = utils.get_secrets()

In [None]:
# Build the project's AWS wrapper from the credentials and config loaded above.
# Later cells reach the individual services through it (aws.s3, aws.iam,
# aws.redshift) and read settings back via aws.configs.
aws = AWS(aws_access_key_id=secrets.get('KEY'),
          aws_secret_access_key=secrets.get('SECRET'),
          region=configs.get('REGION'),
          config_params=configs)

In [None]:
# Handle to the S3 bucket that the COPY statement further down reads from
# (s3://udacity-dend/log_data/). No later cell shown here uses `bucket` itself.
bucket = aws.s3.Bucket('udacity-dend')

In [None]:
# Look up the ARN of the IAM role whose name is configured as DWH_IAM_ROLE_NAME.
# The ARN is passed to the cluster creation call in the next cell.
role_arn = aws.iam.get_role(
    RoleName=configs.get('DWH_IAM_ROLE_NAME')
)['Role']['Arn']

In [None]:
# Create the Redshift cluster via the project wrapper, handing it the IAM role ARN.
# NOTE(review): presumably this returns before the cluster is usable -- the
# following cells poll its properties and mention waiting until it is ready.
aws.create_redshift_cluster(role_arn)

In [None]:
def get_redshift_cluster_props():
    """Describe the configured Redshift cluster.

    Uses the notebook-level ``aws`` wrapper and ``configs`` mapping; the
    cluster is selected by the ``DWH_CLUSTER_IDENTIFIER`` setting.

    Returns:
        The ``'Clusters'`` entry of the ``describe_clusters`` response
        (the caller below indexes it with ``[0]``).
    """
    cluster_id = configs.get('DWH_CLUSTER_IDENTIFIER')
    response = aws.redshift.describe_clusters(ClusterIdentifier=cluster_id)
    return response['Clusters']

def print_dwh_params(redshift_cluster_props):
    """Print the cluster endpoint address and the ARN of its first IAM role.

    Args:
        redshift_cluster_props: Mapping describing one cluster. The keys read
            are ``'Endpoint' -> 'Address'`` and ``'IamRoles'[0] -> 'IamRoleArn'``.

    Prints two lines: the endpoint address, then the role ARN. When either
    value is missing a placeholder is printed instead of raising, so the
    status cell can be re-run while the cluster is still coming up.
    NOTE(review): the 'Endpoint' entry appears to be absent until the cluster
    is available -- confirm against the describe_clusters documentation.
    """
    # `or {}` / `or []` also cover keys that are present but set to None.
    endpoint_info = redshift_cluster_props.get('Endpoint') or {}
    iam_roles = redshift_cluster_props.get('IamRoles') or []

    dwh_endpoint = endpoint_info.get('Address')
    dwh_role_arn = iam_roles[0].get('IamRoleArn') if iam_roles else None

    print(dwh_endpoint if dwh_endpoint is not None
          else '<endpoint not available yet>')
    print(dwh_role_arn if dwh_role_arn is not None
          else '<no IAM role attached>')


def check_redshift_status(aws: AWS, redshift_cluster_props):
    """Print a summary of the given cluster properties.

    Args:
        aws: Project AWS wrapper; its ``get_redshift_props_as_pd_df`` method
            builds the summary (presumably a pandas DataFrame, going by the
            method name -- verify in ``aws_client``).
        redshift_cluster_props: Properties of one cluster, as taken from the
            ``describe_clusters`` response.
    """
    print(aws.get_redshift_props_as_pd_df(redshift_cluster_props))

# Take the first entry of the describe_clusters result, then print its summary
# table followed by the endpoint address and IAM role ARN.
# Re-run this cell to refresh `redshift_cluster_props`; later cells reuse it.
redshift_cluster_props = get_redshift_cluster_props()[0]
check_redshift_status(aws, redshift_cluster_props)
print_dwh_params(redshift_cluster_props)

In [None]:
# Once the cluster is ready, open the TCP port so it can be reached from outside.
# Uses the properties fetched in the status cell above; re-run that cell first
# if it was executed while the cluster was still being created.
aws.open_tcp_port(redshift_cluster_props)

In [None]:
conn_string = "postgresql://{}:{}@{}:{}/{}".format(configs["DWH_DB_USER"],
                                                   configs["DWH_DB_PASSWORD"],
                                                   'dwhcluster.c6jsnvqemczs.us-west-2.redshift.amazonaws.com', 
                                                   configs["DWH_PORT"],
                                                   configs["DWH_DB"])
print(conn_string)
%sql $conn_string

In [None]:
# Start from a clean slate: remove any previous log_data table so the CREATE
# TABLE in the next cell always applies the current column definitions.
%sql DROP TABLE IF EXISTS log_data;

In [None]:
%%sql
CREATE TABLE IF NOT EXISTS "log_data" (
    -- Staging table for the raw log events loaded by the COPY in a later cell.
    -- Columns marked NOT NULL make the load reject rows that lack a value there.
    -- NOTE(review) length is a bare DECIMAL - Redshift documents the default as
    -- DECIMAL(18,0), so fractional digits would be dropped. Confirm intended.
    "artist" TEXT,
    "auth" VARCHAR(12) NOT NULL,
    "firstName" TEXT NOT NULL,
    "gender" TEXT NOT NULL,
    "itemInSession" INTEGER NOT NULL,
    "length" DECIMAL,
    "level" VARCHAR(12) NOT NULL,
    "location" TEXT NOT NULL,
    "method" VARCHAR(7) NOT NULL,
    "page" TEXT,
    "registration" VARCHAR(12),
    "sessionId" INTEGER NOT NULL,
    "song" TEXT,
    "status" SMALLINT NOT NULL,
    "timestamp" BIGINT NOT NULL,
    "userAgent" TEXT NOT NULL,
    "userId" INTEGER NOT NULL
);

In [None]:
read_s3_role_arn = ''

query = f"""
COPY log_data
FROM 's3://udacity-dend/log_data/'
CREDENTIALS 'aws_iam_role={read_s3_role_arn}'
gzip
DELIMITER ';'
REGION 'us-west-2';
"""

%sql query

In [None]:
# Teardown, step 1: delete the Redshift cluster named in the config.
# SkipFinalClusterSnapshot=True means no final snapshot is requested, so the
# data loaded into the cluster is not kept.
aws.redshift.delete_cluster(
    ClusterIdentifier=aws.configs['DWH_CLUSTER_IDENTIFIER'],
    SkipFinalClusterSnapshot=True)

In [None]:
# Teardown, step 2: detach the AWS-managed S3 read-only policy from the
# configured IAM role. The role itself is deleted in the next cell.
aws.iam.detach_role_policy(
    RoleName=aws.configs['DWH_IAM_ROLE_NAME'],
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")

In [None]:
# Teardown, step 3: delete the IAM role named in the config (run after the
# policy has been detached in the previous cell).
aws.iam.delete_role(RoleName=aws.configs['DWH_IAM_ROLE_NAME'])