#### Create AWS Redshift cluster (IaC)

In [1]:
import boto3 
import pandas as pd
import psycopg2
import json 
import configparser

In [2]:
# Load AWS credentials and warehouse settings from the local INI file.
config = configparser.ConfigParser()
# Context manager so the file handle is closed as soon as parsing finishes
# (the bare open() previously left it to the garbage collector).
with open("cluster.ini") as config_file:
    config.read_file(config_file)

In [3]:
# AWS credentials from the [AWS] section; used by every boto3 handle created below.
KEY, SECRET = (config.get("AWS", option) for option in ("KEY", "SECRET"))

# Warehouse settings from the [DWH] section. Each variable is named after the
# option it is read from (spelling kept as-is so it matches the INI file).
(
    DWH_CLUSTE_TYPE,
    DWH_NUM_NODES,
    DWH_NODE_TYPE,
    DWH_CLUSTER_IDENIFIER,
    DWH_DB,
    DWH_DB_USER,
    DWH_DB_PASSWORD,
    DWH_PORT,
    DWH_IAM_ROLE_NAME,
) = (
    config.get("DWH", option)
    for option in (
        "DWH_CLUSTE_TYPE",
        "DWH_NUM_NODES",
        "DWH_NODE_TYPE",
        "DWH_CLUSTER_IDENIFIER",
        "DWH_DB",
        "DWH_DB_USER",
        "DWH_DB_PASSWORD",
        "DWH_PORT",
        "DWH_IAM_ROLE_NAME",
    )
)

In [4]:
# Handles for the AWS services this notebook talks to.
# All four share the same region and credentials, so they are stated once.
aws_handle_options = dict(
    region_name="ap-east-1",
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

# Resource-style (object oriented) handles
ec2 = boto3.resource('ec2', **aws_handle_options)
s3 = boto3.resource('s3', **aws_handle_options)

# Low-level client handles
iam = boto3.client('iam', **aws_handle_options)
redshift = boto3.client('redshift', **aws_handle_options)

In [11]:
# List the practice data objects stored under the tickitdb/ prefix of the bucket
bucket = s3.Bucket("redshift-practice-thlawab")
tickit_objects = bucket.objects.filter(Prefix='tickitdb/')
log_data_files = [s3_object.key for s3_object in tickit_objects]
log_data_files

['tickitdb/allevents_pipe.txt',
 'tickitdb/allusers_pipe.txt',
 'tickitdb/category_pipe.txt',
 'tickitdb/date2008_pipe.txt',
 'tickitdb/listings_pipe.txt',
 'tickitdb/sales_tab.txt',
 'tickitdb/venue_pipe.txt']

In [12]:
# Look up the configured IAM role and keep its ARN; the ARN is what gets
# attached to the cluster when it is created.
role_description = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)
roleArn = role_description["Role"]["Arn"]

In [15]:
# Create the Redshift cluster from code (IaC)
cluster_settings = dict(
    ClusterType=DWH_CLUSTE_TYPE,
    NodeType=DWH_NODE_TYPE,
    # Credentials & identifiers
    DBName=DWH_DB,
    ClusterIdentifier=DWH_CLUSTER_IDENIFIER,
    MasterUsername=DWH_DB_USER,
    MasterUserPassword=DWH_DB_PASSWORD,
    # Listen on the configured port: the security-group rule and the psycopg2
    # connection further down both use DWH_PORT, so the cluster must as well.
    Port=int(DWH_PORT),
    # Role for S3 access
    IamRoles=[roleArn],
)

# NumberOfNodes only applies to multi-node clusters, so it is sent only then.
if DWH_CLUSTE_TYPE == "multi-node":
    cluster_settings["NumberOfNodes"] = int(DWH_NUM_NODES)

try:
    response = redshift.create_cluster(**cluster_settings)
except redshift.exceptions.ClusterAlreadyExistsFault as e:
    # Re-running the cell against an existing cluster is expected and harmless.
    # Any other failure (bad credentials, invalid node type, ...) now propagates
    # instead of being printed and ignored.
    print(e)


In [5]:
# Loading the redshift cluster info
# create_cluster returns before the cluster is usable, and the endpoint is only
# reported once the cluster is up. Block until the cluster reports "available"
# so the properties read in the next cell are populated.
# NOTE(review): the waiter polls for several minutes by design — confirm the
# default timeout is acceptable, or pass WaiterConfig to shorten it.
redshift.get_waiter("cluster_available").wait(ClusterIdentifier=DWH_CLUSTER_IDENIFIER)

myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENIFIER)["Clusters"][0]

In [6]:
# Pull the values needed later out of the describe_clusters record.
endpoint_info = myClusterProps['Endpoint']
attached_roles = myClusterProps['IamRoles']

DWH_ENDPOINT = endpoint_info['Address']          # host name used for the SQL connection
DWH_ROLE_ARN = attached_roles[0]['IamRoleArn']   # first role attached to the cluster
DWH_VPC = myClusterProps['VpcId']
DB_NAME = myClusterProps['DBName']
DB_USER = myClusterProps['MasterUsername']

In [25]:
# Configure the security group (inbound rule) for the Redshift cluster
try:
    # Open the port on the security group actually attached to the cluster.
    # Taking the first group listed for the VPC gives an arbitrary group,
    # which may not be the one guarding this cluster.
    cluster_sg_id = myClusterProps['VpcSecurityGroups'][0]['VpcSecurityGroupId']
    defaultSg = ec2.SecurityGroup(cluster_sg_id)
    print(defaultSg)
    # NOTE(review): 0.0.0.0/0 exposes the database port to the whole internet.
    # Acceptable for a short-lived practice cluster only; restrict the CIDR
    # for anything longer-lived.
    # The group is identified by the resource's own ID, so GroupName (valid
    # only for default-VPC groups) is not passed.
    defaultSg.authorize_ingress(
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DWH_PORT),
        ToPort=int(DWH_PORT)
    )
except Exception as e:
    # Best effort: re-running the cell after the rule exists reports a
    # duplicate-rule error, which is printed rather than raised.
    print(e)

ec2.SecurityGroup(id='sg-0112e8b0cf043dd33')


#### Connect to the AWS Redshift cluster

In [28]:
# Open a SQL connection to the cluster using the endpoint and credentials
# gathered above.
try:
    conn = psycopg2.connect(
        host=DWH_ENDPOINT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DWH_DB_PASSWORD,
        port=DWH_PORT
    )
except psycopg2.Error as e:
    print("Error: Could not make connection to the aws redshift")
    print(e)
    # Without a connection nothing below can work. Re-raise so the real cause
    # is reported here, instead of a NameError (or a stale connection from an
    # earlier run) surfacing on the next statement.
    raise
else:
    # Autocommit: every statement is committed as soon as it completes.
    conn.set_session(autocommit=True)

In [29]:
# Obtain the cursor through which all SQL below is executed.
try:
    cur = conn.cursor()
except psycopg2.Error as cursor_error:
    print("Error: Could not get cursor to the Database", cursor_error, sep="\n")

In [44]:
# Create the tables of the data model.
# Every statement uses IF NOT EXISTS, so the cell can be re-run safely.
# NOTE: the date table's last column is spelled `holiday` (previously
# misspelled `hoilday`). Because of IF NOT EXISTS, a table created by an
# earlier run keeps its old column name until it is dropped and recreated.
create_tables_sql = """
    create table IF NOT EXISTS users(
        userid integer not null distkey sortkey,
        username char(8),
        firstname char(30),
        lastname char(30),
        city varchar(30),
        state char(2),
        email varchar(100),
        phone char(14),
        likesports boolean,
        liketheatre boolean,
        likeconcerts boolean,
        likejazz boolean,
        likeclassical boolean,
        likeopera boolean,
        likerock boolean,
        likevegas boolean,
        likebroadway boolean,
        likemusicals boolean
    );

    create table IF NOT EXISTS venue(
        venueid smallint not null distkey sortkey,
        venuename varchar(100),
        venuecity varchar(30),
        venuestate char(2),
        venueseats integer
    );

    create table IF NOT EXISTS category(
        catid smallint not null distkey sortkey,
        catgroup varchar(10),
        catname varchar(10),
        catdesc varchar(50)
    );

    create table IF NOT EXISTS date(
        dateid smallint not null distkey sortkey,
        caldate date not null,
        day char(3) not null,
        week smallint not null,
        month char(5) not null,
        qtr char(5) not null,
        year smallint not null,
        holiday boolean default('N')
    );

    create table IF NOT EXISTS event(
        eventid integer not null distkey,
        venueid smallint not null,
        catid smallint not null,
        dateid smallint not null sortkey,
        eventname varchar(200),
        starttime timestamp
    );

    create table IF NOT EXISTS listing(
        listid integer not null distkey,
        sellerid integer not null,
        eventid integer not null,
        dateid smallint not null sortkey,
        numtickets smallint not null,
        priceperticket decimal(8, 2),
        totalprice decimal(8, 2),
        listtime timestamp
    );

    """

try:
    cur.execute(create_tables_sql)
except psycopg2.Error as e:
    # Roll back the transaction
    conn.rollback()
    print("Error: Issue creating table")
    print(e)



In [45]:
# Copy data from the S3 bucket into the tables created above.
# NOTE: COPY appends, so re-running this cell loads the rows a second time.
tickit_location = "s3://redshift-practice-thlawab/tickitdb"
bucket_region = "ap-east-1"

# (target table, source object) pairs, loaded in this order.
# tickitdb/sales_tab.txt is not listed: this notebook creates no `sales` table.
copy_sources = (
    ("users", "allusers_pipe.txt"),
    ("venue", "venue_pipe.txt"),
    ("category", "category_pipe.txt"),
    ("date", "date2008_pipe.txt"),
    ("event", "allevents_pipe.txt"),
    ("listing", "listings_pipe.txt"),
)

# One COPY statement per table, generated from a single template so the
# credentials, delimiter and region cannot drift apart between tables.
copy_sql = "\n".join(
    f"""
    copy {table} from '{tickit_location}/{source_file}'
    credentials 'aws_iam_role={DWH_ROLE_ARN}'
    delimiter '|'
    region '{bucket_region}';
    """
    for table, source_file in copy_sources
)

try:
    # Sent as one batch, exactly as before, so behaviour on failure is unchanged.
    cur.execute(copy_sql)
except psycopg2.Error as e:
    # Roll back the transaction
    conn.rollback()
    print("Error: Issue copying data to table")
    print(e)

In [50]:
# Sanity check: read a handful of rows back from the users table.
preview_query = """
    SELECT * 
    FROM users 
    LIMIT 5
    ;
    """

try:
    cur.execute(preview_query)
except psycopg2.Error as query_error:
    # Roll back the transaction
    conn.rollback()
    print("Error: Issue reading table", query_error, sep="\n")

In [51]:
# Fetch all rows produced by the query `cur` executed last (the users preview
# when the cells are run in order) and print them one per line.
results = cur.fetchall()
for row in results:
    print(row)

(1, 'JSG99FHE', 'Rafael                        ', 'Taylor                        ', 'Kent', 'WA', 'Etiam.laoreet.libero@sodalesMaurisblandit.edu', '(664) 602-4412', True, True, None, False, True, None, None, True, False, True)
(3, 'IFT66TXU', 'Lars                          ', 'Ratliff                       ', 'High Point', 'ME', 'amet.faucibus.ut@condimentumegetvolutpat.ca', '(624) 767-2465', True, False, None, False, None, False, True, None, None, True)
(6, 'NDQ15VBM', 'Victor                        ', 'Hernandez                     ', 'Naperville', 'GA', 'turpis@accumsanlaoreet.org', '(818) 765-4255', False, None, None, True, None, True, True, True, True, True)
(8, 'AZG78YIP', 'Colton                        ', 'Roy                           ', 'Guayama', 'AK', 'ullamcorper.nisl@Cras.edu', '(998) 934-9210', None, None, True, True, None, True, False, None, False, False)
(11, 'MFN29TYU', 'Anika                         ', 'Huff                          ', 'Rawlins', 'MT', 'arcu.Curabitur

In [52]:
# Release the database connection now that the queries are done.
try:
    conn.close()
except psycopg2.Error as close_error:
    print(close_error)

In [54]:
# Tear the cluster down so it stops incurring cost. No final snapshot is kept.
try:
    delete_response = redshift.delete_cluster(
        ClusterIdentifier=DWH_CLUSTER_IDENIFIER,
        SkipFinalClusterSnapshot=True
    )
except redshift.exceptions.ClusterNotFoundFault as e:
    # The cluster is already gone (e.g. the cell was run twice); teardown is
    # complete, so report it instead of failing the notebook.
    print(e)
else:
    print(delete_response["Cluster"]["ClusterStatus"])

ClusterNotFoundFault: An error occurred (ClusterNotFound) when calling the DeleteCluster operation: Cluster redshift-practice not found.