In [1]:
import boto3
import pandas as pd
import psycopg2
import json

In [2]:
import configparser
# Load the cluster and credential settings from the local INI file.
# The file is opened in a `with` block so its handle is closed as soon as
# parsing finishes instead of staying open for the life of the kernel.
config = configparser.ConfigParser()
with open('cluster.config') as config_file:
    config.read_file(config_file)

In [1]:
# Quick check: display the DWH_DB option read from the DWH section.
config.get(section='DWH', option='DWH_DB')

In [3]:

# AWS credentials, read from the AWS section of the config.
KEY, SECRET = (config.get('AWS', option) for option in ('KEY', 'SECRET'))

# Cluster settings: each variable is named after the option it is read
# from in the DWH section, in the same order as the option list below.
(
    DWH_CLUSTER_TYPE,
    DWH_NUM_NODES,
    DWH_NODE_TYPE,
    DWH_CLUSTER_IDENTIFIER,
    DWH_DB,
    DWH_DB_USER,
    DWH_DB_PASSWORD,
    DWH_PORT,
    DWH_IAM_ROLE_NAME,
) = (
    config.get('DWH', option)
    for option in (
        'DWH_CLUSTER_TYPE',
        'DWH_NUM_NODES',
        'DWH_NODE_TYPE',
        'DWH_CLUSTER_IDENTIFIER',
        'DWH_DB',
        'DWH_DB_USER',
        'DWH_DB_PASSWORD',
        'DWH_PORT',
        'DWH_IAM_ROLE_NAME',
    )
)

# Display three of the values to confirm the config was parsed.
(DWH_CLUSTER_TYPE, DWH_NODE_TYPE, DWH_DB_USER)

In [4]:
# Summarise the loaded configuration for a quick visual check.
# The AWS credentials and the database password are masked so they never
# end up in the notebook's rendered (and saved) output.
SENSITIVE_PARAMS = {'KEY', 'SECRET', 'DWH_DB_PASSWORD'}
MASK = '********'

settings = {
    'KEY': KEY,
    'SECRET': SECRET,
    'DWH_CLUSTER_TYPE': DWH_CLUSTER_TYPE,
    'DWH_NUM_NODES': DWH_NUM_NODES,
    'DWH_NODE_TYPE': DWH_NODE_TYPE,
    'DWH_CLUSTER_IDENTIFIER': DWH_CLUSTER_IDENTIFIER,
    'DWH_DB': DWH_DB,
    'DWH_DB_USER': DWH_DB_USER,
    'DWH_DB_PASSWORD': DWH_DB_PASSWORD,
    'DWH_PORT': DWH_PORT,
    'DWH_IAM_ROLE_NAME': DWH_IAM_ROLE_NAME,
}

data = {
    "Param": list(settings),
    "Value": [MASK if name in SENSITIVE_PARAMS else value
              for name, value in settings.items()],
}

df = pd.DataFrame(data)
df

In [5]:
# Every handle below uses the same region and credentials, so the shared
# keyword arguments are collected once.
aws_connection_kwargs = {
    'region_name': 'us-east-1',
    'aws_access_key_id': KEY,
    'aws_secret_access_key': SECRET,
}

# Resource-style handles.
ec2 = boto3.resource('ec2', **aws_connection_kwargs)
s3 = boto3.resource('s3', **aws_connection_kwargs)

# Client-style handles.
iam = boto3.client('iam', **aws_connection_kwargs)
redshift = boto3.client('redshift', **aws_connection_kwargs)


In [5]:
# Create the private input bucket. Any exception raised by the request is
# printed instead of stopping the notebook.
bucket_name = 'covid-19-storage'
try:
    response = s3.create_bucket(ACL='private', Bucket=bucket_name)
except Exception as err:
    print(err)
else:
    print("Bucket created")


In [6]:
# Output bucket: created private, with any exception from the request
# printed instead of stopping the notebook.
bucket_name = 'covid-19-output-storage'
try:
    response = s3.create_bucket(ACL='private', Bucket=bucket_name)
except Exception as err:
    print(err)
else:
    print("Bucket created")

In [29]:
# Files to be uploaded

# Upload through the existing `s3` resource rather than rebinding `s3` to a
# low-level client: other cells call `s3.Bucket(...)` and `s3.Object(...)`,
# which are only available on the resource object.
bucket_name = 'datatobestored'
files_to_uploaded = ['allusers_pipe.txt', 'date2008_pipe.txt', 'category_pipe.txt']

upload_bucket = s3.Bucket(bucket_name)

for file_path in files_to_uploaded:
    # The object key is the bare file name, without any directory prefix.
    key = file_path.split('/')[-1]
    try:
        upload_bucket.upload_file(file_path, key)
        print("Files uploaded successfully")
    except Exception as e:
        # Report the failure and carry on with the remaining files.
        print(e)
    

Files uploaded successfully
Files uploaded successfully
Files uploaded successfully


In [7]:

# Bucket whose contents are listed.
bucket_name = 'datatobestored'

# Handle for that bucket, obtained from the `s3` object.
bucket = s3.Bucket(bucket_name)

# Print the key of every object in the bucket. A failure while listing is
# reported with a short prefix rather than raised.
try:
    all_objects = bucket.objects.all()
    for obj_summary in all_objects:
        print(obj_summary.key)
except Exception as err:
    print("Error iterating over objects in the bucket:", err)


In [44]:
# Look up the IAM role named in the config and keep only its ARN.
role_description = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)
roleARN = role_description['Role']['Arn']

In [8]:
# [roleARN]

In [9]:
# Request a new Redshift cluster using the settings loaded from the config.
try:
    cluster_params = dict(
        DBName=DWH_DB,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,
        MasterUsername=DWH_DB_USER,
        MasterUserPassword=DWH_DB_PASSWORD,
        IamRoles=[roleARN],
    )

    # A multi-node cluster must be told how many compute nodes to provision.
    # The config value is a string, so it is converted to an integer here.
    # The argument is left out for other cluster types.
    if DWH_CLUSTER_TYPE == 'multi-node':
        cluster_params['NumberOfNodes'] = int(DWH_NUM_NODES)

    response = redshift.create_cluster(**cluster_params)
    print("Cluster creation request successful.")
except Exception as e:
    # Covers both a malformed node count and a rejected request.
    print("Cluster creation failed:", e)

In [10]:
# Fetch the description of the configured cluster and display the raw reply.
response = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
response

In [11]:
def poppingredshift(props):
    """Build a two-column table from a selected subset of ``props``.

    Parameters
    ----------
    props : mapping
        Key/value pairs to filter; only ``props.items()`` is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Key"`` and ``"Value"``, with one row for each entry of
        ``props`` whose key is one of the selected names, in the iteration
        order of ``props``.
    """
    wanted = ('VpcId', 'NodeType', 'ClusterNodes', 'MasterUsername', 'DBName', 'Endpoint')
    rows = []
    for name, value in props.items():
        if name in wanted:
            rows.append((name, value))
    return pd.DataFrame(rows, columns=["Key", "Value"])
# Describe the configured cluster, keep the first entry of the returned
# 'Clusters' list, and display its summary table.
cluster_list = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters']
response = cluster_list[0]
poppingredshift(response)

In [66]:
# `df` is another name for the current `response`; the connection details
# below are read from it.
df = response
endpoint_info = df['Endpoint']
DWH_ENDPOINT = endpoint_info['Address']
DB_NAME, DB_USER = df['DBName'], df['MasterUsername']

In [51]:
# response = redshift.delete_cluster(
#     ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
#     SkipFinalClusterSnapshot=True,
#     FinalClusterSnapshotIdentifier='',
#     FinalClusterSnapshotRetentionPeriod=-1
# )

In [69]:
## Connect to the redshift cluster
# The password is the config value that was passed as the master password
# when the cluster was requested, and the port is read from the cluster's
# reported endpoint, so neither is hard-coded in the notebook.
try:
    conn = psycopg2.connect(
        host=DWH_ENDPOINT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DWH_DB_PASSWORD,
        port=df['Endpoint']['Port'],
    )
except Exception as e:
    # Report and re-raise: carrying on without a fresh connection would
    # either hit a NameError or silently reuse a stale `conn`.
    print(e)
    raise
else:
    # Only configure the session once a connection actually exists.
    conn.set_session(autocommit=True)

In [71]:
# Open a cursor on the connection; a failure is printed rather than raised.
try:
    cur = conn.cursor()
except Exception as err:
    print(err)

# The connection is closed right after, whether or not the cursor was created.
conn.close()

In [73]:
# redshift.delete_cluster(ClusterIdentifier = DWH_CLUSTER_IDENTIFIER, SkipFinalClusterSnapshot = True)

In [67]:
bucket_name = 'datatobestored'

# Key (name) of the S3 object to remove. This must be an object key, not the
# AWS credential stored in `KEY`, which does not name any uploaded file.
# NOTE(review): confirm this is the object that should be deleted.
object_key = 'category_pipe.txt'

# Use the bucket/key pair to delete the object
try:
    response = s3.Object(bucket_name, object_key).delete()
    print("Object deleted successfully.")
except Exception as e:
    print("Error deleting object:", e)

Object deleted successfully.
