In [1]:
## set up the log function
## import packages
## load the dwh parameters
import pandas as pd
import boto3
import json
import configparser
import psycopg2
import datetime
import time
from botocore.exceptions import ClientError
now = datetime.datetime.now()

## define logstart and logend
def logstart(eventname):
    print(eventname + " started " + str(datetime.datetime.now()))
def logend(eventname):
    print(eventname + " ended " + str(datetime.datetime.now()))
    
config = configparser.ConfigParser()
config.read_file(open('dwh.cfg'))

KEY                    = config.get('AWS','KEY')
SECRET                 = config.get('AWS','SECRET')

DWH_CLUSTER_TYPE       = config.get("DWH","DWH_CLUSTER_TYPE")
DWH_NUM_NODES          = config.get("DWH","DWH_NUM_NODES")
DWH_NODE_TYPE          = config.get("DWH","DWH_NODE_TYPE")

DWH_CLUSTER_IDENTIFIER = config.get("DWH","DWH_CLUSTER_IDENTIFIER")
DWH_DB                 = config.get("DWH","DWH_DB_NAME")
DWH_DB_USER            = config.get("DWH","DWH_DB_USER")
DWH_DB_PASSWORD        = config.get("DWH","DWH_DB_PASSWORD")
DWH_PORT               = config.get("DWH","DWH_DB_PORT")

DWH_IAM_ROLE_NAME      = config.get("DWH", "DWH_IAM_ROLE_NAME")


In [2]:
pd.DataFrame({"Param":
                  ["DWH_CLUSTER_TYPE", "DWH_NUM_NODES", "DWH_NODE_TYPE", "DWH_CLUSTER_IDENTIFIER", "DWH_DB_NAME", "DWH_DB_USER", "DWH_DB_PASSWORD", "DWH_DB_PORT", "DWH_IAM_ROLE_NAME"],
              "Value":
                  [DWH_CLUSTER_TYPE, DWH_NUM_NODES, DWH_NODE_TYPE, DWH_CLUSTER_IDENTIFIER, DWH_DB, DWH_DB_USER, DWH_DB_PASSWORD, DWH_PORT, DWH_IAM_ROLE_NAME]
             })

Unnamed: 0,Param,Value
0,DWH_CLUSTER_TYPE,multi_node
1,DWH_NUM_NODES,1
2,DWH_NODE_TYPE,dc2.large
3,DWH_CLUSTER_IDENTIFIER,dwhCluster
4,DWH_DB_NAME,dwh
5,DWH_DB_USER,dwhuser
6,DWH_DB_PASSWORD,Passw0rd
7,DWH_DB_PORT,5439
8,DWH_IAM_ROLE_NAME,dwhRole


In [3]:
## create the s3, iam and redshift clients
logstart("create clients for s3 iam redshift")
s3 =  boto3.resource('s3',
                      region_name="us-west-2",
                     aws_access_key_id=KEY,
                      aws_secret_access_key=SECRET
                    )

iam = boto3.client('iam',aws_access_key_id=KEY,
                     aws_secret_access_key=SECRET,
                     region_name="us-west-2"
                  )

redshift = boto3.client('redshift',
                         region_name="us-west-2",
                         aws_access_key_id=KEY,
                         aws_secret_access_key=SECRET
                       )

logend("create clients for s3 iam redshift")

create clients for s3 iam redshift started 2023-10-05 03:11:46.505245
create clients for s3 iam redshift ended 2023-10-05 03:11:46.636104


In [None]:
## check the project data sources on s3
logstart("check out sample project data sources on s3")
sampleDbBucketDend= s3.Bucket("udacity-dend")
for obj in sampleDbBucketDend.objects.filter(Prefix="song-data/A/A/A"):
    print(obj)
for obj in sampleDbBucketDend.objects.filter(Prefix="log_data"):
    print(obj)
##for obj in sampleDbBucketDend.objects.all():
##    print(obj)
logend("check out sample project data sources on s3")

In [4]:
print("i am running role")
logstart("create iam role")

# define the IAM role name and trust policy
role_name = 'RedshiftS3ReadOnlyRole'
trust_policy = {
    'Version': '2012-10-17',
    'Statement': [
        {
            'Effect': 'Allow',
            'Principal': {
                'Service': 'redshift.amazonaws.com'
            },
            'Action': 'sts:AssumeRole'
        }
    ]
}
## create the iam role
logstart("create iam role")
try:
    print('1.1 Creating a new IAM Role')
    dwhRole = iam.create_role(
       Path='/',
       RoleName=DWH_IAM_ROLE_NAME,
       Description = "Allows Redshift clusters to call AWS services on your behalf",
       AssumeRolePolicyDocument=json.dumps(trust_policy)
      )
    

except Exception as e:
    print(e)
logend("create iam role")

## attach read only policy to the role
logstart("attach policy")
print('1.2 Attaching Policy')
iam.attach_role_policy(RoleName=DWH_IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
                      )['ResponseMetadata']['HTTPStatusCode']

logend("attach policy")

## print out the iam role arn
logstart("get and print iam role arn")
print('1.3 Get the IAM role ARN')
roleArn = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']

print(roleArn)
logend("get and print iam role arn")

## store the variables


i am running role
create iam role started 2023-10-05 03:11:57.811180
create iam role started 2023-10-05 03:11:57.811321
1.1 Creating a new IAM Role
An error occurred (EntityAlreadyExists) when calling the CreateRole operation: Role with name dwhRole already exists.
create iam role ended 2023-10-05 03:11:58.017944
attach policy started 2023-10-05 03:11:58.018038
1.2 Attaching Policy
attach policy ended 2023-10-05 03:11:58.084828
get and print iam role arn started 2023-10-05 03:11:58.084953
1.3 Get the IAM role ARN
arn:aws:iam::854186921520:role/dwhRole
get and print iam role arn ended 2023-10-05 03:11:58.147223
