In [6]:
# Load in all required libraries
import configparser
import json

import boto3
import pandas as pd
import psycopg2

In [16]:
# Open and read the contents of the config file.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving it open for the lifetime of the kernel.
ioc_config = configparser.ConfigParser()
with open('./dwh-ioc.cfg') as ioc_config_file:
    ioc_config.read_file(ioc_config_file)

In [17]:
# Pull every setting needed to create the AWS services out of the parsed config.

# Credentials are read from the [AWS] section.
KEY, SECRET = (ioc_config.get('AWS', option) for option in ('KEY', 'SECRET'))

# All warehouse settings are read from the [DWH] section. The option names in
# the second tuple line up one-to-one with the variables in the first.
(
    DWH_REGION,
    DWH_CLUSTER_TYPE,
    DWH_NUM_NODES,
    DWH_NODE_TYPE,
    DWH_CLUSTER_IDENTIFIER,
    DWH_DB,
    DWH_DB_USER,
    DWH_DB_PASSWORD,
    DWH_PORT,
    DWH_IAM_ROLE_NAME,
) = (
    ioc_config.get("DWH", option)
    for option in (
        "DWH_REGION",
        "DWH_CLUSTER_TYPE",
        "DWH_NUM_NODES",
        "DWH_NODE_TYPE",
        "DWH_CLUSTER_IDENTIFIER",
        "DWH_DB",
        "DWH_DB_USER",
        "DWH_DB_PASSWORD",
        "DWH_PORT",
        "DWH_IAM_ROLE_NAME",
    )
)

In [18]:
# Display the parameters for creating the DWH cluster.
# The database password is masked so the secret never ends up in the saved
# notebook output; every other value is shown exactly as read from the config.
# A single name -> value mapping keeps each label next to its value, so the
# two columns cannot drift out of sync.
dwh_params = {
    "DWH_REGION": DWH_REGION,
    "DWH_CLUSTER_TYPE": DWH_CLUSTER_TYPE,
    "DWH_NUM_NODES": DWH_NUM_NODES,
    "DWH_NODE_TYPE": DWH_NODE_TYPE,
    "DWH_CLUSTER_IDENTIFIER": DWH_CLUSTER_IDENTIFIER,
    "DWH_DB": DWH_DB,
    "DWH_DB_USER": DWH_DB_USER,
    "DWH_DB_PASSWORD": "********",  # masked on purpose
    "DWH_PORT": DWH_PORT,
    "DWH_IAM_ROLE_NAME": DWH_IAM_ROLE_NAME,
}
df = pd.DataFrame({
        "Param": list(dwh_params.keys()),
        "Value": list(dwh_params.values()),
    })

print(df)

                    Param         Value
0              DWH_REGION  eu-central-1
1        DWH_CLUSTER_TYPE    multi-node
2           DWH_NUM_NODES             4
3           DWH_NODE_TYPE     dc2.large
4  DWH_CLUSTER_IDENTIFIER    dwhCluster
5                  DWH_DB           dwh
6             DWH_DB_USER       awsuser
7         DWH_DB_PASSWORD           XXX
8                DWH_PORT          5439
9       DWH_IAM_ROLE_NAME       dwhRole


In [22]:
# Creating resources/clients for all needed infrastructure: EC2, S3, IAM, Redshift
def create_client(name, func):
    """Build a boto3 handle for a single AWS service.

    Args:
        name: Service name passed to ``func`` as its first positional
            argument (this notebook uses 'ec2', 's3', 'iam' and 'redshift').
        func: Factory that builds the handle; this notebook passes
            ``boto3.resource`` or ``boto3.client``.

    Returns:
        Whatever ``func`` returns for the given service.
    """
    print("Creating client for", name)
    # Region and credentials come from the module-level config values.
    connection_settings = {
        "region_name": DWH_REGION,
        "aws_access_key_id": KEY,
        "aws_secret_access_key": SECRET,
    }
    return func(name, **connection_settings)


# EC2 and S3 are created through boto3.resource, in that order.
ec2, s3 = (create_client(service, boto3.resource) for service in ('ec2', 's3'))
# IAM and Redshift are created through boto3.client, in that order.
iam, redshift = (create_client(service, boto3.client) for service in ('iam', 'redshift'))

In [24]:
# Creating IAM role for Redshift, allowing it to use AWS services
print("Creating a new IAM Role")

# Trust policy: allows the Redshift service principal to assume this role.
redshift_trust_policy = {
    'Statement': [
        {
            'Action': 'sts:AssumeRole',
            'Effect': 'Allow',
            'Principal': {'Service': 'redshift.amazonaws.com'},
        }
    ],
    'Version': '2012-10-17',
}

iam.create_role(
    Path='/',
    RoleName=DWH_IAM_ROLE_NAME,
    Description="Allows Redshift clusters to call AWS services on your behalf.",
    AssumeRolePolicyDocument=json.dumps(redshift_trust_policy),
)

Creating a new IAM Role


ClientError: An error occurred (InvalidClientTokenId) when calling the CreateRole operation: The security token included in the request is invalid.

In [None]:
# Attach the S3 read-only managed policy to the role, then look up the role's ARN.
print("Attaching policy to IAM role")
attach_response = iam.attach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
)
# Report the HTTP status explicitly: it is not the last expression of the
# cell, so it would otherwise be computed and silently thrown away.
print("Attach policy HTTP status:", attach_response['ResponseMetadata']['HTTPStatusCode'])

# The ARN is needed later when the role is handed to the Redshift cluster.
roleArn = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']
print("ARN role:", roleArn)

In [None]:
# Creates Redshift cluster (Warning, this costs money - make sure to use it or delete it again!)
redshift.create_cluster(
    # Hardware provisioned
    ClusterType=DWH_CLUSTER_TYPE,
    NodeType=DWH_NODE_TYPE,
    NumberOfNodes=int(DWH_NUM_NODES),  # config values are strings

    # Identifiers & Credentials
    DBName=DWH_DB,
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
    MasterUsername=DWH_DB_USER,
    MasterUserPassword=DWH_DB_PASSWORD,

    # Listen on the configured port. Without this the cluster falls back to
    # the service default, which only matches the port opened in the security
    # group when DWH_PORT happens to be that same default.
    Port=int(DWH_PORT),

    # Roles (for s3 access)
    IamRoles=[roleArn]
)

In [None]:
# Get endpoint and ARN role for cluster

# A freshly created cluster has no endpoint until it is available, so block
# until Redshift reports the cluster as available before describing it.
redshift.get_waiter('cluster_available').wait(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)

cluster_properties = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]

# Read from the variable assigned just above (the previous code referenced an
# undefined name here and failed with a NameError).
DWH_ENDPOINT = cluster_properties['Endpoint']['Address']
DWH_ROLE_ARN = cluster_properties['IamRoles'][0]['IamRoleArn']
print("DWH_ENDPOINT:", DWH_ENDPOINT)
print("DWH_ROLE_ARN:", DWH_ROLE_ARN)

In [None]:
# Update cluster security group to allow access through redshift port
# Uses the cluster description fetched by the describe_clusters call earlier
# in the notebook (the previous code referenced an undefined name here).
vpc = ec2.Vpc(id=cluster_properties['VpcId'])

# The first Security group should be the default one
# NOTE(review): this relies on the listing order of the VPC's security
# groups - confirm the first entry really is the group used by the cluster.
defaultSg = list(vpc.security_groups.all())[0]
print("Default Security group:", defaultSg)

# Authorize access
# WARNING: 0.0.0.0/0 opens the Redshift port to every IPv4 address. Narrow
# the CIDR range to known addresses for anything beyond a short-lived test.
defaultSg.authorize_ingress(GroupName=defaultSg.group_name,
                            CidrIp='0.0.0.0/0',
                            IpProtocol='TCP',
                            FromPort=int(DWH_PORT),
                            ToPort=int(DWH_PORT)
                           )

In [None]:
# Test connection
dwh_config = configparser.ConfigParser()
# Close the config file as soon as it has been parsed.
with open('./dwh.cfg') as dwh_config_file:
    dwh_config.read_file(dwh_config_file)

# The values of the [CLUSTER] section are substituted positionally, so the
# section must list host, dbname, user, password and port in exactly that order.
# Read from dwh_config, the parser created above (the previous code referenced
# an undefined name here and failed with a NameError).
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*dwh_config['CLUSTER'].values()))

try:
    cur = conn.cursor()
    print('Connected to AWS Redshift cluster')
finally:
    # Always release the connection, even if creating the cursor fails.
    conn.close()