In [1]:
import boto3
import json
import pandas as pd
import configparser

In [2]:
# Notebook-wide settings parsed from dwh.cfg. The functions below read the
# AWS, IAM_ROLE, DWH and CLUSTER sections from this object.
config = configparser.ConfigParser()
# Use a context manager so the file handle is closed as soon as parsing ends
# instead of being left for the garbage collector.
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

In [3]:
def create_aws_clients():
    """Build the boto3 handles used by the rest of the notebook.

    Region and credentials are read from the ``AWS`` section of the
    module-level ``config`` (keys ``REGION``, ``KEY`` and ``SECRET``).

    Returns:
        tuple: ``(iam, redshift, ec2)`` where ``iam`` and ``redshift`` are
        created with ``boto3.client`` and ``ec2`` is created with
        ``boto3.resource`` (callers bind it to the name ``ec2_client``).
    """
    # Every handle shares the same region and credential keyword arguments.
    shared_kwargs = {
        'region_name': config.get('AWS', 'REGION'),
        'aws_access_key_id': config.get('AWS', 'KEY'),
        'aws_secret_access_key': config.get('AWS', 'SECRET'),
    }

    iam = boto3.client('iam', **shared_kwargs)
    redshift = boto3.client('redshift', **shared_kwargs)
    ec2 = boto3.resource('ec2', **shared_kwargs)
    return iam, redshift, ec2

In [4]:
def create_iam_role(iam_client):
    """Create the IAM role named in ``config`` and attach S3 read-only access.

    The role's trust policy allows ``redshift.amazonaws.com`` to perform
    ``sts:AssumeRole``. After creation the managed policy
    ``AmazonS3ReadOnlyAccess`` is attached to it.

    Any exception raised along the way is printed and swallowed, so the
    function always returns ``None``.

    Args:
        iam_client: Object exposing ``create_role`` and ``attach_role_policy``
            (the IAM handle returned by ``create_aws_clients``).
    """
    try:
        print('===== Creating a new IAM Role =====')
        role_name = config.get('IAM_ROLE', 'IAM_ROLE_NAME')
        # Trust relationship: who is allowed to assume this role.
        trust_policy = {
            'Statement': [{
                'Action': 'sts:AssumeRole',
                'Effect': 'Allow',
                'Principal': {
                    'Service': 'redshift.amazonaws.com'
                }
            }],
            'Version': '2012-10-17'
        }
        iam_client.create_role(
            Path='/',
            RoleName=role_name,
            Description="Allows Redshift clusters to call AWS services on your behalf.",
            AssumeRolePolicyDocument=json.dumps(trust_policy),
        )

        print('===== Attaching AmazonS3ReadOnlyAccess policy with the IAM Role =====')
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
        )
    except Exception as err:
        # Best-effort: report the problem and let the notebook continue.
        print(err)

In [5]:
def create_redshift_cluster(iam_client, redshift_client):
    """Request a new Redshift cluster using the settings in ``config``.

    The ARN of the IAM role named by ``IAM_ROLE.IAM_ROLE_NAME`` is looked up
    first (outside the ``try``, so a failed lookup propagates to the caller).
    If the ARN is empty nothing is created. Errors from the ``create_cluster``
    call itself are printed and swallowed.

    Args:
        iam_client: Object exposing ``get_role``.
        redshift_client: Object exposing ``create_cluster``.

    Returns:
        None
    """
    role = iam_client.get_role(
        RoleName=config.get('IAM_ROLE', 'IAM_ROLE_NAME')
    )
    role_arn = role['Role']['Arn']
    if role_arn:
        try:
            cluster_settings = {
                # Hardware
                'ClusterType': config.get('DWH', 'CLUSTER_TYPE'),
                'NodeType': config.get('DWH', 'NODE_TYPE'),
                'NumberOfNodes': int(config.get('DWH', 'NUM_NODES')),
                # Identifiers & credentials
                'DBName': config.get('CLUSTER', 'DB_NAME'),
                'ClusterIdentifier': config.get('CLUSTER', 'CLUSTER_IDENTIFIER'),
                'MasterUsername': config.get('CLUSTER', 'DB_USER'),
                'MasterUserPassword': config.get('CLUSTER', 'DB_PASSWORD'),
                # Role the cluster uses for S3 access
                'IamRoles': [role_arn],
            }
            redshift_client.create_cluster(**cluster_settings)
            print("==== Creating Redshift Cluster =====")
        except Exception as err:
            print(err)

In [6]:
def pretty_redshift_props(props):
    """Summarise selected cluster properties as a two-column DataFrame.

    Args:
        props: Mapping of property name to value, e.g. one entry of the
            ``Clusters`` list that ``redshift_cluster_status`` returns.

    Returns:
        pandas.DataFrame: Columns ``Key`` and ``Value``, one row per entry of
        ``props`` whose key is in the display whitelist, in the iteration
        order of ``props``.

    Side effects:
        Sets the global pandas option ``display.max_colwidth`` to ``None`` so
        long values (such as the endpoint dict) are printed untruncated.
    """
    # ``None`` is the supported way to disable truncation. The former ``-1``
    # sentinel was deprecated in pandas 1.0 and is rejected by pandas 2.x.
    pd.set_option('display.max_colwidth', None)
    keys_to_show = {
        "ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername",
        "DBName", "Endpoint", "NumberOfNodes", "VpcId",
    }
    rows = [(key, value) for key, value in props.items() if key in keys_to_show]
    return pd.DataFrame(data=rows, columns=["Key", "Value"])

In [7]:
def redshift_cluster_status(redshift_client, verbose=False):
    """Return the description of the cluster named in ``config``.

    Args:
        redshift_client: Object exposing ``describe_clusters``. When falsy the
            function returns ``None`` without making any call.
        verbose: When true, also print a summary table built by
            ``pretty_redshift_props``.

    Returns:
        The first element of the ``Clusters`` list in the
        ``describe_clusters`` response, or ``None`` if no client was given.
    """
    if redshift_client:
        description = redshift_client.describe_clusters(
            ClusterIdentifier=config.get('CLUSTER', 'CLUSTER_IDENTIFIER')
        )
        first_cluster = description['Clusters'][0]
        if verbose:
            print(pretty_redshift_props(first_cluster))
        return first_cluster
    return None

In [8]:
def create_aws_resources():
    """Create the AWS handles, the IAM role and the Redshift cluster.

    Runs ``create_aws_clients``, ``create_iam_role`` and
    ``create_redshift_cluster`` in that order.

    Returns:
        tuple: ``(iam_client, redshift_client, ec2_client)`` exactly as
        returned by ``create_aws_clients``.
    """
    # The handles used to be pre-set to None and then tested in a condition
    # that was therefore always true; they are now created unconditionally.
    iam_client, redshift_client, ec2_client = create_aws_clients()

    # create_iam_role has no return value, so nothing is captured from it.
    create_iam_role(iam_client)

    create_redshift_cluster(iam_client, redshift_client)
    return iam_client, redshift_client, ec2_client

In [9]:
def open_access_to_cluster(ec2_client, redshift_client):
    """Authorize inbound TCP traffic to the cluster on the configured DB port.

    The rule is added to the security group attached to the cluster (first
    entry of ``VpcSecurityGroups`` in the cluster description). If the
    description lists none, the first security group found in the cluster's
    VPC is used instead.

    Errors (including "rule already exists") are printed and swallowed.

    Args:
        ec2_client: The EC2 handle from ``create_aws_clients``; it must expose
            ``SecurityGroup`` and ``Vpc``.
        redshift_client: Redshift handle passed to ``redshift_cluster_status``.

    Returns:
        None
    """
    cluster_props = redshift_cluster_status(redshift_client)
    if not cluster_props:
        return

    try:
        # Prefer the group actually attached to the cluster: the first group
        # listed for the VPC is not guaranteed to be the one guarding it.
        attached_groups = cluster_props.get('VpcSecurityGroups') or []
        if attached_groups:
            security_group = ec2_client.SecurityGroup(
                attached_groups[0]['VpcSecurityGroupId']
            )
        else:
            vpc = ec2_client.Vpc(id=cluster_props['VpcId'])
            security_group = list(vpc.security_groups.all())[0]
        print(security_group)

        port = int(config.get('CLUSTER', 'DB_PORT'))
        # NOTE(review): 0.0.0.0/0 exposes the port to every IPv4 address;
        # narrow the CIDR for anything beyond a throwaway environment.
        security_group.authorize_ingress(
            GroupName=security_group.group_name,
            CidrIp='0.0.0.0/0',
            IpProtocol='TCP',
            FromPort=port,
            ToPort=port
        )
    except Exception as e:
        print(e)

In [10]:
def delete_aws_resources(iam_client, redshift_client):
    """Delete the Redshift cluster and the IAM role named in ``config``.

    The cluster is deleted without a final snapshot, then the
    ``AmazonS3ReadOnlyAccess`` policy is detached from the role and the role
    is deleted.

    Args:
        iam_client: Object exposing ``detach_role_policy`` and ``delete_role``.
        redshift_client: Object exposing ``delete_cluster``.

    Returns:
        None. Nothing is done if either handle is missing.
    """
    # Both handles are required below. The previous ``and`` only bailed out
    # when both were missing, so one missing handle led to an AttributeError
    # on None.
    if not redshift_client or not iam_client:
        return

    redshift_client.delete_cluster(
        ClusterIdentifier=config.get('CLUSTER', 'CLUSTER_IDENTIFIER'),
        SkipFinalClusterSnapshot=True,
    )

    role_name = config.get('IAM_ROLE', 'IAM_ROLE_NAME')
    # A role's managed policies must be detached before it can be deleted.
    iam_client.detach_role_policy(
        RoleName=role_name,
        PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
    )
    iam_client.delete_role(RoleName=role_name)

In [11]:
# Provision everything: the boto3 handles, the IAM role and the Redshift cluster.
iam_client, redshift_client, ec2_client = create_aws_resources()

===== Creating a new IAM Role =====
===== Attaching AmazonS3ReadOnlyAccess policy with the IAM Role =====
==== Creating Redshift Cluster =====


In [14]:
# Add the inbound TCP rule for the configured DB port; errors are printed, not raised.
open_access_to_cluster(ec2_client, redshift_client)

ec2.SecurityGroup(id='sg-0fe4689831c2becd0')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


In [21]:
# Tear down: delete the cluster (no final snapshot) and remove the IAM role.
# NOTE(review): this cell's execution count (21) is higher than that of the
# status cell that follows it (15), so the saved outputs were not produced in
# top-to-bottom order.
delete_aws_resources(iam_client, redshift_client)

In [15]:
# Print the summary table and echo the full cluster description as the cell output.
redshift_cluster_status(redshift_client, verbose=True)

                 Key  \
0  ClusterIdentifier   
1  NodeType            
2  ClusterStatus       
3  MasterUsername      
4  DBName              
5  Endpoint            
6  VpcId               
7  NumberOfNodes       

                                                                                   Value  
0  dwhcluster                                                                             
1  dc2.large                                                                              
2  available                                                                              
3  dwhuser                                                                                
4  dwh                                                                                    
5  {'Address': 'dwhcluster.ciwxovjrk2is.us-west-2.redshift.amazonaws.com', 'Port': 5439}  
6  vpc-0566300b60bf5d64f                                                                  
7  2                                                   

{'ClusterIdentifier': 'dwhcluster',
 'NodeType': 'dc2.large',
 'ClusterStatus': 'available',
 'MasterUsername': 'dwhuser',
 'DBName': 'dwh',
 'Endpoint': {'Address': 'dwhcluster.ciwxovjrk2is.us-west-2.redshift.amazonaws.com',
  'Port': 5439},
 'ClusterCreateTime': datetime.datetime(2022, 3, 22, 9, 34, 31, 122000, tzinfo=tzlocal()),
 'AutomatedSnapshotRetentionPeriod': 1,
 'ClusterSecurityGroups': [],
 'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-0fe4689831c2becd0',
   'Status': 'active'}],
 'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
   'ParameterApplyStatus': 'in-sync'}],
 'ClusterSubnetGroupName': 'default',
 'VpcId': 'vpc-0566300b60bf5d64f',
 'AvailabilityZone': 'us-west-2c',
 'PreferredMaintenanceWindow': 'mon:09:00-mon:09:30',
 'PendingModifiedValues': {},
 'ClusterVersion': '1.0',
 'AllowVersionUpgrade': True,
 'NumberOfNodes': 2,
 'PubliclyAccessible': True,
 'Encrypted': False,
 'ClusterPublicKey': 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8GDly1