# Create a Redshift Cluster using AWS python SDK

In [1]:
import boto3
import configparser
import os
import json

from dotenv import load_dotenv

## Data Warehouse Parameters

In [2]:
load_dotenv()

KEY = os.getenv('AWS_KEY')
SECRET = os.getenv('AWS_SECRET')

DB_NAME="dwh_sparkify"
DB_USER="sparkify_deng"
DB_PASSWORD="ThePassw0rd"
DB_PORT=5439


DWH_CLUSTER_TYPE="multi-node"
DWH_NUM_NODES=4
DWH_NODE_TYPE="dc2.large"

DWH_CLUSTER_IDENTIFIER = "dwhCluster"
DWH_IAM_ROLE_NAME="dwhRole"

## Create clients

In [3]:
s3 = boto3.resource(
    service_name = 's3',
    region_name = 'us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET
)

iam = boto3.client(
    service_name = 'iam',
    region_name = 'us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET
)

redshift = boto3.client(
    service_name = 'redshift',
    region_name = 'us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET
)

## Create the IAM role

In [4]:
try:
    print("Creating a new IAM Role \n")
    dwhRole = iam.create_role(
        Path="/",
        RoleName=DWH_IAM_ROLE_NAME,
        Description="Allows Redshift clusters to call AWS services.",
        AssumeRolePolicyDocument=json.dumps(
            {
                "Statement": [
                    {
                        "Action": "sts:AssumeRole",
                        "Effect": "Allow",
                        "Principal": {"Service": "redshift.amazonaws.com"},
                    }
                ],
                "Version": "2012-10-17",
            }
        ),
    )

except Exception as e:
    print(e)

Creating a new IAM Role 

An error occurred (EntityAlreadyExists) when calling the CreateRole operation: Role with name dwhRole already exists.


## Attach policy

In [5]:
print("Attaching Policy \n")

iam.attach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
)['ResponseMetadata']['HTTPStatusCode']

Attaching Policy 



200

## Get the IAM role ARN

In [4]:
role_arn = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']

## Create a Redshift Cluster

In [7]:
try:
    response = redshift.create_cluster(        
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,
        NumberOfNodes=int(DWH_NUM_NODES),
        DBName=DB_NAME,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        MasterUsername=DB_USER,
        MasterUserPassword=DB_PASSWORD,
        IamRoles=[role_arn]  
    )
except Exception as e:
    print(e)

### See cluster status

In [5]:
cluster_props = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
cluster_props['ClusterStatus']

'available'

## Copy cluster Endpoint

In [6]:
host = cluster_props['Endpoint']['Address']

## Write config file

In [None]:
config = configparser.ConfigParser()

config['CLUSTER'] = {
    'HOST': host,
    'DB_NAME': DB_NAME,
    'DB_USER': DB_USER,
    'DB_PASSWORD': DB_PASSWORD,
    'DB_PORT':DB_PORT
}

config['IAM_ROLE'] = {'ARN': role_arn}

config['S3'] = {
    'LOG_DATA': 's3://udacity-dend/log_data/',
    'LOG_JSONPATH': 's3://udacity-dend/log_json_path.json',
    'SONG_DATA': 's3://udacity-dend/song_data/'
}

with open('dwh.cfg', 'w') as configfile:
    config.write(configfile)

<h1><font color='red'>CLEAN UP RESOURCES</h1> 

In [52]:
redshift.delete_cluster(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,  SkipFinalClusterSnapshot=True)

{'Cluster': {'ClusterIdentifier': 'dwhcluster',
  'NodeType': 'dc2.large',
  'ClusterStatus': 'deleting',
  'ClusterAvailabilityStatus': 'Modifying',
  'MasterUsername': 'sparkify_deng',
  'DBName': 'dwh_sparkify',
  'Endpoint': {'Address': 'dwhcluster.c3ufykhk0jay.us-west-2.redshift.amazonaws.com',
   'Port': 5439},
  'ClusterCreateTime': datetime.datetime(2021, 11, 5, 21, 47, 16, 904000, tzinfo=tzutc()),
  'AutomatedSnapshotRetentionPeriod': 1,
  'ManualSnapshotRetentionPeriod': -1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-e7f90cec',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-27aea25f',
  'AvailabilityZone': 'us-west-2c',
  'PreferredMaintenanceWindow': 'fri:13:00-fri:13:30',
  'PendingModifiedValues': {},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 4,
  'Pub

In [54]:
cluster_props = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
cluster_props['ClusterStatus']

ClusterNotFoundFault: An error occurred (ClusterNotFound) when calling the DescribeClusters operation: Cluster dwhcluster not found.

In [55]:
iam.detach_role_policy(RoleName=DWH_IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)

{'ResponseMetadata': {'RequestId': '184d90f9-a833-4310-aa7c-faa93573eb17',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '184d90f9-a833-4310-aa7c-faa93573eb17',
   'content-type': 'text/xml',
   'content-length': '200',
   'date': 'Fri, 05 Nov 2021 23:57:59 GMT'},
  'RetryAttempts': 0}}