#  Cloud Data Warehouse _ Music Streaming Business 

Note:
This ipynd file contain 5 parts

    PART 1 _ Create IAM role and attach policy
        1.1 Parse 'dwh_1.cfg' file
        1.2 Create Resources & Clients
        1.3 Create IAM role
    Part 2 _ Create Redshift Cluster
        2.1 Create & Validate Cluster
        2.2 Set Security Group and CIDR
    Part 3 _ ETL
        3.1 Create Staging Tables & New Schema
        3.2 ETL (Load,Transform,Insert)
    Part 4 _ Data Analytics
        4.1 How many users in total?
        4.2 How many active users?
        4.3 Top 10 songs
        4.4 How many songs been played for each month?
    Part 5 _ Vacum/Analyze/Delete Cluster/Delete Role

Recommendation:
 - If you have AMAZON AWS account, and prefer a Iac approach, you can follow below steps, with only your AccessKey (KEY and SECRET).
 - If you already manually created role, policy, set security group and redshift clusters. you can jump to **PART 3** and start from there.


In [1]:
import pandas as pd
import boto3
import json
import configparser
from botocore.exceptions import ClientError
from time import time


# PART 1 _ Create IAM role and attach policy

## 1.1 Parse 'dwh_1.cfg' file

In [2]:
config = configparser.ConfigParser()
config.read_file(open('dwh.cfg'))

# admin accesskey
KEY = config.get('AWS','KEY')
SECRET = config.get('AWS','SECRET')

# configuration for creating redshift cluster
DWH_CLUSTER_TYPE = config.get('DWH','DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('DWH','DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('DWH','DWH_NODE_TYPE')

DWH_CLUSTER_IDENTIFIER = config.get('DWH','DWH_CLUSTER_IDENTIFIER')
DWH_DB = config.get('DWH','DWH_DB')
DWH_DB_USER = config.get('DWH','DWH_DB_USER')
DWH_DB_PASSWORD = config.get('DWH','DWH_DB_PASSWORD')
DWH_PORT = config.get('DWH','DWH_PORT')

# name of IAM role, which shall also have S3readonly policy
DWH_IAM_ROLE_NAME = config.get('DWH','DWH_IAM_ROLE_NAME')


In [3]:
# print .cfg content to check
pd.DataFrame({"Param":
                  ["DWH_CLUSTER_TYPE", "DWH_NUM_NODES", "DWH_NODE_TYPE", "DWH_CLUSTER_IDENTIFIER", "DWH_DB", "DWH_DB_USER", "DWH_DB_PASSWORD", "DWH_PORT", "DWH_IAM_ROLE_NAME"],
              "Value":
                  [DWH_CLUSTER_TYPE, DWH_NUM_NODES, DWH_NODE_TYPE, DWH_CLUSTER_IDENTIFIER, DWH_DB, DWH_DB_USER, DWH_DB_PASSWORD, DWH_PORT, DWH_IAM_ROLE_NAME]
             })

Unnamed: 0,Param,Value
0,DWH_CLUSTER_TYPE,multi-node
1,DWH_NUM_NODES,4
2,DWH_NODE_TYPE,dc2.large
3,DWH_CLUSTER_IDENTIFIER,dwhCluster
4,DWH_DB,dwh
5,DWH_DB_USER,dwhuser
6,DWH_DB_PASSWORD,Passw0rd123
7,DWH_PORT,5439
8,DWH_IAM_ROLE_NAME,dwhRole


## 1.2 Create Resources & Clients

In [4]:
ec2 = boto3.resource('ec2',
                       region_name="us-west-2",
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                    )

s3 = boto3.resource('s3',
                       region_name="us-west-2",
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                   )

iam = boto3.client('iam',
                    aws_access_key_id=KEY,
                    aws_secret_access_key=SECRET,
                    region_name='us-west-2'
                  )

redshift = boto3.client('redshift',
                       region_name="us-west-2",
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                       )

print('---Resources and Clients Created.---')

---Resources and Clients Created.---


## 1.3 Create IAM role

In [5]:
# from botocore.exceptions import ClientError

# Create IAM role
try:
    print("Creating a new IAM Role...") 
    dwhRole1 = iam.create_role(
        Path='/',
        RoleName=DWH_IAM_ROLE_NAME,
        Description = "Allows Redshift clusters to call AWS services on your behalf.",
        AssumeRolePolicyDocument=json.dumps(
            {'Statement': [{'Action': 'sts:AssumeRole',
               'Effect': 'Allow',
               'Principal': {'Service': 'redshift.amazonaws.com'}}],
               'Version': '2012-10-17'})
    )    
except Exception as e:
    print(e)

# print('IAM role created.')

Creating a new IAM Role...
An error occurred (EntityAlreadyExists) when calling the CreateRole operation: Role with name dwhRole already exists.


In [6]:
# attaching policy AmazonS3ReadOnlyAccess

iam.attach_role_policy(RoleName=DWH_IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
                      )['ResponseMetadata']['HTTPStatusCode']

print("Get the IAM role ARN")
roleArn = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']

Get the IAM role ARN


In [7]:
print(roleArn)

arn:aws:iam::695143239337:role/dwhRole


# Part 2 _ Create Redshift Cluster

## 2.1 Create & Validate Cluster

In [8]:
try:
    response = redshift.create_cluster(        
        #cluster_config
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,
        NumberOfNodes=int(DWH_NUM_NODES),

        #Identifiers & Credentials
        DBName=DWH_DB,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        MasterUsername=DWH_DB_USER,
        MasterUserPassword=DWH_DB_PASSWORD,
        
        #Roles (for s3 access)
        IamRoles=[roleArn]  
    )
except Exception as e:
    print(e)

In [12]:
# run below scripts to check status of cluster just created.

def prettyRedshiftProps(props):
    pd.set_option('display.max_colwidth', -1)
    keysToShow = ["ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername", "DBName", "Endpoint", "NumberOfNodes", 'VpcId']
    x = [(k, v) for k,v in props.items() if k in keysToShow]
    return pd.DataFrame(data=x, columns=["Key", "Value"])

myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
prettyRedshiftProps(myClusterProps)

Unnamed: 0,Key,Value
0,ClusterIdentifier,dwhcluster
1,NodeType,dc2.large
2,ClusterStatus,available
3,MasterUsername,dwhuser
4,DBName,dwh
5,Endpoint,"{'Address': 'dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-e7f0c69f
7,NumberOfNodes,4


<span style="color:red">WARNING: Run below scripts ONLY after you check the status of the cluster is "AVAILABLE"</span>     

In [13]:
# DWH_ENDPOINT will be used to construct the CONN_String. to connect to the cluster
# DWH_ROLE_ARN  will be the credentials for Copy Command: copy data from S3 bucket

DWH_ENDPOINT = myClusterProps['Endpoint']['Address']
DWH_ROLE_ARN = myClusterProps['IamRoles'][0]['IamRoleArn']
print("DWH_ENDPOINT :: ", DWH_ENDPOINT)
print("DWH_ROLE_ARN :: ", DWH_ROLE_ARN)

DWH_ENDPOINT ::  dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com
DWH_ROLE_ARN ::  arn:aws:iam::695143239337:role/dwhRole


COPY DWH_ENDPOINT & DWH_ROLE_ARN  to .cfg file

## 2.2 Set Security Group and CIDR

In [14]:
# Here use the default security group
# set CIDR to '0.0.0.0/0', not limit to a subnet

try:
    vpc = ec2.Vpc(id=myClusterProps['VpcId'])
    defaultSg = list(vpc.security_groups.all())[0]
    print(defaultSg)
    defaultSg.authorize_ingress(
        GroupName=defaultSg.group_name,
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DWH_PORT),
        ToPort=int(DWH_PORT)
    )
except Exception as e:
    print(e)
    
# VPC security group might already exist. That's fine to see error in this case.

ec2.SecurityGroup(id='sg-ab8acd9b')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


# Part 3 _ ETL

Run "Create_table.py" 
This will create: 
 - a. Two staging table for copying source data from S3; 
 - b. New schema, 5 tables

In [None]:
# if you manually create cluster and role, uncomment below rows and paste endpoint and roleARN: 
# DWH_ENDPOINT = 
# DWH_ROLE_ARN = 

## 3.1 Create Staging Tables & New Schema

In [16]:
%run -i create_tables.py

drop all tables if exists
executing DROP TABLE IF EXISTS "staging_events";
executing DROP TABLE IF EXISTS staging_songs;
executing DROP TABLE IF EXISTS "songplays";
executing DROP TABLE IF EXISTS "users";
executing DROP TABLE IF EXISTS "songs";
executing DROP TABLE IF EXISTS "artists";
executing DROP TABLE IF EXISTS "time";
start creating new schema
executing 
CREATE TABLE "staging_events" (
    "artist" VARCHAR,
    "auth" VARCHAR,
    "firstName" VARCHAR,
    "gender" VARCHAR,
    "itemInSession" INT,
    "lastName" VARCHAR,
    "length" NUMERIC(9,5),
    "level" VARCHAR,
    "location" VARCHAR,
    "method" VARCHAR,
    "page" VARCHAR,
    "registration" BIGINT,
    "sessionId" INT,
    "song" VARCHAR,
    "status" INT,
    "ts" BIGINT,
    "userAgent" TEXT,
    "userId" INT
    );

executing 
CREATE TABLE "staging_songs" (
    "num_songs" INT, 
    "artist_id" VARCHAR, 
    "artist_latitude" numeric(8,5), 
    "artist_longitude" numeric(8,5),
    "artist_location" VARCHAR, 
    "ar

### CHECK: whether table created or not

In [17]:
%load_ext sql
conn_string="postgresql://{}:{}@{}:{}/{}".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT,DWH_DB)
print(conn_string)
%sql $conn_string

postgresql://dwhuser:Passw0rd123@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh


'Connected: dwhuser@dwh'

In [18]:
%sql SELECT DISTINCT schemaname, tablename FROM "pg_table_def" WHERE schemaname='public';
# it should show 7 table names

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
7 rows affected.


schemaname,tablename
public,artists
public,songplays
public,songs
public,staging_events
public,staging_songs
public,time
public,users


In [23]:
%sql SELECT * FROM "pg_table_def" WHERE tablename='songplays';
# check table

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
9 rows affected.


schemaname,tablename,column,type,encoding,distkey,sortkey,notnull
public,songplays,songplay_id,integer,az64,False,0,True
public,songplays,start_time,timestamp with time zone,az64,False,0,True
public,songplays,user_id,integer,az64,False,0,True
public,songplays,level,character varying(256),lzo,False,0,False
public,songplays,song_id,character varying(256),lzo,True,0,False
public,songplays,artist_id,character varying(256),lzo,False,0,False
public,songplays,session_id,integer,az64,False,0,True
public,songplays,location,character varying(256),lzo,False,0,False
public,songplays,user_agent,character varying(256),lzo,False,0,False


## 3.2 ETL (Load,Transform,Insert)

In [20]:
%run etl.py

Start loading staging tables...
executing 
    copy staging_events from 's3://udacity-dend/log_data'
    credentials 'aws_iam_role=arn:aws:iam::695143239337:role/dwhRole'
    json 's3://udacity-dend/log_json_path.json';
 this process might takes several minutes...be patient.
executing 
    copy staging_songs from 's3://udacity-dend/song_data'
    credentials 'aws_iam_role=arn:aws:iam::695143239337:role/dwhRole'
    json 'auto';
 this process might takes several minutes...be patient.
All staging tables loaded
Start inserting job...
executing 
BEGIN transaction;
    DROP TABLE IF EXISTS temp_artists;
    
    -- only copy the distinct value from "staging_songs" to "temp_artists"
    CREATE TABLE temp_artists AS
    SELECT
        artist_id,
        artist_name,
        artist_location,
        artist_latitude,
        artist_longitude
    FROM
        (SELECT
            ROW_NUMBER() OVER (PARTITION BY artist_id order by artist_id) AS id_rank,
            artist_id,
            artist_na

### CHECK: how many data been inserted into each table.

In [21]:
%%sql 
SELECT CONCAT((SELECT count(*) FROM staging_events),' rows inserted into staging_events') AS "ETL job result"
UNION
SELECT CONCAT((SELECT count(*) FROM staging_songs),' rows inserted into staging_songs')
UNION
SELECT CONCAT((SELECT count(*) FROM artists),' rows inserted into artists')
UNION
SELECT CONCAT((SELECT count(*) FROM songs),' rows inserted into songs')
UNION
SELECT CONCAT((SELECT count(*) FROM users),' rows inserted into users')
UNION
SELECT CONCAT((SELECT count(*) FROM time),' rows inserted into time')
UNION
SELECT CONCAT((SELECT count(*) FROM songplays),' rows inserted into songplays')
; 

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
7 rows affected.


etl job result
97 rows inserted into users
14896 rows inserted into staging_songs
9553 rows inserted into artists
6813 rows inserted into time
301 rows inserted into songplays
14896 rows inserted into songs
8056 rows inserted into staging_events


In [22]:
%sql SELECT * FROM songplays where song_id IS NOT NULL LIMIT 10;

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


songplay_id,start_time,user_id,level,song_id,artist_id,session_id,location,user_agent
76,2018-11-20 07:01:13+00:00,15,paid,SOSDYAS12AB0180457,ARA3I0J1187FB57869,716,"Chicago-Naperville-Elgin, IL-IN-WI","""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36"""
108,2018-11-29 11:03:31+00:00,78,free,SOROSRY12A6D4F7B64,AR0WBBL1187FB4677D,931,"Indianapolis-Carmel-Anderson, IN",Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0
126,2018-11-21 04:20:28+00:00,97,paid,SOMEFTJ12A6D4F8CAC,AR7S2271187FB38B1F,797,"Lansing-East Lansing, MI","""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.94 Safari/537.36"""
134,2018-11-29 18:05:39+00:00,49,paid,SOMEFTJ12A6D4F8CAC,AR7S2271187FB38B1F,1041,"San Francisco-Oakland-Hayward, CA",Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0
142,2018-11-26 00:52:05+00:00,33,free,SONQEAO12A6D4F8CB3,AR7S2271187FB38B1F,827,"Eugene, OR","""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
182,2018-11-07 05:32:06+00:00,50,free,SOXQUPO12A6D4FC2B6,AR79C1C1187FB4C482,313,"New Haven-Milford, CT","""Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"""
294,2018-11-16 16:27:21+00:00,90,free,SOMUJKC12AB01865AD,AR9RYZP1187FB36C6A,148,"Pensacola-Ferry Pass-Brent, FL",Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0
302,2018-11-13 17:28:33+00:00,97,paid,SOIBHYW12AB0188F49,ARWNARC122BCFCAFEB,537,"Lansing-East Lansing, MI","""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.94 Safari/537.36"""
310,2018-11-21 21:56:47+00:00,15,paid,SOZCTXZ12AB0182364,AR5KOSW1187FB35FF4,818,"Chicago-Naperville-Elgin, IL-IN-WI","""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36"""
318,2018-11-27 12:19:30+00:00,24,paid,SOKXJKN12A6D4F86D5,ARVRAUT1187FB39AFB,879,"Lake Havasu City-Kingman, AZ","""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""


## Part 4 _ Data Analytics

## 4.1 How many users in total

In [24]:
%sql SELECT count(*) AS Total_Users_Count FROM "users";

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


total_users_count
97


## 4.2 How many active users? 
(login at least once this year)

In [25]:
%%sql
SELECT count(DISTINCT "user_id") AS Total_ActiveUsers_Count 
FROM "songplays" s
JOIN "time" t ON s."start_time" = t."start_time"
WHERE "year" IN (select MAX("year") FROM "time") AND "user_id" IS NOT NULL;

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


total_activeusers_count
53


## 4.3 Top 10 songs

In [26]:
%%sql
SELECT
    ROW_NUMBER()OVER(ORDER BY song_count."play_times" DESC) AS "No.",
    song_count."title",
    song_count."play_times",
    song_count."artist"
    FROM 
        (SELECT 
            s."title",
            count(sp."song_id") AS "play_times",
            a."name" AS "artist"
        FROM "songs" s
        JOIN "songplays" sp ON s."song_id" = sp."song_id"
        JOIN "artists" a ON s."artist_id" = a."artist_id"
        GROUP BY s."title",a."name"
        ORDER BY count(sp."song_id") DESC
        LIMIT 10
        ) AS song_count

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


no.,title,play_times,artist
1,You're The One,37,Dwight Yoakam
2,Catch You Baby (Steve Pitron & Max Sanna Radio Edit),9,Lonnie Gordon
3,I CAN'T GET STARTED,9,Ron Carter
4,Nothin' On You [feat. Bruno Mars] (Album Version),8,B.o.B
5,Hey Daddy (Daddy's Home),6,Usher
6,Up Up & Away,5,Kid Cudi
7,Mr. Jones,4,Counting Crows
8,Unwell (Album Version),4,matchbox twenty
9,Supermassive Black Hole (Album Version),4,Muse
10,Fade To Black,3,Metallica


## 4.4 How many songs been played for each month
(every plays counts even it is the same song)

In [27]:
%%sql
SELECT
    t.year,
    t.month,
    COUNT(sp.start_time) AS monthly_songs_played
FROM "songplays" sp 
JOIN "time" t ON sp.start_time = t.start_time
GROUP BY t.year, t.month
ORDER BY t.year, t.month;

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


year,month,monthly_songs_played
2018,11,301


## Part 5 _ Vacum/Analyze/Delete Cluster/Delete Role

In [None]:
%sql vacuum;

In [None]:
%sql analyze;

In [28]:
%sql DROP TABLE IF EXISTS "staging_events";
%sql DROP TABLE IF EXISTS "staging_songs";
%sql DROP TABLE IF EXISTS "songplays";
%sql DROP TABLE IF EXISTS "users";
%sql DROP TABLE IF EXISTS "songs";
%sql DROP TABLE IF EXISTS "artists";
%sql DROP TABLE IF EXISTS "time";

 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.
 * postgresql://dwhuser:***@dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com:5439/dwh
Done.


[]

In [29]:
# delete redshift cluster

redshift.delete_cluster( ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,  SkipFinalClusterSnapshot=True)

{'Cluster': {'ClusterIdentifier': 'dwhcluster',
  'NodeType': 'dc2.large',
  'ClusterStatus': 'deleting',
  'MasterUsername': 'dwhuser',
  'DBName': 'dwh',
  'Endpoint': {'Address': 'dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com',
   'Port': 5439},
  'ClusterCreateTime': datetime.datetime(2021, 6, 9, 19, 56, 34, 807000, tzinfo=tzlocal()),
  'AutomatedSnapshotRetentionPeriod': 1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-ab8acd9b',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-e7f0c69f',
  'AvailabilityZone': 'us-west-2d',
  'PreferredMaintenanceWindow': 'thu:12:30-thu:13:00',
  'PendingModifiedValues': {},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 4,
  'PubliclyAccessible': True,
  'Encrypted': False,
  'Tags': [],
  'EnhancedVpcRouting': False,
  'Iam

In [30]:
# check status of cluster
# deleting cluster might take several minutes, run multiple and make sure it's beed deleted

myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
prettyRedshiftProps(myClusterProps)

Unnamed: 0,Key,Value
0,ClusterIdentifier,dwhcluster
1,NodeType,dc2.large
2,ClusterStatus,deleting
3,MasterUsername,dwhuser
4,DBName,dwh
5,Endpoint,"{'Address': 'dwhcluster.czwptbpzjzag.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-e7f0c69f
7,NumberOfNodes,4


In [None]:
# delete the IAM Role created: detach role policy, then delete role

iam.detach_role_policy(RoleName=DWH_IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)
