In [None]:
!pip install awswrangler

# Create Redshift

In [1]:
import boto3
import secrets
import string
import json
import time
from datetime import datetime

In [2]:
# Service clients reused by the rest of the notebook.
rdsht = boto3.client(service_name='redshift')
scm = boto3.client(service_name='secretsmanager')

### Generate password

In [9]:
# Redshift rejects a master password unless it is 8-64 characters long and
# contains at least one uppercase letter, one lowercase letter and one digit.
# Drawing 20 characters uniformly from letters+digits leaves out a digit roughly
# 3% of the time, so redraw until every required character class is present.
PASSWORD_LENGTH = 20
alphabet = string.ascii_letters + string.digits
while True:
    password = ''.join(secrets.choice(alphabet) for _ in range(PASSWORD_LENGTH))
    if (any(ch.islower() for ch in password)
            and any(ch.isupper() for ch in password)
            and any(ch.isdigit() for ch in password)):
        break

## Redshift configuration

In [10]:
# Cluster settings shared by the cells below.
DBName = 'dev'
MasterUsername = 'awsuser'
NodeType = 'ra3.4xlarge'
# The identifier is suffixed with the current local time at minute resolution.
ClusterIdentifier = f"redshift-beyoung-{datetime.today():%Y%m%d-%H-%M}"

In [11]:
# Launch a private two-node cluster from the configuration above. NodeType is
# read from the configuration cell instead of being repeated as a literal, so
# editing it there actually changes what gets created.
rdsht.create_cluster(
    DBName=DBName,
    ClusterIdentifier=ClusterIdentifier,
    NodeType=NodeType,
    MasterUsername=MasterUsername,
    MasterUserPassword=password,
    NumberOfNodes=2,
    PubliclyAccessible=False,
)

{'Cluster': {'ClusterIdentifier': 'redshift-beyoung-20200922-10-07',
  'NodeType': 'ra3.4xlarge',
  'ClusterStatus': 'creating',
  'ClusterAvailabilityStatus': 'Modifying',
  'MasterUsername': 'awsuser',
  'DBName': 'dev',
  'AutomatedSnapshotRetentionPeriod': 1,
  'ManualSnapshotRetentionPeriod': -1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-c81633b2',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-bcef6bda',
  'PreferredMaintenanceWindow': 'sat:08:00-sat:08:30',
  'PendingModifiedValues': {'MasterUserPassword': '****'},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 2,
  'PubliclyAccessible': False,
  'Encrypted': False,
  'Tags': [],
  'EnhancedVpcRouting': False,
  'IamRoles': [],
  'MaintenanceTrackName': 'current',
  'DeferredMaintenanceWindows': [],
  'NextMaint

In [None]:
# Block until the new cluster reports it is available for queries.
# The wait is bounded and treats 'Failed' as terminal, so a cluster that never
# comes up raises an error instead of leaving the cell polling forever.
max_wait_seconds = 30 * 60
poll_seconds = 3
deadline = time.monotonic() + max_wait_seconds
status = ''
while True:
    previous_status = status
    status = rdsht.describe_clusters(ClusterIdentifier=ClusterIdentifier)['Clusters'][0]['ClusterAvailabilityStatus']
    if status != previous_status:
        # Log state transitions only, rather than one line per poll.
        print(status)
    if status == 'Available':
        break
    if status == 'Failed':
        raise RuntimeError(f"Cluster {ClusterIdentifier} reported availability status 'Failed'")
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Cluster {ClusterIdentifier} was not available after {max_wait_seconds} seconds "
            f"(last status: {status})"
        )
    time.sleep(poll_seconds)

In [14]:
# Fetch the cluster description once and pull out the fields used later:
# the VPC id, the first VPC security group id, and the endpoint record.
response = rdsht.describe_clusters(ClusterIdentifier=ClusterIdentifier)['Clusters'][0]
vpcid, sg, endpoint = (
    response['VpcId'],
    response['VpcSecurityGroups'][0]['VpcSecurityGroupId'],
    response['Endpoint'],
)

## Save connection info to Secrets Manager

In [18]:
# Secrets Manager entry: named after the cluster, holding its connection
# details as a JSON document.
scName = ClusterIdentifier
scString = json.dumps({
    'username': MasterUsername,
    'password': password,
    'host': endpoint['Address'],
    'port': endpoint['Port'],
    'dbname': DBName,
})
scm.create_secret(Name=scName, Description=scName, SecretString=scString)

{'ARN': 'arn:aws:secretsmanager:us-west-2:232367251376:secret:redshift-beyoung-20200922-10-07-CIQS68',
 'Name': 'redshift-beyoung-20200922-10-07',
 'VersionId': '9daa17d6-df67-4a70-9c7c-3a07f4cd6371',
 'ResponseMetadata': {'RequestId': 'e498c731-fe04-4d6a-9b98-9da5b868034c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 22 Sep 2020 10:26:52 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '193',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'e498c731-fe04-4d6a-9b98-9da5b868034c'},
  'RetryAttempts': 0}}

## Creating an IAM role to allow your Amazon Redshift cluster to access AWS services

In [27]:
# IAM client used to create the role and attach its policies.
iam = boto3.client(service_name='iam')

In [28]:
# IAM role configuration.
# Reference: https://docs.aws.amazon.com/redshift/latest/mgmt/authorizing-redshift-service.html
role = 'beyoung.redshift.copyunload.role'

# Trust policy: allows the Redshift service principal to assume the role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "redshift.amazonaws.com"},
            "Action": "sts:AssumeRole",
        },
    ],
}

# AWS managed policies to attach to the role.
execution_policy_arn = [
    'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess',
    'arn:aws:iam::aws:policy/AWSGlueConsoleFullAccess',
    'arn:aws:iam::aws:policy/AmazonAthenaFullAccess',
]

In [33]:
# Create the role, or reuse it when an earlier run already created it: the role
# name is fixed, so a second create_role call raises EntityAlreadyExists.
# NOTE: an existing role is reused as-is; its trust policy is not updated here.
try:
    role_arn = iam.create_role(
        RoleName=role,
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
    )['Role']['Arn']
except iam.exceptions.EntityAlreadyExistsException:
    role_arn = iam.get_role(RoleName=role)['Role']['Arn']

# Plain loop: the attach calls are made only for their side effect.
for policy in execution_policy_arn:
    iam.attach_role_policy(RoleName=role, PolicyArn=policy)

In [35]:
# Associate the IAM role with the cluster (trailing ';' hides the response).
rdsht.modify_cluster_iam_roles(
    ClusterIdentifier=ClusterIdentifier,
    AddIamRoles=[role_arn],
);

# Connect to Redshift

In [21]:
import awswrangler as wr

In [22]:
# Read the secret stored under scName back from Secrets Manager and decode its
# JSON 'SecretString' payload into a dict of connection details.
response = scm.get_secret_value(SecretId=scName)
dbInfo = json.loads(response['SecretString'])

In [23]:
# Build the Redshift engine from the connection details held in dbInfo.
eng_redshift = wr.db.get_engine(
    db_type='redshift',
    host=dbInfo['host'],
    port=dbInfo['port'],
    database=dbInfo['dbname'],
    user=dbInfo['username'],
    password=dbInfo['password'],
)

In [26]:
# Collect (schemaname, tablename) rows from pg_tables, ordered by schema then
# table name, and display them as the cell result.
sql = 'select schemaname, tablename from pg_tables order by schemaname, tablename;'
tables = []
output = eng_redshift.execute(sql)
tables.extend(output)
tables

# Pause Redshift

In [None]:
# Pause the cluster identified by ClusterIdentifier; the trailing ';' keeps the
# API response from being displayed as the cell output.
rdsht.pause_cluster(ClusterIdentifier=ClusterIdentifier);

# Clean up resources

In [None]:
# # Delete the Redshift cluster (left commented out so it is never run by accident)
# response=rdsht.delete_cluster(ClusterIdentifier=ClusterIdentifier, SkipFinalClusterSnapshot=True)
# response

In [None]:
# delete role
# [iam.detach_role_policy(RoleName=role,PolicyArn=policy) for policy in execution_policy_arn]
# iam.delete_role(RoleName=role)

In [None]:
# import psycopg2
# from sqlalchemy import create_engine
# from sqlalchemy import text

# engine_string=f"postgresql+psycopg2://{rdsht_user}:{rdsht_pass}@{rdsht_endpoint}:{rdsht_port}/{DBName}"
# engine = create_engine(engine_string)