# Amazon Redshift - Create Cluster

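This notebook collects the required network settings (VPC, subnet, and security group), defines the cluster parameters, and then creates an Amazon Redshift cluster with `boto3`.

**TODO:** Describe the overall scenario.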

<img src="img/redshift_setup.png" width="45%" align="left">

In [None]:
import boto3
import sagemaker

# Resolve the AWS region from the default boto3 session
session = boto3.Session()
region_name = session.region_name

# Open a SageMaker session and look up its default S3 bucket
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()


## Setup Amazon Redshift

To create an Amazon Redshift cluster, follow these steps:


### Collect Configuration Parameters (VPC ID, Security Group ID etc.)

In [None]:
%%bash
# Look up the VPC, a subnet and a security group to use for the Redshift cluster.
#
# Requirements for the chosen VPC:
#   * It must be the same VPC this notebook is running in.
#   * DNS resolution = Enabled
#   * DNS hostnames  = Enabled
# These settings allow private, internal access to Redshift from this SageMaker
# notebook using the fully qualified endpoint name.
#
# The first VPC returned by describe-vpcs is used. To target a specific VPC,
# add a filter to the describe-vpcs call, e.g.
#   --filters "Name=tag:Name,Values=<your-vpc-name>"

# Stop at the first failing command or unset variable instead of carrying on
# with empty IDs.
set -euo pipefail

# Abort when a lookup matched nothing. With --output text the AWS CLI prints
# the literal string "None" for a null query result.
require_id() {
    local label="$1"
    local value="$2"
    if [[ -z "${value}" || "${value}" == "None" ]]; then
        echo "ERROR: no ${label} found" >&2
        exit 1
    fi
}

#### Get VPC ID
# Assign first and export later: `export var=$(cmd)` would hide a failing cmd
# behind export's own (successful) exit status.
vpc_id=$(aws ec2 describe-vpcs --query "Vpcs[0].VpcId" --output text)
require_id "VPC" "${vpc_id}"

#### Get a subnet ID inside that VPC
sub_id=$(aws ec2 describe-subnets --filters "Name=vpc-id,Values=${vpc_id}" --query "Subnets[0].SubnetId" --output text)
require_id "subnet in VPC ${vpc_id}" "${sub_id}"

#### Get a security group ID inside that VPC
sec_id=$(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=${vpc_id}" --query "SecurityGroups[0].GroupId" --output text)
require_id "security group in VPC ${vpc_id}" "${sec_id}"

export vpc_id sub_id sec_id

# Show the VPC and subnet so they can be checked against the notebook's own VPC;
# the security group ID is printed last, on its own line, for copying.
echo "VPC ID:    ${vpc_id}"
echo "Subnet ID: ${sub_id}"
echo "Security Group ID (copy into the next cell):"
echo "${sec_id}"

In [None]:
# Paste the security group ID printed by the previous cell here.
# TODO: This security group might need to have port 5439 open
#       (5439 is the port the cluster is created with below).
SECURITY_GROUP_ID = "xxxxxx"

### Define Redshift Parameters

In [None]:
# --- Cluster identity and layout ---
DB_NAME = "dsoaws"
CLUSTER_IDENTIFIER = "dsoaws"
CLUSTER_TYPE = "multi-node"

# --- Node sizing ---
# Only some instance types support the Redshift Query Editor, see
# https://docs.aws.amazon.com/redshift/latest/mgmt/query-editor.html
NODE_TYPE = "dc2.large"
NUMBER_NODES = "2"  # kept as a string; converted with int() when the cluster is created

# --- Master user credentials ---
MASTER_USER_NAME = "dsoaws"
MASTER_USER_PW = "<password>"  # placeholder: replace before running, never commit a real password

# --- IAM role attached to the cluster ---
# TODO: Must create a new IAM Role with at least S3 access to the data bucket
#       that is loaded into Redshift.
# NOTE(review): create_cluster's IamRoles presumably expects the role ARN - confirm against the boto3 docs.
IAM_ROLE = "<IAM_ROLE>"


### Create Redshift Cluster

In [None]:
# Create the Redshift cluster from the parameters defined in the cells above.
redshift = boto3.client('redshift')

# Keyword arguments for create_cluster, gathered in one place for readability.
cluster_config = dict(
    DBName=DB_NAME,
    ClusterIdentifier=CLUSTER_IDENTIFIER,
    ClusterType=CLUSTER_TYPE,
    NodeType=NODE_TYPE,
    NumberOfNodes=int(NUMBER_NODES),  # NUMBER_NODES is stored as a string
    MasterUsername=MASTER_USER_NAME,
    MasterUserPassword=MASTER_USER_PW,
    IamRoles=[IAM_ROLE],
    VpcSecurityGroupIds=[SECURITY_GROUP_ID],
    Port=5439,
    PubliclyAccessible=False,  # no public endpoint requested
)

try:
    response = redshift.create_cluster(**cluster_config)
except redshift.exceptions.ClusterAlreadyExistsFault:
    # Re-running this cell once the cluster exists would otherwise fail and
    # leave `response` undefined. Load the existing cluster's description
    # instead, wrapped in the same {'Cluster': ...} shape create_cluster returns.
    print("Cluster '{}' already exists - skipping creation.".format(CLUSTER_IDENTIFIER))
    existing = redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)
    response = {'Cluster': existing['Clusters'][0]}

print(response)
