# Verify S3_BUCKET Bucket Creation

In [23]:
import boto3
import sagemaker
import time

session = boto3.session.Session()
region = session.region_name

sess   = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

account_id = boto3.client('sts').get_caller_identity().get('Account')

s3 = boto3.Session().client(service_name='s3', region_name=region)

airflow_env_name = 'mwaa-'+region+'-'+account_id
airflow_vpc_name = 'mwaa-vpc'+region+'-'+account_id

In [2]:
setup_s3_bucket_passed=False
%store -r airflow_bucket_name
%store airflow_env_name
%store airflow_vpc_name

Stored 'airflow_env_name' (str)
Stored 'airflow_vpc_name' (str)


In [3]:
from botocore.client import ClientError

response = None

try:
    response = s3.head_bucket(Bucket=airflow_bucket_name)
    print(response)
    setup_s3_bucket_passed=True
except ClientError as e:
    print('[ERROR] Cannot find bucket {} in {} due to {}.'.format(airflow_bucket_name, response, e))

{'ResponseMetadata': {'RequestId': '5ABA1FE4AEA5886F', 'HostId': 'xppUbg9g+z4y9hrzW+iXlgyDQhVI2BKej9x9nAqSVUIvCfMi0KUk0WhkvbqPjTZ9c4JSuinv+6k=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'xppUbg9g+z4y9hrzW+iXlgyDQhVI2BKej9x9nAqSVUIvCfMi0KUk0WhkvbqPjTZ9c4JSuinv+6k=', 'x-amz-request-id': '5ABA1FE4AEA5886F', 'date': 'Sat, 06 Feb 2021 01:57:19 GMT', 'x-amz-bucket-region': 'us-east-1', 'content-type': 'application/xml', 'server': 'AmazonS3'}, 'RetryAttempts': 0}}


In [4]:
%store setup_s3_bucket_passed

Stored 'setup_s3_bucket_passed' (bool)


# Configure Airflow DAG Files before uploading to S3 Bucket

In [5]:
with open('./dags/config.py', 'r') as f:
    lines = f.readlines()

with open('./dags/config.py', 'w') as f:
    for line in lines:
        line = line.replace('{0}', region)
        line = line.replace('{1}', bucket)
        f.write(line)

# Copy Airflow DAG to S3 Bucket

In [6]:
%store -r s3_mwaa_private_path
%store -r s3_mwaa_dags_private_path
%store -r s3_mwaa_pipeline_private_path

In [7]:
!aws s3 cp --recursive ./dags/pipeline $s3_mwaa_pipeline_private_path
!aws s3 cp ./dags/config.py $s3_mwaa_dags_private_path/config.py
!aws s3 cp ./dags/bert_reviews.py $s3_mwaa_dags_private_path/bert_reviews.py
!aws s3 cp ./dags/requirements.txt $s3_mwaa_private_path/requirements.txt

upload: dags/pipeline/__init__.py to s3://airflow-us-east-1-835319576252/dags/pipeline/__init__.py
upload: dags/pipeline/preprocess.py to s3://airflow-us-east-1-835319576252/dags/pipeline/preprocess.py
upload: dags/pipeline/prepare.py to s3://airflow-us-east-1-835319576252/dags/pipeline/prepare.py
upload: dags/config.py to s3://airflow-us-east-1-835319576252/dags/config.py
upload: dags/bert_reviews.py to s3://airflow-us-east-1-835319576252/dags/bert_reviews.py
upload: dags/requirements.txt to s3://airflow-us-east-1-835319576252/requirements.txt


In [8]:
!aws s3 ls $s3_mwaa_private_path
!aws s3 ls $s3_mwaa_dags_private_path

                           PRE dags/
2021-02-06 01:57:22         98 requirements.txt
                           PRE dags/


# Configure Temporary IAM Role Policy for MWAA VPC

In [24]:
#Check number of policies attached to TeamRole, we need to have nine
iam = boto3.resource('iam')
team_role_arn = iam.Role('TeamRole').arn

team_role = iam.Role('TeamRole')

aws_managed_policies = [p for p in team_role.attached_policies.all() ]            

if(len(aws_managed_policies) >= 10):    
    print('You have: {0} policies attached to TeamRole, you need downsize to 9 Policies so that we can add an MWAA VPC Creation Policy.'.format(len(aws_managed_policies)))
    print("Please do NOT continue unless until you run this and get a Success message")
else:
    print("Success! Please Continue...")    

Success! Please Continue...


In [25]:
mwaa_vpc_policy_json = open('./src/mwaa_vpc_policy.json', 'r').read()
mwaa_vpc_policy_json = mwaa_vpc_policy_json.replace('{0}',region)
mwaa_vpc_policy_json = mwaa_vpc_policy_json.replace('{1}',account_id)

In [26]:
%store team_role_arn

Stored 'team_role_arn' (str)


In [27]:
response = iam.create_policy(
    PolicyName='mwaa_vpc_policy',
    PolicyDocument=mwaa_vpc_policy_json
)

mwaa_vpc_policy_arn = response["Policy"]["Arn"]

response = iam.attach_role_policy(
    RoleName="TeamRole",
    PolicyArn=mwaa_vpc_policy_arn
)

EntityAlreadyExistsException: An error occurred (EntityAlreadyExists) when calling the CreatePolicy operation: A policy called mwaa_vpc_policy already exists. Duplicate names are not allowed.

# Create VPC MWAA Environment - Please be patient this can take around 10 Minutes.

In [15]:
cloudformation = boto3.resource('cloudformation')

mwaa_vpc_template_yaml = open('./cfn/mwaa_vpc_template.yaml', 'r').read()

response = cloudformation.create_stack(
    StackName='mwaa-vpc-stack',
    TemplateBody=mwaa_vpc_template_yaml,
    Parameters=[
        {
            'ParameterKey': 'EnvironmentName',
            'ParameterValue': airflow_vpc_name
        },
    ],
    ResourceTypes=[
        'AWS::EC2::VPC',
    ],
    OnFailure='ROLLBACK',
    EnableTerminationProtection=False
)

stack_status = 'IN_PROGRESS'

print ('Starting deployment of VPC {0} \n.'.format(airflow_vpc_name))

while stack_status != 'CREATE_COMPLETE':
    stack_status = cloudformation.Stack('mwaa-vpc-stack').stack_status
    time.sleep(30)
    print("Still waiting....")

print ("Sucess! VPC Deployment has completed.")

Starting deployment of VPC mwaa-vpcus-east-1-835319576252 
.
Still waiting....
Still waiting....
Still waiting....
Still waiting....
Still waiting....
Still waiting....
Still waiting....
Still waiting....
Sucess! VPC Deployment has completed.


In [19]:
vpc_outputs = cloudformation.Stack('mwaa-vpc-stack').outputs

airflow_sg_id = None
for output in vpc_outputs: 
    if output['OutputKey'] == 'IngressSecurityGroup': 
        airflow_sg_id = output['OutputValue'] 
        break
        
subnet_index_list = [0, 1]
airflow_subnet_ids = []

for i in subnet_index_list:
    airflow_subnet_ids.append(vpc_outputs[i]['OutputValue'])

In [20]:
%store airflow_sg_id
%store airflow_subnet_ids

Stored 'airflow_sg_id' (str)
Stored 'airflow_subnet_ids' (list)


# Configure IAM Role Policy for MWAA

In [33]:
#Remove MWAA VPC Policy only needed for VPC Creation
response = iam.detach_role_policy(
    RoleName="TeamRole",
    PolicyArn=mwaa_vpc_policy_arn
)

AttributeError: 'iam.Role' object has no attribute 'detach_role_policy'

In [30]:
#Check number of policies attached to TeamRole, we need to have nine
team_role = iam.Role('TeamRole')

aws_managed_policies = [p for p in team_role.attached_policies.all() ]            

if(len(aws_managed_policies) >= 10):    
    print('You have: {0} policies attached to TeamRole, you need downsize to 9 Policies so that we can add an MWAA Policy.'.format(len(aws_managed_policies)))
    print("Please do NOT continue unless until you run this and get a Success message")
else:
    print("Success! Please Continue...")

Success! Please Continue...


In [31]:
mwaa_policy_json = open('./src/mwaa_policy.json', 'r').read()
mwaa_policy_json = mwaa_policy_json.replace('{0}',region)
mwaa_policy_json = mwaa_policy_json.replace('{1}',account_id)
mwaa_policy_json = mwaa_policy_json.replace('{2}',airflow_env_name)
mwaa_policy_json = mwaa_policy_json.replace('{3}',airflow_bucket_name)

mwaa_assume_policy_json = open('./src/mwaa_assume_policy.json', 'r').read()

In [34]:
response = iam.create_policy(
    PolicyName='mwaa_policy',
    PolicyDocument=mwaa_policy_json
)

response = iam.attach_role_policy(
    RoleName="TeamRole",
    PolicyArn=response["Policy"]["Arn"]
)

response = iam.update_assume_role_policy(
    RoleName="TeamRole",
    PolicyDocument=mwaa_assume_policy_json
)

EntityAlreadyExistsException: An error occurred (EntityAlreadyExists) when calling the CreatePolicy operation: A policy called mwaa_policy already exists. Duplicate names are not allowed.

# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}