# AWS Resources deployment using a Jupyter Notebook

In [None]:
# !python3 -m pip install boto3
# !python3 -m pip install requests
# !python3 -m pip install tqdm
# !python3 -m pip install pandas
# !python3 -m pip install s3fs
# !python3 -m pip install ipywidgets
# !python3 -m pip install -q -U paramiko
# !python3 -m pip install -q -U scp

In [None]:
import IPython
import boto3
import time
import os
import json
import requests
import pandas as pd
import paramiko
import scp
from zipfile import ZipFile
from urllib.request import urlopen
from tqdm.notebook import tqdm

#### Some basic settings

In [None]:
# Common prefix shared by the resource names derived below.
stem = 'my-experimental'
my_region = 'us-west-2'
my_bucket_name = 'dantohe-my-experimental-iac-01'

# Names of the resources this notebook creates and later cleans up.
ec2_pem_name = f'{stem}-kp-june-2021-01'
my_role_name = f'{stem}-ec2-role-01'
my_InstanceProfileName = f'{stem}-InstanceProfileName-iac-01'
my_security_group_name = f'{stem}-Airflow-security-group-01'

# boto3 clients used by every cell below.
# NOTE(review): my_region is not passed to these clients, so they use
# whatever region the local boto3/AWS configuration provides - confirm
# that this is intended.
iam = boto3.client('iam')
ec2 = boto3.client('ec2')

#### Create an EC2 key-pair    
If the key already exists then don't do anything.    

In [None]:

# Local path of the private key file. It is defined up front because the SSH
# cells further down need it whether or not the key pair is created in this
# run (previously it only existed when the key had just been created).
ec2_pem_path = f'./{ec2_pem_name}.pem'

# True when a key pair with our name is already registered in EC2.
key_exists = any(
    key_pair['KeyName'] == ec2_pem_name
    for key_pair in ec2.describe_key_pairs()['KeyPairs']
)

if key_exists:
    print('key already exists')
    if not os.path.isfile(ec2_pem_path):
        # EC2 only hands out the private key at creation time, so without
        # the local file the SSH cells cannot authenticate.
        print(f'WARNING: {ec2_pem_path} is missing - SSH cells will fail')
else:
    # A pem left over from a previous run would not match the new key pair.
    if os.path.isfile(ec2_pem_path):
        os.remove(ec2_pem_path)
    ec2_keypair = ec2.create_key_pair(KeyName=ec2_pem_name)
    with open(ec2_pem_path, 'w+') as ec2_pem_file:
        ec2_pem_file.write(str(ec2_keypair['KeyMaterial']))
    # Owner read-only (equivalent of `chmod 400`).
    os.chmod(ec2_pem_path, 0o400)
    print(f'{ec2_pem_name} has been created sucessfully and the pem is available at\n{ec2_pem_path}')
    

## EC2 Resources    
IAM references: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/iam.html#role


### Create a role and attach the s3 access policies

In [None]:
# Look the role up by name. iam.list_roles() returns results in pages, so
# scanning a single response could miss an existing role; get_role() asks for
# exactly the role we care about.
try:
    ec2_role = iam.get_role(RoleName=my_role_name)['Role']
except iam.exceptions.NoSuchEntityException:
    ec2_role = None

if ec2_role is not None:
    print(f'Role {my_role_name} already exists')
else:
    # Trust policy: lets the EC2 service assume this role.
    ec2_role = iam.create_role(
        Path='/',
        RoleName=my_role_name,
        Description='',
        MaxSessionDuration=3600,
        AssumeRolePolicyDocument="""{
      "Version": "2012-10-17",
      "Statement": [
        {
          "Effect": "Allow",
          "Principal": { "Service": "ec2.amazonaws.com"},
          "Action": "sts:AssumeRole"
        }
      ]
    }""")['Role']
    # Attach the managed S3 policy to the new role. boto3 raises ClientError
    # when the call fails, so no explicit status check is needed.
    for ec2_policy in ['arn:aws:iam::aws:policy/AmazonS3FullAccess']:
        iam.attach_role_policy(
            RoleName=ec2_role['RoleName'],
            PolicyArn=ec2_policy)

    print(f'{my_role_name} has been createed - the S3 policies have been attached')

### Creates the instance profile AND adds the role to instance profile

In [None]:
# Look the instance profile up by name. iam.list_instance_profiles() returns
# results in pages, so scanning a single response could miss an existing
# profile; get_instance_profile() asks for exactly the one we care about.
try:
    ec2_instance_profile = iam.get_instance_profile(
        InstanceProfileName=my_InstanceProfileName)['InstanceProfile']
except iam.exceptions.NoSuchEntityException:
    ec2_instance_profile = None

if ec2_instance_profile is not None:
    print(f'{my_InstanceProfileName} already exists')
else:
    # Create the instance profile and wait until IAM reports that it exists.
    ec2_instance_profile = iam.create_instance_profile(
        InstanceProfileName=my_InstanceProfileName)['InstanceProfile']
    iam.get_waiter('instance_profile_exists').wait(InstanceProfileName=my_InstanceProfileName)

    # Add the role to the instance profile. boto3 raises ClientError when the
    # call fails, so no explicit status check is needed.
    iam.add_role_to_instance_profile(
        InstanceProfileName=ec2_instance_profile['InstanceProfileName'],
        RoleName=ec2_role['RoleName'])
    print(f'{my_InstanceProfileName} has been created')

### Creating a security group

In [None]:
# Collect every existing group carrying our name; as with a plain scan of
# the listing, the last match is the one that is kept.
same_name_groups = [
    group
    for group in ec2.describe_security_groups()['SecurityGroups']
    if group['GroupName'] == my_security_group_name
]
ec2_sg = same_name_groups[-1] if same_name_groups else None

if ec2_sg is None:
    ec2_sg = ec2.create_security_group(
        GroupName=my_security_group_name,
        Description='Allows 22 trafic')
    # Inbound TCP ports opened to any address: 22 (SSH), 8080 (Airflow
    # webserver), 5555 (Flower) and 3306 (presumably MySQL).
    # NOTE(review): all four are reachable from 0.0.0.0/0 - confirm this is
    # acceptable outside of a throw-away experiment.
    for open_port in (22, 8080, 5555, 3306):
        ec2.authorize_security_group_ingress(
            GroupId=ec2_sg['GroupId'],
            IpProtocol='TCP',
            CidrIp='0.0.0.0/0',
            FromPort=open_port,
            ToPort=open_port)
    print(f'The security group {my_security_group_name} has been created')
    print(f"SG ID: {ec2_sg['GroupId']}")
else:
    print(f'The security group {my_security_group_name} already exists')

### Requesting spot instance(s)

In [None]:
# Image used for the instance (Ubuntu). An Amazon Linux AMI
# (ami-0aeeebd8d2ab47354) was tried earlier but complicated the MySQL and
# Airflow installation.
# NOTE(review): a 30 s pause to wait for the instance profile used to sit
# here and is currently disabled - re-enable if the request rejects the
# freshly created profile.
ec2_ami_id = 'ami-09e67e426f25ce0d7'

# Root volume: size 30, gp2, unencrypted, deleted together with the instance.
root_volume_mapping = {
    'DeviceName': '/dev/sda1',
    'Ebs': {
        'DeleteOnTermination': True,
        'VolumeSize': 30,
        'Encrypted': False,
        'VolumeType': 'gp2',
    },
}

# What to launch: ties together the security group, key pair and instance
# profile prepared by the earlier cells.
spot_launch_specification = {
    'SecurityGroupIds': [ec2_sg['GroupId']],
    'EbsOptimized': False,
    'KeyName': ec2_pem_name,
    'ImageId': ec2_ami_id,
    'InstanceType': 't3.large',
    'IamInstanceProfile': {'Arn': ec2_instance_profile['Arn']},
    'BlockDeviceMappings': [root_volume_mapping],
}

# One-time request for a single instance that is terminated on interruption.
ec2_spot = ec2.request_spot_instances(
    AvailabilityZoneGroup='us-east-1',
    InstanceCount=1,
    LaunchSpecification=spot_launch_specification,
    SpotPrice='0.10',
    Type='one-time',
    InstanceInterruptionBehavior='terminate',
)
ec2_spot_id = ec2_spot['SpotInstanceRequests'][0]['SpotInstanceRequestId']

# Block until the request has been fulfilled.
fulfilled_waiter = ec2.get_waiter('spot_instance_request_fulfilled')
fulfilled_waiter.wait(SpotInstanceRequestIds=[ec2_spot_id])
print(f'Spot instance request: {ec2_spot_id}')

### Gets the instance ID

In [None]:
# Read the instance id off the (fulfilled) spot request.
spot_request_details = ec2.describe_spot_instance_requests(
    SpotInstanceRequestIds=[ec2_spot_id])
ec2_vm_id = spot_request_details['SpotInstanceRequests'][0]['InstanceId']

# Block until the instance passes its status checks.
status_ok_waiter = ec2.get_waiter('instance_status_ok')
status_ok_waiter.wait(InstanceIds=[ec2_vm_id])
print(f'InstanceIds: {ec2_vm_id}')

### Allocating a public IP address

In [None]:
# Allocate an address in the VPC domain; the response is kept in ec2_ip
# because later cells read its PublicIp and AllocationId.
ec2_ip = ec2.allocate_address(Domain='vpc')
allocated_public_ip = ec2_ip['PublicIp']
allocated_id = ec2_ip['AllocationId']
print(f"PublicIp: {allocated_public_ip}\nAllocationId: {allocated_id}")

### Associates the IP address with the instance

In [None]:
# Bind the allocated address to the spot instance.
association_arguments = {
    'InstanceId': ec2_vm_id,
    'AllocationId': ec2_ip['AllocationId'],
}
ec2_vm_ip = ec2.associate_address(**association_arguments)
print(f"IP AssociationId: {ec2_vm_ip['AssociationId']}")

## SSH

### SSH utilities

In [None]:
def get_ssh(ip, pem_path):
    """Open an SSH session to ``ip`` as user ``ubuntu``.

    Prints the equivalent ``ssh`` command line, then connects with the RSA
    private key stored at ``pem_path``. Unknown host keys are accepted
    automatically.

    Returns the connected ``paramiko.SSHClient``; the caller closes it.
    """
    print(f"ssh -i {pem_path} ubuntu@{ip}")
    private_key = paramiko.RSAKey.from_private_key_file(pem_path)
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(hostname=ip, username='ubuntu', pkey=private_key)
    return client

def _pump_until_exit(channel, poll_interval=0.05, chunk_size=65536):
    """Drain a channel's stdout/stderr until the remote command has exited.

    Reading while the command runs keeps the SSH receive window open;
    paramiko documents that waiting for the exit status without reading can
    hang once the command has produced more output than the window holds.

    Returns ``(stdout_bytes, stderr_bytes)`` collected so far.
    """
    out_chunks = []
    err_chunks = []
    while not channel.exit_status_ready():
        received = False
        if channel.recv_ready():
            out_chunks.append(channel.recv(chunk_size))
            received = True
        if channel.recv_stderr_ready():
            err_chunks.append(channel.recv_stderr(chunk_size))
            received = True
        if not received:
            # Nothing buffered yet: yield briefly instead of spinning.
            time.sleep(poll_interval)
    return b''.join(out_chunks), b''.join(err_chunks)


def run_via_ssh(
        ip,
        pem_path,
        commands,
        display_output=False):
    """Run ``commands`` one after another on ``ip`` over a single SSH session.

    Parameters:
        ip: address of the host to connect to.
        pem_path: path of the private key file handed to ``get_ssh``.
        commands: iterable of shell command strings, executed in order.
        display_output: when True, print the stdout of successful commands.

    A command that exits with a non-zero status is reported together with
    its stderr, and the remaining commands are still executed. The SSH
    session is always closed, even when an exception is raised.
    """
    ssh = get_ssh(ip, pem_path)
    try:
        for command in tqdm(commands):
            _stdin, stdout, stderr = ssh.exec_command(command)
            # Consume output while waiting so that chatty commands (apt, pip)
            # cannot stall on a full receive window.
            early_out, early_err = _pump_until_exit(stdout.channel)
            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                print(('command executed successfuly:::', command))
                if display_output:
                    # Prepend what was drained to whatever is still buffered.
                    output_buffer = (early_out + stdout.read()).decode('utf-8')
                    if output_buffer:
                        print(f">>> {output_buffer}")
            else:
                error_buffer = (early_err + stderr.read()).decode('utf-8')
                print(('!!!failed', command))
                print(f"!!! {error_buffer}")
    finally:
        ssh.close()

### Installing MySQL and Airflow

In [None]:
# SQL statements run through `sudo mysql -e`, in this order: server setting,
# fresh `airflow` database, then the `airflow` user and its privileges.
mysql_statements = [
    "SET GLOBAL explicit_defaults_for_timestamp = 1;",
    "DROP DATABASE IF EXISTS airflow;",
    "CREATE DATABASE airflow CHARACTER SET UTF8mb3 COLLATE utf8_general_ci;",
    "CREATE USER 'airflow'@'localhost' IDENTIFIED BY 'airflow';",
    "GRANT ALL PRIVILEGES ON airflow.* TO 'airflow'@'localhost';",
]

# pip itself first, then Airflow with its MySQL and Celery extras.
airflow_pip_targets = [
    'pip',
    'apache-airflow',
    'apache-airflow[mysql]',
    'apache-airflow[celery]',
]

install_commands = (
    [
        'sudo apt-get -y update',
        'sudo apt-get install -y libmysqlclient-dev mysql-server',
    ]
    + [f'sudo mysql -e "{statement}"' for statement in mysql_statements]
    + [
        'sudo apt install -y redis-server',
        'sudo apt-get install -y python3 python3-pip python3-setuptools',
    ]
    + [f'sudo pip3 install -U {target}' for target in airflow_pip_targets]
)

run_via_ssh(
    ip=ec2_ip['PublicIp'],
    pem_path=ec2_pem_path,
    commands=install_commands)

### Configure Airflow

In [None]:
# Point Airflow at MySQL and switch it to the Celery executor.
# `airflow db init` runs twice: once before the edits (presumably so that
# ~/airflow/airflow.cfg exists for crudini to modify) and once afterwards so
# that the metadata database is initialised with the new settings.
run_via_ssh(
    ip=ec2_ip['PublicIp'],
    pem_path=ec2_pem_path,
    commands=[
        'airflow db init',
        'sudo apt-get install -y crudini',
        "crudini --set ~/airflow/airflow.cfg core load_examples False",
        "crudini --set ~/airflow/airflow.cfg core load_default_connections False",
        "crudini --set ~/airflow/airflow.cfg core sql_alchemy_conn 'mysql://airflow:airflow@localhost/airflow'",
        "crudini --set ~/airflow/airflow.cfg core executor CeleryExecutor",
        "crudini --set ~/airflow/airflow.cfg core sql_alchemy_schema airflow",
        "crudini --set ~/airflow/airflow.cfg scheduler min_file_process_interval 10",
        "crudini --set ~/airflow/airflow.cfg scheduler dag_dir_list_interval 60",
        # Celery: Redis is the message broker, the MySQL database stores task
        # results. The `db+mysql://` form is a result-backend URL and is not
        # a valid broker URL, so the two values must not be swapped.
        "crudini --set ~/airflow/airflow.cfg celery broker_url 'redis://127.0.0.1:6379/0'",
        "crudini --set ~/airflow/airflow.cfg celery result_backend 'db+mysql://airflow:airflow@localhost/airflow'",
        'airflow db init',
    ])

### Create Airflow dag directory

In [None]:
# Create the DAG directory on the instance (no error if it already exists).
make_dag_directory = ['mkdir -p ~/airflow/dags']
run_via_ssh(
    ip=ec2_ip['PublicIp'],
    pem_path=ec2_pem_path,
    commands=make_dag_directory)

### Install python modules

In [None]:
# Packages installed (or upgraded) system-wide on the instance, one pip
# invocation per package, in this order.
extra_python_packages = [
    'tensorflow',
    'pandas',
    'scikit-learn',
    'numpy',
    'psycopg2-binary',
    'requests',
    'boto3',
    'matplotlib',
    'reportlab',
    'flower',
    'proj',
    'redis',
]

run_via_ssh(
    ip=ec2_ip['PublicIp'],
    pem_path=ec2_pem_path,
    commands=[f'sudo pip3 install -U {package}' for package in extra_python_packages])

### Start Airflow

In [None]:
# Create the admin account for the web UI.
# NOTE(review): the credentials are admin/admin on a publicly reachable
# port - confirm this is acceptable for the environment.
create_admin_user = (
    'airflow users  create --role Admin --username admin --email admin '
    '--firstname admin --lastname admin --password admin'
)

# Airflow services, each started with -D: scheduler, Celery worker, Flower
# and the webserver on port 8080. (`airflow kerberos -D` is not started.)
airflow_services = [
    'airflow scheduler -D',
    'airflow celery worker -D',
    'airflow celery flower -D',
    'airflow webserver -p 8080 -D',
]

run_via_ssh(
    ip=ec2_ip['PublicIp'],
    pem_path=ec2_pem_path,
    commands=[create_admin_user] + airflow_services)

### Accessing the environment

In [None]:
# Print how to reach the environment. The ssh command must reference the
# private key *file* (./<key name>.pem), not the bare key-pair name, or it
# cannot be pasted into a terminal as-is.
print(f"SSH      : ssh -i ./{ec2_pem_name}.pem ubuntu@{ec2_ip['PublicIp']}")
print(f"WebServer: http://{ec2_ip['PublicIp']}:8080")
print(f"Flower   : http://{ec2_ip['PublicIp']}:5555")

## Clean Up

### Cancel the spot instance request

In [None]:
# Cancel the spot request made earlier in this session. To cancel a request
# from another session, put its id (e.g. 'sir-rk8sj4bj') in the list instead.
spot_request_ids = [ec2_spot_id]
ec2.cancel_spot_instance_requests(SpotInstanceRequestIds=spot_request_ids)

### Terminate the spot instance

In [None]:
# Terminate the instance launched by the spot request. To terminate an
# instance from another session, put its id (e.g. 'i-080cf3bee321c8357')
# in the list instead.
instance_ids = [ec2_vm_id]
ec2.terminate_instances(InstanceIds=instance_ids)

### Wait for the instance to terminate and release the IP address 

In [None]:
# Block until the instance is reported as terminated, then give the
# allocated address back.
# If this gets stuck, remove the address manually from the console
# (VPC -> Elastic IPs).
terminated_waiter = ec2.get_waiter('instance_terminated')
terminated_waiter.wait(InstanceIds=[ec2_vm_id])
ec2.release_address(AllocationId=ec2_ip['AllocationId'])



### Delete security group

In [None]:
# Delete the security group found or created earlier. For a group from
# another session, use its id (e.g. 'sg-0a1592d4d67e19433') instead.
security_group_id = ec2_sg['GroupId']
ec2.delete_security_group(GroupId=security_group_id)

### Delete the key-pair

In [None]:
# Delete the key pair registered in EC2 under ec2_pem_name.
key_pair_arguments = {'KeyName': ec2_pem_name}
ec2.delete_key_pair(**key_pair_arguments)

### Detach and delete the role policies

In [None]:
role_name = ec2_role['RoleName']

# Detach every policy that is attached to the role ...
attached_policies = iam.list_attached_role_policies(RoleName=role_name)['AttachedPolicies']
for managed_policy in attached_policies:
    iam.detach_role_policy(RoleName=role_name, PolicyArn=managed_policy['PolicyArn'])

# ... then delete every policy listed by name on the role itself.
role_policy_names = iam.list_role_policies(RoleName=role_name)['PolicyNames']
for role_policy_name in role_policy_names:
    iam.delete_role_policy(RoleName=role_name, PolicyName=role_policy_name)

### Remove role from instance profile

In [None]:
# Take the role out of the instance profile.
profile_role_binding = {
    'InstanceProfileName': ec2_instance_profile['InstanceProfileName'],
    'RoleName': ec2_role['RoleName'],
}
iam.remove_role_from_instance_profile(**profile_role_binding)

### Delete instance profile

In [None]:
# Delete the instance profile found or created earlier.
instance_profile_name = ec2_instance_profile['InstanceProfileName']
iam.delete_instance_profile(InstanceProfileName=instance_profile_name)

### Delete the role

In [None]:
# Delete the role found or created earlier.
role_to_delete = ec2_role['RoleName']
iam.delete_role(RoleName=role_to_delete)

## ALL DONE