# Yelp Manager Program
* EC2 management:
    * Launch, start, stop, etc.
* Data Collection:

In [1]:
# Modules
import boto3

import pandas as pd
import math
import numpy as np
import scipy as sp

import os
import sys
import pytz
import time
from datetime import datetime
from time import gmtime, strftime

import glob
import zipfile
import gzip
import shutil
import re
import json
from json import JSONDecoder, JSONDecodeError
from collections import defaultdict

# Pandas view options
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 200)
# Use the fully qualified option name. The abbreviated 'precision' pattern is
# ambiguous on pandas versions that also define 'styler.format.precision',
# and set_option raises OptionError when a pattern matches several options.
pd.set_option('display.precision', 4)

## Program Parameters

In [2]:
# Path parameters

In [3]:
# Access parameters
# Credentials are read from the environment instead of being written into the
# notebook, so they are never saved or shared together with it. The variable
# names are the standard ones that boto3 also reads on its own.
ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
# The region falls back to the value that used to be hard-coded here.
REGION = os.environ.get('AWS_DEFAULT_REGION', 'us-east-2')

In [4]:
# Resource parameters
# Id passed to ec2.Instance() in resource_init to look up the manager instance.
MANAGER_ID = 'REDACTED'

# DNS record name that is kept in sync with the manager's public IP, and the
# Route 53 hosted zone the record is read from / written to.
HOMEPAGE = 'www.cdhanalytics.com'
HOSTED_ZONE_ID = 'REDACTED'

# Polling settings for the DNS change request: seconds passed to time.sleep
# between status checks, and the attempt limit used by the polling loop.
DNS_WAIT = 6
DNS_MAX_ATTEMPTS = 10

## Functions

In [18]:
def resource_init():
    """Connect to the AWS resources used by this notebook.

    Looks up the EC2 instance identified by ``MANAGER_ID`` and the first
    Route 53 record set listed for ``HOMEPAGE`` in ``HOSTED_ZONE_ID``.

    Returns:
        tuple: ``(success, resources)``. ``success`` is 1 when every lookup
        worked and 0 otherwise. ``resources`` is a dict with the keys
        ``ec2``, ``manager``, ``manager_state``, ``manager_ip``,
        ``route53_client``, ``route53_records`` and ``route53_record_ip``;
        after a failure it only holds the entries gathered before the error.
    """
    success = 1
    resources = dict()

    try:
        # EC2 Manager
        resources['ec2'] = boto3.resource('ec2')
        resources['manager'] = resources['ec2'].Instance(MANAGER_ID)
        resources['manager_state'] = resources['manager'].state['Code']
        resources['manager_ip'] = resources['manager'].public_ip_address

        # Route53
        resources['route53_client'] = boto3.client('route53')
        # NOTE(review): the listing *starts* at StartRecordName; confirm that the
        # first record returned is really the HOMEPAGE record before trusting it.
        resources['route53_records'] = resources['route53_client'].list_resource_record_sets(
            HostedZoneId=HOSTED_ZONE_ID, StartRecordName=HOMEPAGE, MaxItems='1')
        resources['route53_record_ip'] = (
            resources['route53_records']['ResourceRecordSets'][0]['ResourceRecords'][0]['Value'])
    except Exception as error:
        # Catch Exception (not a bare except) so KeyboardInterrupt and SystemExit
        # still propagate, and report the cause instead of hiding it.
        print('There was a problem accessing AWS resources!')
        print('Reason: {}: {}'.format(type(error).__name__, error))
        success = 0

    return success, resources

In [19]:
def update_dns_records(manager_ip, route53_client):
    """Submit an UPSERT that points the HOMEPAGE A record at ``manager_ip``.

    Args:
        manager_ip: value written into the record's single ResourceRecords entry.
        route53_client: object whose ``change_resource_record_sets`` is called.

    Returns:
        Whatever ``change_resource_record_sets`` returns.
    """
    record_set = {
        "Name": HOMEPAGE,
        "Type": "A",
        "TTL": 60,
        "ResourceRecords": [{"Value": manager_ip}],
    }
    change = {"Action": "UPSERT", "ResourceRecordSet": record_set}
    batch = {"Comment": "Automatic DNS update", "Changes": [change]}

    return route53_client.change_resource_record_sets(
        HostedZoneId=HOSTED_ZONE_ID,
        ChangeBatch=batch,
    )

## Dynamic DNS

In [45]:
# Connect to AWS; when that worked, expose every handle under its own name
success, resources = resource_init()

if success == 1:
    (ec2, manager, manager_ip,
     route53_client, route53_records, route53_record_ip) = (
        resources[key]
        for key in ('ec2', 'manager', 'manager_ip',
                    'route53_client', 'route53_records', 'route53_record_ip')
    )

In [35]:
# Check if IP addresses match
if route53_record_ip != manager_ip:
    print('Submitting DNS record change request...')
    change_response = update_dns_records(manager_ip, route53_client)
    change_id = change_response['ChangeInfo']['Id']
    change_status = change_response['ChangeInfo']['Status']
    attempts = 0
    # Keep polling only while the change is not yet INSYNC *and* attempts are
    # left. With "or" the loop kept waiting after INSYNC until all attempts
    # were used, and never ended when the change did not reach INSYNC.
    while change_status != 'INSYNC' and attempts < DNS_MAX_ATTEMPTS:
        print('Waiting for DNS record to update...')
        # Sleep first so the status tested by the loop condition is fresh
        time.sleep(DNS_WAIT)
        change_status = route53_client.get_change(Id=change_id)['ChangeInfo']['Status']
        attempts += 1

    # Check if record was updated
    if change_status == 'INSYNC':
        # route53_record_ip is not refreshed here; re-run the resource_init
        # cell to read the updated record.
        print('DNS record change successful! Now exiting.')
    else:
        print('Max attempts reached - DNS record update failed! Now exiting.')


Submitting DNS record change request...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
Waiting for DNS record to update...
DNS record change successful! Now exiting.


In [46]:
# Show the manager's public IP above the IP read from the DNS record
print(manager_ip, route53_record_ip, sep='\n')

18.223.160.164
18.223.160.164


## Resource Management

### EC2

In [45]:
# boto3 is also imported in the modules cell at the top of the notebook;
# this cell rebinds `ec2` to a fresh EC2 service resource.
import boto3
ec2 = boto3.resource('ec2')

In [46]:
# Materialise the instance collection so the notebook displays it
[instance for instance in ec2.instances.all()]

[ec2.Instance(id='i-08ef7be7a1c5ae1a4'),
 ec2.Instance(id='i-00cfae34e03f23e3b'),
 ec2.Instance(id='i-056e31bc3b07238b3')]

In [41]:
# Display the 'InstanceStatuses' entry of the describe_instance_status response.
# NOTE(review): the EC2 API reportedly lists only running instances unless
# IncludeAllInstances=True is passed, which could explain an empty list — confirm.
ec2.meta.client.describe_instance_status()['InstanceStatuses']

[]

In [None]:
# Parameter template for ec2.create_instances. The cell has no execution
# count, i.e. it has not been run.
# It is not runnable as written: values such as 'string' and 123 are
# placeholders, and expressions like 'standard'|'io1' apply | to two strings,
# which raises TypeError.
# NOTE(review): looks like the request syntax copied from the boto3
# documentation, kept as a reference — confirm.
instance = ec2.create_instances(
    BlockDeviceMappings=[
        {
            'DeviceName': 'string',
            'VirtualName': 'string',
            'Ebs': {
                'DeleteOnTermination': True|False,
                'Iops': 123,
                'SnapshotId': 'string',
                'VolumeSize': 123,
                'VolumeType': 'standard'|'io1'|'gp2'|'sc1'|'st1',
                'Encrypted': True|False,
                'KmsKeyId': 'string'
            },
            'NoDevice': 'string'
        },
    ],
    ImageId='string',
    InstanceType='t1.micro'|'t2.nano'|'t2.micro'|'t2.small'|'t2.medium'|'t2.large'|'t2.xlarge'|'t2.2xlarge'|'t3.nano'|'t3.micro'|'t3.small'|'t3.medium'|'t3.large'|'t3.xlarge'|'t3.2xlarge'|'t3a.nano'|'t3a.micro'|'t3a.small'|'t3a.medium'|'t3a.large'|'t3a.xlarge'|'t3a.2xlarge'|'m1.small'|'m1.medium'|'m1.large'|'m1.xlarge'|'m3.medium'|'m3.large'|'m3.xlarge'|'m3.2xlarge'|'m4.large'|'m4.xlarge'|'m4.2xlarge'|'m4.4xlarge'|'m4.10xlarge'|'m4.16xlarge'|'m2.xlarge'|'m2.2xlarge'|'m2.4xlarge'|'cr1.8xlarge'|'r3.large'|'r3.xlarge'|'r3.2xlarge'|'r3.4xlarge'|'r3.8xlarge'|'r4.large'|'r4.xlarge'|'r4.2xlarge'|'r4.4xlarge'|'r4.8xlarge'|'r4.16xlarge'|'r5.large'|'r5.xlarge'|'r5.2xlarge'|'r5.4xlarge'|'r5.8xlarge'|'r5.12xlarge'|'r5.16xlarge'|'r5.24xlarge'|'r5.metal'|'r5a.large'|'r5a.xlarge'|'r5a.2xlarge'|'r5a.4xlarge'|'r5a.8xlarge'|'r5a.12xlarge'|'r5a.16xlarge'|'r5a.24xlarge'|'r5d.large'|'r5d.xlarge'|'r5d.2xlarge'|'r5d.4xlarge'|'r5d.8xlarge'|'r5d.12xlarge'|'r5d.16xlarge'|'r5d.24xlarge'|'r5d.metal'|'r5ad.large'|'r5ad.xlarge'|'r5ad.2xlarge'|'r5ad.4xlarge'|'r5ad.8xlarge'|'r5ad.12xlarge'|'r5ad.16xlarge'|'r5ad.24xlarge'|'x1.16xlarge'|'x1.32xlarge'|'x1e.xlarge'|'x1e.2xlarge'|'x1e.4xlarge'|'x1e.8xlarge'|'x1e.16xlarge'|'x1e.32xlarge'|'i2.xlarge'|'i2.2xlarge'|'i2.4xlarge'|'i2.8xlarge'|'i3.large'|'i3.xlarge'|'i3.2xlarge'|'i3.4xlarge'|'i3.8xlarge'|'i3.16xlarge'|'i3.metal'|'i3en.large'|'i3en.xlarge'|'i3en.2xlarge'|'i3en.3xlarge'|'i3en.6xlarge'|'i3en.12xlarge'|'i3en.24xlarge'|'i3en.metal'|'hi1.4xlarge'|'hs1.8xlarge'|'c1.medium'|'c1.xlarge'|'c3.large'|'c3.xlarge'|'c3.2xlarge'|'c3.4xlarge'|'c3.8xlarge'|'c4.large'|'c4.xlarge'|'c4.2xlarge'|'c4.4xlarge'|'c4.8xlarge'|'c5.large'|'c5.xlarge'|'c5.2xlarge'|'c5.4xlarge'|'c5.9xlarge'|'c5.12xlarge'|'c5.18xlarge'|'c5.24xlarge'|'c5.metal'|'c5d.large'|'c5d.xlarge'|'c5d.2xlarge'|'c5d.4xlarge'|'c5d.9xlarge'|'c5d.18xlarge'|'c5n.large'|'c5n.xlarge'|'c5n.2xlarge'|'c5n.4xlarge'|'c5n.9xlarge'|'c5n.18xlarge'|'cc1.4xlarge'|'cc2.8xlarge'|'g2.2xlarge'|'g2.8xlarge'|'g3.4xlarge'|'g3
.8xlarge'|'g3.16xlarge'|'g3s.xlarge'|'cg1.4xlarge'|'p2.xlarge'|'p2.8xlarge'|'p2.16xlarge'|'p3.2xlarge'|'p3.8xlarge'|'p3.16xlarge'|'p3dn.24xlarge'|'d2.xlarge'|'d2.2xlarge'|'d2.4xlarge'|'d2.8xlarge'|'f1.2xlarge'|'f1.4xlarge'|'f1.16xlarge'|'m5.large'|'m5.xlarge'|'m5.2xlarge'|'m5.4xlarge'|'m5.8xlarge'|'m5.12xlarge'|'m5.16xlarge'|'m5.24xlarge'|'m5.metal'|'m5a.large'|'m5a.xlarge'|'m5a.2xlarge'|'m5a.4xlarge'|'m5a.8xlarge'|'m5a.12xlarge'|'m5a.16xlarge'|'m5a.24xlarge'|'m5d.large'|'m5d.xlarge'|'m5d.2xlarge'|'m5d.4xlarge'|'m5d.8xlarge'|'m5d.12xlarge'|'m5d.16xlarge'|'m5d.24xlarge'|'m5d.metal'|'m5ad.large'|'m5ad.xlarge'|'m5ad.2xlarge'|'m5ad.4xlarge'|'m5ad.8xlarge'|'m5ad.12xlarge'|'m5ad.16xlarge'|'m5ad.24xlarge'|'h1.2xlarge'|'h1.4xlarge'|'h1.8xlarge'|'h1.16xlarge'|'z1d.large'|'z1d.xlarge'|'z1d.2xlarge'|'z1d.3xlarge'|'z1d.6xlarge'|'z1d.12xlarge'|'z1d.metal'|'u-6tb1.metal'|'u-9tb1.metal'|'u-12tb1.metal'|'a1.medium'|'a1.large'|'a1.xlarge'|'a1.2xlarge'|'a1.4xlarge',
    Ipv6AddressCount=123,
    Ipv6Addresses=[
        {
            'Ipv6Address': 'string'
        },
    ],
    KernelId='string',
    KeyName='string',
    MaxCount=123,
    MinCount=123,
    Monitoring={
        'Enabled': True|False
    },
    Placement={
        'AvailabilityZone': 'string',
        'Affinity': 'string',
        'GroupName': 'string',
        'PartitionNumber': 123,
        'HostId': 'string',
        'Tenancy': 'default'|'dedicated'|'host',
        'SpreadDomain': 'string'
    },
    RamdiskId='string',
    SecurityGroupIds=[
        'string',
    ],
    SecurityGroups=[
        'string',
    ],
    SubnetId='string',
    UserData='string',
    AdditionalInfo='string',
    ClientToken='string',
    DisableApiTermination=True|False,
    DryRun=True|False,
    EbsOptimized=True|False,
    IamInstanceProfile={
        'Arn': 'string',
        'Name': 'string'
    },
    InstanceInitiatedShutdownBehavior='stop'|'terminate',
    NetworkInterfaces=[
        {
            'AssociatePublicIpAddress': True|False,
            'DeleteOnTermination': True|False,
            'Description': 'string',
            'DeviceIndex': 123,
            'Groups': [
                'string',
            ],
            'Ipv6AddressCount': 123,
            'Ipv6Addresses': [
                {
                    'Ipv6Address': 'string'
                },
            ],
            'NetworkInterfaceId': 'string',
            'PrivateIpAddress': 'string',
            'PrivateIpAddresses': [
                {
                    'Primary': True|False,
                    'PrivateIpAddress': 'string'
                },
            ],
            'SecondaryPrivateIpAddressCount': 123,
            'SubnetId': 'string',
            'InterfaceType': 'string'
        },
    ],
    PrivateIpAddress='string',
    ElasticGpuSpecification=[
        {
            'Type': 'string'
        },
    ],
    ElasticInferenceAccelerators=[
        {
            'Type': 'string'
        },
    ],
    TagSpecifications=[
        {
            'ResourceType': 'client-vpn-endpoint'|'customer-gateway'|'dedicated-host'|'dhcp-options'|'elastic-ip'|'fleet'|'fpga-image'|'host-reservation'|'image'|'instance'|'internet-gateway'|'launch-template'|'natgateway'|'network-acl'|'network-interface'|'reserved-instances'|'route-table'|'security-group'|'snapshot'|'spot-instances-request'|'subnet'|'traffic-mirror-filter'|'traffic-mirror-session'|'traffic-mirror-target'|'transit-gateway'|'transit-gateway-attachment'|'transit-gateway-route-table'|'volume'|'vpc'|'vpc-peering-connection'|'vpn-connection'|'vpn-gateway',
            'Tags': [
                {
                    'Key': 'string',
                    'Value': 'string'
                },
            ]
        },
    ],
    LaunchTemplate={
        'LaunchTemplateId': 'string',
        'LaunchTemplateName': 'string',
        'Version': 'string'
    },
    InstanceMarketOptions={
        'MarketType': 'spot',
        'SpotOptions': {
            'MaxPrice': 'string',
            'SpotInstanceType': 'one-time'|'persistent',
            'BlockDurationMinutes': 123,
            'ValidUntil': datetime(2015, 1, 1),
            'InstanceInterruptionBehavior': 'hibernate'|'stop'|'terminate'
        }
    },
    CreditSpecification={
        'CpuCredits': 'string'
    },
    CpuOptions={
        'CoreCount': 123,
        'ThreadsPerCore': 123
    },
    CapacityReservationSpecification={
        'CapacityReservationPreference': 'open'|'none',
        'CapacityReservationTarget': {
            'CapacityReservationId': 'string'
        }
    },
    HibernationOptions={
        'Configured': True|False
    },
    LicenseSpecifications=[
        {
            'LicenseConfigurationArn': 'string'
        },
    ]
)

In [5]:
# create a new EC2 instance
launch_params = {
    'ImageId': 'ami-0a7079a4a35da2ff5',
    'InstanceType': 't2.micro',
    'KeyName': 'AWS_yelp_analysis',
    'MinCount': 1,
    'MaxCount': 1,
}
instances = ec2.create_instances(**launch_params)

In [17]:
# Show the first launched instance and, on the next line, its image id
print(instances[0], instances[0].image_id, sep='\n')

ec2.Instance(id='i-098e417a37ef015f9')
ami-0a7079a4a35da2ff5


In [22]:
# Terminate the first instance in `instances` and keep the API response
result = instances[0].terminate()

In [23]:
# Display the response returned by terminate()
result

{'TerminatingInstances': [{'CurrentState': {'Code': 32,
    'Name': 'shutting-down'},
   'InstanceId': 'i-098e417a37ef015f9',
   'PreviousState': {'Code': 16, 'Name': 'running'}}],
 'ResponseMetadata': {'RequestId': '53b4a82d-732d-45b4-9c7e-3a0143504584',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'text/xml;charset=UTF-8',
   'transfer-encoding': 'chunked',
   'vary': 'accept-encoding',
   'date': 'Wed, 04 Sep 2019 20:26:23 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}

### Route 53

In [24]:
# Route 53 client, plus the 'resource_record_sets_changed' waiter obtained from it.
# NOTE(review): this waiter looks like a ready-made alternative to the manual
# polling loop in the Dynamic DNS section — confirm before switching.
client = boto3.client('route53')
waiter = client.get_waiter('resource_record_sets_changed')

In [None]:
# Parameter template for client.change_resource_record_sets. The cell has no
# execution count, i.e. it has not been run.
# It is not runnable as written: 'string' / 123 are placeholders, and
# expressions like 'CREATE'|'DELETE' apply | to two strings, which raises TypeError.
# NOTE(review): looks like the request syntax copied from the boto3
# documentation, kept as a reference — confirm.
response = client.change_resource_record_sets(
    HostedZoneId='string',
    ChangeBatch={
        'Comment': 'string',
        'Changes': [
            {
                'Action': 'CREATE'|'DELETE'|'UPSERT',
                'ResourceRecordSet': {
                    'Name': 'string',
                    'Type': 'SOA'|'A'|'TXT'|'NS'|'CNAME'|'MX'|'NAPTR'|'PTR'|'SRV'|'SPF'|'AAAA'|'CAA',
                    'SetIdentifier': 'string',
                    'Weight': 123,
                    'Region': 'us-east-1'|'us-east-2'|'us-west-1'|'us-west-2'|'ca-central-1'|'eu-west-1'|'eu-west-2'|'eu-west-3'|'eu-central-1'|'ap-southeast-1'|'ap-southeast-2'|'ap-northeast-1'|'ap-northeast-2'|'ap-northeast-3'|'eu-north-1'|'sa-east-1'|'cn-north-1'|'cn-northwest-1'|'ap-east-1'|'me-south-1'|'ap-south-1',
                    'GeoLocation': {
                        'ContinentCode': 'string',
                        'CountryCode': 'string',
                        'SubdivisionCode': 'string'
                    },
                    'Failover': 'PRIMARY'|'SECONDARY',
                    'MultiValueAnswer': True|False,
                    'TTL': 123,
                    'ResourceRecords': [
                        {
                            'Value': 'string'
                        },
                    ],
                    'AliasTarget': {
                        'HostedZoneId': 'string',
                        'DNSName': 'string',
                        'EvaluateTargetHealth': True|False
                    },
                    'HealthCheckId': 'string',
                    'TrafficPolicyInstanceId': 'string'
                }
            },
        ]
    }
)

### Change S3 Storage Class - PENDING

In [None]:
# UNTESTED - This is one possible way to change S3 storage class - UNTESTED #
import boto3

s3 = boto3.client('s3')

# The object is copied onto itself (same bucket and key as the source);
# only the extra arguments differ from a plain copy.
copy_source = dict(Bucket='mybucket', Key='mykey')
storage_args = dict(StorageClass='STANDARD_IA', MetadataDirective='COPY')

s3.copy(copy_source, 'mybucket', 'mykey', ExtraArgs=storage_args)

### Delete Legacy Glacier Archives - DONE

In [50]:
# Connect to resource and start job
glacier = boto3.resource('glacier',region_name='us-east-1')
# presumably (account id, vault name) — the same pair is passed to
# glacier.Archive in the delete cell; verify against the boto3 docs
vault = glacier.Vault('251131218783','yelp_zip_files')
# The following cells poll this job's status and read its output
job = vault.initiate_inventory_retrieval()

In [109]:
# Check job status
job.load()  # reload the job's attributes before reading them
for label, value in (('Job Action:', job.action), ('Job Status:', job.status_code)):
    print(label, value)

Job Action: InventoryRetrieval
Job Status: Succeeded


In [110]:
# Get archive ids from job output
output = job.get_output()
# The body is read in full, decoded as UTF-8 and parsed as JSON
inventory = str(output['body'].read(), "utf-8")
inventory_json = json.loads(inventory)
archive_ids = [entry['ArchiveId'] for entry in inventory_json['ArchiveList']]

In [131]:
# Delete archives
# One delete() call per archive id. Every response is appended as soon as it
# arrives, so `responses` also records partial progress if a call raises.
responses = []

for idx in archive_ids:
    response = glacier.Archive('251131218783','yelp_zip_files',idx).delete()
    responses.append(response)

In [145]:
# Check delete status
status_codes = [reply['ResponseMetadata']['HTTPStatusCode'] for reply in responses]
distinct_codes = set(status_codes)
print('Number status codes:',len(status_codes))
print('Number unique status codes:', len(distinct_codes))
# When every call returned the same code, show that code once
if len(distinct_codes)==1:
    print('Unique status code value:', status_codes[0])

Number status codes: 64
Number unique status codes: 1
Unique status code value: 204


In [136]:
# Display archive ids, just in case...
# (these are the ids that were passed to delete() in the cell above)
archive_ids

['K5mghNuN7M6Ya-ZicOAsxVgpmhLbhrMhJ1qFpb3lXwaq2GDAgLBUXPzD66M20SLzTuQEOswrtd7USpQi5oLOx_B7-XL1xNW3qSzWT2rvVm3cPsTLHc6YL3Csgo-ESTyO-x5CpX9nTw',
 'FSFhqsSpV2Xy_7NaOYER_BWMFR_m6n91kqMSjTbiM6jjjN7NYs-OT2awb5DOqW8LuzGwyElG0_I7f3PfwVqqqR8rpvmvO9SfQHhvf0J67Pl19XwbOA7zChzhloxIpRrT6ef3lWjBlw',
 'yA0jpCKAGLCldneszY_hhkzrmGKaq3qWK9mGI0L_E4JmBmdkh7HMw152oDQ_TfZ21r-glJpymr5B99_WjY_yNfPQ9EV_QRbJfl8lvKpLF36uXYFNdACdZPZs3ixyLkgoEYUisRZVeQ',
 'bwlZ4VxYFClmCBOUJQ7yirx1aRdxSAgLeBSWldmYl7jTLrBIEJ1rYsryFHuRM-PoJwC4m9iuCzIM5KsYrgsfm13g91pQI2lbK3Tp63i-1ZpHolHhKLsXx7cxDlga9L8Dy3h8Qqv26Q',
 'BuvznCFIsD7IZipu0IY2HRMgqtRDJz2-tpS9R_9fWlirpYBe1hUso8E705ZnxItNsnZEH4-4r8oBtNVVwm8MHXTw3NffSzyAT19UKiHyCHsqjOJkb1fDwYtj930o9BKNogCQERb2zA',
 'SkW3Kb_PAvMljZchWZS2hnA5Z5U6Dp-joCZUVK7_jvFIxkMu4gOtM4eFRwUQny4R2eH6Zr5OddDA8-xo_bJ_mWi3nB-hnAAB42bqEUNyMqw2BBJljnIH_F0sjz9ILo6yKu8y3y5R7A',
 'qsG7hV7-2EHeRH5j98W38gAGskV7-dX23U0E_790RcRSCa7X-bpnsyBZTucPxfvo28weUdTr-mxyxXQBg_XyzrQUSyjjVg3aulKyuabPtBS8UdKtYC9WjhjnjMft8DByNTZ6rTtMFg',