# AWS EC2 Pricing Comparison


## Analytics

### Parameters

In [1]:
# Local copy of the EC2 offer file; load_ec2_pricing() reads it when it exists
# and only falls back to the network otherwise.
LOCAL_OFFER_FILE = 'data/ec2.json'

# Where to fetch from AWS: the offer index lives at INDEX_BASE_URL + INDEX_PATH,
# and SERVICE_CODE selects the entry of that index whose current version is downloaded.
INDEX_BASE_URL = "https://pricing.us-east-1.amazonaws.com"
INDEX_PATH = "/offers/v1.0/aws/index.json"
SERVICE_CODE = "AmazonEC2"

# Only products whose 'operatingSystem' attribute equals this value are kept.
OPERATING_SYSTEM = 'Linux'

# Location filter criteria: a product is kept when its 'locationType' is in the
# allow tuple and its 'location' is not in the deny tuple.
ALLOW_LOCATION_TYPE = ('AWS Region',)
DENY_LOCATION = ('AWS GovCloud (US)',)

# Sentinel used as the default for missing product attributes in the filters.
# It must never equal a real attribute value, so that a product lacking the
# attribute fails the equality test instead of matching by accident.
UNIQUE_STR = "*SDN+.@"

### Constants

In [None]:
# Numeric weight per instance size name (the part after the '.' in an instance
# type such as 'm4.xlarge').
# NOTE(review): the weights double from 'micro' up to '8xlarge', and
# '10xlarge' is 10 x 'xlarge', which suggests a relative-capacity scale.
# The table is not referenced in the visible cells -- confirm the intended use.
ENUM_SIZE = {
    'micro': 1,
    'small': 2,
    'medium': 4,
    'large': 8,
    'xlarge': 16,
    '2xlarge': 32,
    '4xlarge': 64,
    '8xlarge': 128,
    '10xlarge': 160,
}

### Imports and functions

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import os, json

In [49]:
def json_fetch(url, timeout=30):
    """
    Fetch JSON from url, return parsed data structure.

    Parameters
    ----------
    url : str
        Address of the JSON document to download.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; seconds to wait for the server
        to connect / send data before giving up. Defaults to 30.

    Returns
    -------
    The value produced by ``json.loads`` for the response body.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.
    requests.exceptions.RequestException
        For connection problems, including timeouts.
    ValueError
        If the response body is not valid JSON.
    """
    # Kept as a function-local import, as before, so that `requests` is only
    # needed when something is actually fetched over the network.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # JSON parser, which would surface as a confusing decode error.
    response.raise_for_status()
    return json.loads(response.text)
    
def load_ec2_pricing():
    """
    Load the EC2 offer document, preferring the local copy over the network.

    When LOCAL_OFFER_FILE is set and names an existing file, that file is
    parsed and returned. Otherwise the offer index is fetched from
    INDEX_BASE_URL + INDEX_PATH, and the document referenced by
    ``index['offers'][SERVICE_CODE]['currentVersionUrl']`` is downloaded.

    Returns
    -------
    The parsed JSON document.

    Raises
    ------
    KeyError
        If the fetched index does not contain the expected entries.
    """
    if LOCAL_OFFER_FILE and os.path.isfile(LOCAL_OFFER_FILE):
        # JSON text is UTF-8; do not rely on the platform's default encoding.
        with open(LOCAL_OFFER_FILE, encoding='utf-8') as fp:
            return json.load(fp)

    offer_index = json_fetch(INDEX_BASE_URL + INDEX_PATH)
    # The index stores a path relative to the same host as the index itself.
    current_version_path = offer_index['offers'][SERVICE_CODE]['currentVersionUrl']
    return json_fetch(INDEX_BASE_URL + current_version_path)

def convolute(data, *functions):
    """
    Pipe ``data`` through ``functions`` from left to right.

    Each function receives the result of the previous one; the first receives
    ``data`` itself. With no functions, ``data`` is returned unchanged.
    """
    result = data
    for transform in functions:
        result = transform(result)

    return result

def explode_instance_type(instance_type):
    """
    Break an instance type such as 'm4.xlarge' into its dot-separated parts.

    Returns the list of parts, e.g. ['m4', 'xlarge'].
    """
    parts = instance_type.split('.')
    return parts

In [40]:
def filter_prod_compute(products):
    """
    Keep only shared-tenancy compute instance products.

    Parameters
    ----------
    products : dict
        Maps a key to a product dict. Each product must have an 'attributes'
        dict; 'productFamily' is optional.

    Returns
    -------
    dict
        New dict holding the entries whose 'productFamily' is
        'Compute Instance' and whose 'tenancy' attribute is 'Shared'.
        Products without a 'productFamily' key are skipped rather than
        raising KeyError.
    """
    return {
        key: product for key, product in products.items()
        # .get() so that an entry lacking 'productFamily' is filtered out
        # instead of aborting the whole pipeline.
        if product.get('productFamily') == 'Compute Instance'
        and product['attributes'].get('tenancy', '') == 'Shared'
    }

def filter_prod_os(products):
    """
    Keep the products whose 'operatingSystem' attribute equals OPERATING_SYSTEM.

    A product without that attribute is compared via the UNIQUE_STR sentinel.
    Returns a new dict; the input is not modified.
    """
    selected = {}
    for key, product in products.items():
        os_name = product['attributes'].get('operatingSystem', UNIQUE_STR)
        if os_name == OPERATING_SYSTEM:
            selected[key] = product
    return selected

def filter_prod_location(products):
    """
    Keep the products located in an allowed, non-denied location.

    A product passes when its attributes contain both 'locationType' and
    'location', the location type is listed in ALLOW_LOCATION_TYPE and the
    location is not listed in DENY_LOCATION. Returns a new dict.
    """
    kept = {}
    for key, product in products.items():
        attrs = product['attributes']
        # Products missing either attribute can never pass.
        if 'locationType' not in attrs or 'location' not in attrs:
            continue
        if attrs['locationType'] not in ALLOW_LOCATION_TYPE:
            continue
        if attrs['location'] in DENY_LOCATION:
            continue
        kept[key] = product
    return kept

def filtergen_prod_tenancy(tenancy):
    """
    Build a product filter for one tenancy value.

    The returned callable takes a products dict and returns a new dict with
    the entries whose 'tenancy' attribute equals ``tenancy``. A product
    without that attribute is compared via the UNIQUE_STR sentinel.
    """
    def keep_tenancy(products):
        matching = {}
        for key, product in products.items():
            if product['attributes'].get('tenancy', UNIQUE_STR) == tenancy:
                matching[key] = product
        return matching

    return keep_tenancy

### Load Data from Disk or AWS

In [50]:
# Load the raw offer document (local file if present, otherwise fetched from AWS).
pricing = load_ec2_pricing()
# Narrow the product catalogue step by step; each filter takes and returns a dict.
# NOTE(review): filter_prod_compute already requires tenancy == 'Shared', so the
# trailing filtergen_prod_tenancy('Shared') step does not remove anything further.
products = convolute(
    pricing['products'],
    filter_prod_compute,
    filter_prod_os,
    filter_prod_location,
    filtergen_prod_tenancy('Shared'),
)

# Index the surviving SKUs as location -> instance family -> instance size -> sku.
# The two-name unpacking below requires instanceType to contain exactly one '.'.
# When several products share the same location, family and size, the one
# visited last overwrites the earlier ones.
products_categorized = {}
for p in products.values():
    i_family, i_type = explode_instance_type(p['attributes']['instanceType'])
    products_categorized \
        .setdefault(p['attributes']['location'], {}) \
        .setdefault(i_family, {})[i_type] = p['sku']


{{len(pricing['products'])}} product entries found; {{len(products)}} of them, spread across {{len(products_categorized)}} regions, passed the filtering criteria.

In [51]:
# Show the nested location -> instance family -> instance size -> SKU mapping.
products_categorized

{'Asia Pacific (Singapore)': {'c1': {'medium': 'YDP6BX3WNNZ488BZ',
   'xlarge': '8V5MYBMPUD434579'},
  'c3': {'2xlarge': 'PJ8AKRU5VVMS9DFN',
   '4xlarge': 'UKY8RWKR7MVYC863',
   '8xlarge': 'XUVJRQ9MSAQKDXE9',
   'large': 'ABMNUJ6SQ7A595A4',
   'xlarge': 'G9Z5RTPAVX5KWH4Z'},
  'c4': {'2xlarge': 'UKGPAABCGR48DYC4',
   '4xlarge': '3ZUGJVTA8NWE9NZT',
   '8xlarge': 'Z3DQKNTFUZ68H6TT',
   'large': 'QB3EG2XVBQ5BYA5F',
   'xlarge': 'U9CPUKN22CXMPGRV'},
  'd2': {'2xlarge': 'SKTEJ2QN2YW8UFKF',
   '4xlarge': 'M5ZT2V2ZMSBCEB2Q',
   '8xlarge': 'EEUHF7PCXDQT2MYE',
   'xlarge': 'DTZY5KW9NPT6V929'},
  'g2': {'2xlarge': 'VVKTWPMARM4HESXU', '8xlarge': 'DK6FJW8STXUGW6PA'},
  'hs1': {'8xlarge': 'EERGZVYFKRBMSYKW'},
  'i2': {'2xlarge': 'DKFKKEAW78H8X64T',
   '4xlarge': '8E9KB9CNE94Z4AHE',
   '8xlarge': 'VE5MWWHUXS2VR8DV',
   'xlarge': 'GCVKBN38AXXGHBQH'},
  'm1': {'large': '2B4AFZB6SYHMPZGS',
   'medium': '7FQD2RCMJSS57GFA',
   'small': '6R4QVUNHTJVS9J2S',
   'xlarge': '7TMGTEJPM5UPWQ8X'},
  'm2': {'2xlarg

In [46]:
# List the filtered products that carry a non-empty 'operation' attribute.
# This must iterate `products` (the product dicts); `products_categorized`
# holds family -> size -> SKU mappings, which have no 'attributes' key.
# Alternative view, the set of distinct operation values:
# {p['attributes']['operation'] for p in products.values()}
[p for p in products.values() if p['attributes'].get('operation')]

[{'attributes': {'clockSpeed': '2.4  GHz',
   'currentGeneration': 'Yes',
   'dedicatedEbsThroughput': '750 Mbps',
   'enhancedNetworkingSupported': 'Yes',
   'instanceFamily': 'General purpose',
   'instanceType': 'm4.xlarge',
   'licenseModel': 'No License required',
   'location': 'US West (N. California)',
   'locationType': 'AWS Region',
   'memory': '16 GiB',
   'networkPerformance': 'High',
   'operatingSystem': 'Linux',
   'operation': 'RunInstances',
   'physicalProcessor': 'Intel Xeon E5-2676 v3 (Haswell)',
   'preInstalledSw': 'NA',
   'processorArchitecture': '64-bit',
   'processorFeatures': 'Intel AVX; Intel AVX2; Intel Turbo',
   'servicecode': 'AmazonEC2',
   'storage': 'EBS only',
   'tenancy': 'Shared',
   'usagetype': 'USW1-BoxUsage:m4.xlarge',
   'vcpu': '4'},
  'productFamily': 'Compute Instance',
  'sku': '2EBX6PMG5FBY92KC'},
 {'attributes': {'clockSpeed': '2.6 GHz',
   'currentGeneration': 'No',
   'instanceFamily': 'Compute optimized',
   'instanceType': 'cc2.8x

In [9]:
# Peek at the first three filtered products (fewer if there are not that many).
list(products.values())[:3]

[{'attributes': {'clockSpeed': '2.4 GHz',
   'currentGeneration': 'Yes',
   'dedicatedEbsThroughput': '1000 Mbps',
   'enhancedNetworkingSupported': 'Yes',
   'instanceFamily': 'General purpose',
   'instanceType': 'm4.2xlarge',
   'licenseModel': 'Bring your own license',
   'location': 'EU (Frankfurt)',
   'locationType': 'AWS Region',
   'memory': '32 GiB',
   'networkPerformance': 'High',
   'operatingSystem': 'Windows',
   'operation': 'RunInstances:0800',
   'physicalProcessor': 'Intel Xeon E5-2676 v3 (Haswell)',
   'preInstalledSw': 'NA',
   'processorArchitecture': '64-bit',
   'processorFeatures': 'Intel AVX; Intel AVX2; Intel Turbo',
   'servicecode': 'AmazonEC2',
   'storage': 'EBS only',
   'tenancy': 'Shared',
   'usagetype': 'EUC1-BoxUsage:m4.2xlarge',
   'vcpu': '8'},
  'productFamily': 'Compute Instance',
  'sku': '3A9TKKH86T7SYAF6'},
 {'attributes': {'clockSpeed': '2.5 GHz',
   'currentGeneration': 'Yes',
   'enhancedNetworkingSupported': 'Yes',
   'instanceFamily': 'M

# Results

{{SERVICE_CODE}}

In [None]:
# NOTE(review): `display` is imported here but not used in this cell.
from IPython.display import display
# Placeholder figure: draws a straight line through the points (1, 1) .. (4, 4).
plt.plot(range(1,5), range(1,5))
# Prints an empty line; being the cell's last statement, it also keeps the
# list returned by plt.plot from being echoed as the cell result.
print()

# Notes

## pricing['products'] sample