In [243]:
from datetime import datetime
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar as us_calendar
import awsprice
from bokeh.io import push_notebook, show, output_notebook
from bokeh.layouts import row
from bokeh.plotting import figure
# Route Bokeh output to the notebook so that show() renders plots inline.
output_notebook()

In [2]:
# Load the EC2 offers table from the local SQLite file through the project's
# awsprice helper.
# NOTE(review): presumably a pandas DataFrame -- estimate_cost reads its
# Instance_Type and PricePerUnit columns; confirm against awsprice.
ec2_offers = awsprice.load_ec2_frame('local-data/budge.sqlite')

In [3]:
# Summarise the offers frame with awsprice.column_values.
# NOTE(review): the result is not referenced by any cell visible in this
# notebook section -- confirm it is still needed.
column_values = awsprice.column_values(ec2_offers)

In [284]:
def create_load_frame(start_time=None, end_time=None):
    """Create an empty hourly load frame.

    Args:
        start_time: first hour of the range (anything pd.period_range accepts
            as ``start``, e.g. a datetime).
        end_time: last hour of the range, inclusive.

    Returns:
        A DataFrame indexed by an hourly PeriodIndex named ``'hour'`` with a
        single, unfilled ``'load'`` column.
    """
    # Lowercase 'h' is used instead of 'H': the uppercase hourly alias is
    # deprecated in recent pandas releases, while the lowercase spelling is
    # accepted by both old and new versions.
    hours = pd.period_range(
        start=start_time, end=end_time, freq='h', name='hour')
    return pd.DataFrame(index=hours, columns=['load'])

In [285]:
def estimate_load(load_frame, base_load=100, shoulder_load=500, peak_load=1000):
    """Fill ``load_frame['load']`` in place with a modelled hourly load.

    The model distinguishes business days (Mon-Fri that are not US federal
    holidays) from all other days:

    * business day, 09:00-19:59 -> ``peak_load``
    * business day, 07:00-08:59 and 20:00-21:59 -> ``shoulder_load``
    * every other hour -> ``base_load``

    Args:
        load_frame: DataFrame indexed by a PeriodIndex (its ``start_time`` is
            used to derive day and hour). Modified in place.
        base_load: load outside business hours and on non-business days.
        shoulder_load: load in the ramp-up / ramp-down hours of a business day.
        peak_load: load in the core hours of a business day.

    Returns:
        None. The ``'load'`` column of ``load_frame`` is overwritten.
    """
    if len(load_frame.index) == 0:
        # Nothing to classify; still guarantee the column exists.
        load_frame['load'] = []
        return

    starts = load_frame.index.start_time
    # Keep days as midnight Timestamps rather than datetime.date objects:
    # membership tests of datetime.date against a DatetimeIndex are not
    # reliable across pandas versions and can silently evaluate to False.
    days = starts.normalize()
    first_day, last_day = days.min(), days.max()

    holidays = us_calendar().holidays(start=first_day, end=last_day)
    bdays = pd.bdate_range(start=first_day, end=last_day).difference(holidays)

    is_bday = days.isin(bdays)
    hours = starts.hour
    peak = is_bday & (hours >= 9) & (hours <= 19)
    shoulder = is_bday & (
        ((hours >= 7) & (hours < 9)) | ((hours > 19) & (hours <= 21)))

    load = pd.Series(base_load, index=load_frame.index)
    load = load.mask(shoulder, shoulder_load).mask(peak, peak_load)
    # Assign positionally so a non-unique index cannot break alignment.
    load_frame['load'] = load.values

In [286]:
def estimate_cost(load_frame, offer_frame, instance_type, load_capacity):
    """Add instance-count and cost columns for one instance type, in place.

    For an instance type such as ``'t2.small'`` three columns are written to
    ``load_frame`` (dots in the type become underscores):

    * ``t2_small_count``    -- instances per hour, at least 1
    * ``t2_small_cost``     -- hourly cost (count * hourly rate)
    * ``t2_small_cum_cost`` -- running total of the hourly cost

    Args:
        load_frame: DataFrame with a numeric ``load`` column. Modified in place.
        offer_frame: DataFrame with ``Instance_Type`` and ``PricePerUnit``
            columns; must contain exactly one row for ``instance_type``.
        instance_type: instance type to price, e.g. ``'m4.large'``.
        load_capacity: load units a single instance is assumed to handle;
            must be positive.

    Returns:
        None.

    Raises:
        ValueError: if ``load_capacity`` is not positive, or if
            ``offer_frame`` does not hold exactly one offer for
            ``instance_type``.
    """
    if load_capacity <= 0:
        raise ValueError(
            'load_capacity must be positive, got {!r}'.format(load_capacity))

    # Resolve the rate before touching load_frame so a failed lookup does not
    # leave a half-written set of columns behind.
    prices = offer_frame.loc[
        offer_frame.Instance_Type == instance_type, 'PricePerUnit']
    if len(prices) != 1:
        raise ValueError(
            'expected exactly one offer for instance type {!r}, found {}'.format(
                instance_type, len(prices)))
    # float() on the scalar (not on the Series) also converts a price that is
    # stored as text.
    rate = float(prices.iloc[0])

    column_name_base = instance_type.replace('.', '_')
    instance_count = '{}_count'.format(column_name_base)
    instance_cost = '{}_cost'.format(column_name_base)
    instance_cum_cost = '{}_cum_cost'.format(column_name_base)

    # NOTE(review): round() can provision less capacity than the load
    # (e.g. 1000 / 70 -> 14 instances = 980 units); confirm whether rounding
    # up is intended. Left unchanged to keep published figures stable.
    load_frame[instance_count] = load_frame.load.apply(
        lambda load: int(max(1, round(load / load_capacity))))
    load_frame[instance_cost] = load_frame[instance_count] * rate
    load_frame[instance_cum_cost] = load_frame[instance_cost].cumsum()

In [291]:
# Build an hourly frame spanning calendar year 2017 (first hour of Jan 1
# through the 23:00 hour of Dec 31), then fill in its modelled load.
year_start = datetime(2017, 1, 1)
year_last_hour = datetime(2017, 12, 31, 23)
hourly_load = create_load_frame(start_time=year_start, end_time=year_last_hour)
estimate_load(hourly_load)

In [297]:
# Price the same load on each candidate instance type.
# Each pair is (instance type, load units one instance is assumed to handle).
candidate_instances = [
    ('t2.small', 40),
    ('t2.medium', 70),
    ('m4.large', 250),
]
for candidate_type, candidate_capacity in candidate_instances:
    estimate_cost(hourly_load, ec2_offers, candidate_type, candidate_capacity)

In [298]:
# Overlay the cumulative cost of every candidate instance type on one time axis.
# NOTE(review): plot_width / plot_height and the `legend=` glyph keyword are the
# older Bokeh spellings -- confirm against the installed Bokeh version before
# upgrading.
p = figure(
    title='comparative cost estimate',
    plot_width=900,
    plot_height=400,
    x_axis_type="datetime",
)

cost_series = [
    ('t2_small_cum_cost', 'blue'),
    ('t2_medium_cum_cost', 'green'),
    ('m4_large_cum_cost', 'red'),
]
hour_starts = hourly_load.index.start_time
for cum_cost_column, series_color in cost_series:
    p.line(
        x=hour_starts,
        y=hourly_load[cum_cost_column],
        line_width=1,
        legend=cum_cost_column,
        line_color=series_color,
    )

p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Cost (USD)'
p.legend.location = "top_left"
show(p)

In [294]:
# Show the last row only: the cumulative cost of each instance type at the
# final hour of the range.
hourly_load[['t2_small_cum_cost', 't2_medium_cum_cost', 'm4_large_cum_cost']].tail(1)

Unnamed: 0_level_0,t2_small_cum_cost,t2_medium_cum_cost,m4_large_cum_cost
hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-12-31 23:00,2087.71,2373.97,1945.08
