# BigDataX Project Notebook

The first thing I will look at is the difference in spot prices before and after the 2017 change to the AWS Spot Market. To do this I integrate each VM's spot-price history over time, which gives the total cost of holding that specific VM for the whole 3-month period. Dividing that total by 2160 (the number of hours in 90 days) gives the average cost per hour. Analysis from this point can diverge in a variety of ways.

In [None]:
import pandas as pd
pd.TimeSeries = pd.Series
import numpy as np
import sys
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates
from datetime import datetime, timedelta
from tqdm import tqdm
from scipy import integrate
from scipy.interpolate import interp1d
import math

#importing required packages/libraries

In [None]:
# Print the style-sheet names that can be passed to plt.style.use() on this installation.
print(plt.style.available) #available styles built into matplotlib

In [None]:
import matplotlib as mpl
# Overwrite the active rcParams with Matplotlib's defaults, undoing any style applied by earlier cells.
mpl.rcParams.update(mpl.rcParamsDefault) #RUN THIS CELL TO GO BACK TO DEFAULTS

In [None]:
# Load the header-less, tab-separated spot_0601 file (read_table uses '\t' as its separator).
df1 = pd.read_table('Data/spot_0601.tsv', header=None)

In [None]:
# Name the raw columns and discard SPH, which is not used afterwards.
# The check makes the cell safe to re-run: once df1 already has the final
# five columns there is nothing left to rename or drop.
final_columns = ['AZ','IT','OS','SP','DT']

if list(df1.columns) != final_columns:
    df1.columns = ['SPH'] + final_columns #setting column names
    df1.drop(columns=['SPH'],inplace=True) #drops the SPH column and saves that change

In [None]:
# Order the rows in place by availability zone, then instance type, then spot price.
df1.sort_values(['AZ','IT','SP'], inplace=True)

In [None]:
# Replace the DT column with its parsed datetime equivalent.
df1['DT'] = df1['DT'].pipe(pd.to_datetime)

In [None]:
# Order the rows in place by zone, instance type, OS and finally timestamp,
# so each (zone, instance, OS) group is chronological.
df1.sort_values(['AZ','IT','OS','DT'], inplace=True)

In [None]:
## Integration Function: divides function by 3600 so units line up
    # adapted from https://nbviewer.jupyter.org/gist/metakermit/5720498

def integrate_method(self, how='trapz', unit='s'):
    '''Numerically integrate the time series over its index.

    The index is converted to seconds, the values are integrated against it
    with the selected SciPy rule, and the result is divided by 3600 so that a
    series of hourly rates integrates to a total.

    @param how: the method to use (trapz by default)
    @param unit: kept for backward compatibility; it is not used
    @return the integral divided by 3600 (an array of running totals for cumtrapz)
    @raise AttributeError: `how` is not a supported rule, or SciPy does not provide it
    @raise ValueError: `how` is 'romb' and the samples are not evenly spaced

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberg's rule (needs 2**k + 1 evenly spaced samples)

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    '''
    # Legacy rule name -> SciPy function names to try, current spelling first.
    # Recent SciPy releases no longer ship trapz / cumtrapz / simps.
    candidates = {
        'trapz': ('trapezoid', 'trapz'),
        'cumtrapz': ('cumulative_trapezoid', 'cumtrapz'),
        'simps': ('simpson', 'simps'),
        'romb': ('romb',),
    }
    if how not in candidates:
        raise AttributeError(
            'Unsupported integration rule: %s. Expecting one of these sample-based '
            'integration rules: %s' % (how, str(sorted(candidates))))

    rule = None
    for candidate in candidates[how]:
        rule = getattr(integrate, candidate, None)
        if rule is not None:
            break
    if rule is None:
        raise AttributeError('scipy.integrate provides none of: %s' % (str(list(candidates[how]))))

    index = self.index
    if hasattr(index, 'as_unit'):
        # Datetime-like indexes may not be stored in nanoseconds; normalise so
        # the division by 10**9 below really yields seconds.
        index = index.as_unit('ns')
    seconds = np.asarray(index.astype(np.int64)) / 10**9

    if how == 'romb':
        # romb takes a constant sample spacing (dx), not the sample positions.
        spacing = np.diff(seconds)
        if spacing.size == 0 or not np.allclose(spacing, spacing[0]):
            raise ValueError('romb requires evenly spaced samples')
        return rule(self.values, dx=spacing[0]) / 3600

    # x is passed by keyword because newer SciPy makes it keyword-only for simpson.
    return rule(self.values, x=seconds) / 3600

pd.Series.integrate = integrate_method

## Part 1: Obtaining "after" values using the integration function

##### df1 - df4

### Manual Calculations

#### AZ = us-east-1a
#### IT = c4 family
#### OS = Linux/UNIX

In [None]:
# Spot-price history of c4.large on Linux/UNIX in us-east-1a.
df_x1 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'c4.large')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x1) become the series values, their timestamps (y1) the index.
x1, y1 = df_x1['SP'].values, df_x1['DT'].values
ts1 = pd.Series(data=x1, index=y1)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v1 = np.round(ts1.integrate(), 4)
print(v1)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v1_h = np.round(v1/2160, 4)
print(v1_h)

# Disabled check of the integration: compare v1 with 2160*df_x1['SP'].mean().round(4).

In [None]:
# Spot-price history of c4.xlarge on Linux/UNIX in us-east-1a.
df_x2 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'c4.xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x2) become the series values, their timestamps (y2) the index.
x2, y2 = df_x2['SP'].values, df_x2['DT'].values
ts2 = pd.Series(data=x2, index=y2)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v2 = np.round(ts2.integrate(), 4)
print(v2)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v2_h = np.round(v2/2160, 4)
print(v2_h)

# Disabled check of the integration: compare v2 with 2160*df_x2['SP'].mean().round(4).

In [None]:
# Spot-price history of c4.2xlarge on Linux/UNIX in us-east-1a.
df_x3 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'c4.2xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x3) become the series values, their timestamps (y3) the index.
x3, y3 = df_x3['SP'].values, df_x3['DT'].values
ts3 = pd.Series(data=x3, index=y3)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v3 = np.round(ts3.integrate(), 4)
print(v3)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v3_h = np.round(v3/2160, 4)
print(v3_h)

# Disabled check of the integration: compare v3 with 2160*df_x3['SP'].mean().round(4).

In [None]:
# Spot-price history of c4.4xlarge on Linux/UNIX in us-east-1a.
df_x4 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'c4.4xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x4) become the series values, their timestamps (y4) the index.
x4, y4 = df_x4['SP'].values, df_x4['DT'].values
ts4 = pd.Series(data=x4, index=y4)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v4 = np.round(ts4.integrate(), 4)
print(v4)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v4_h = np.round(v4/2160, 4)
print(v4_h)

# Disabled check of the integration: compare v4 with 2160*df_x4['SP'].mean().round(4).

In [None]:
# Spot-price history of c4.8xlarge on Linux/UNIX in us-east-1a.
df_x5 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'c4.8xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x5) become the series values, their timestamps (y5) the index.
x5, y5 = df_x5['SP'].values, df_x5['DT'].values
ts5 = pd.Series(data=x5, index=y5)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v5 = np.round(ts5.integrate(), 4)
print(v5)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v5_h = np.round(v5/2160, 4)
print(v5_h)

# Disabled check of the integration: compare v5 with 2160*df_x5['SP'].mean().round(4).

In [None]:
# Bar chart of the total cost (v1..v5) of each c4 size in us-east-1a.

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

c4_x = ['c4.large','c4.xlarge','c4.2xlarge','c4.4xlarge','c4.8xlarge']
c4SP_a = [v1,v2,v3,v4,v5]

plt.bar(c4_x, c4SP_a, color='b')

# Title previously read "AFTER the 2017 from ..."; the word "Change" was missing.
plt.title('Total Price for c4 Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" +
         'Across the us-east-1a Availability Zone and Linux/UNIX Operating Systems')
plt.ylabel('Total Cost (USD)')
plt.yticks([100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400])

plt.tight_layout()
#plt.savefig('c4a_1a.png')
#plt.show()

#### AZ = us-east-1a
#### IT = m4 family
#### OS = Linux/UNIX

In [None]:
# Spot-price history of m4.large on Linux/UNIX in us-east-1a.
df_x6 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.large')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x6) become the series values, their timestamps (y6) the index.
x6, y6 = df_x6['SP'].values, df_x6['DT'].values
ts6 = pd.Series(data=x6, index=y6)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v6 = np.round(ts6.integrate(), 4)
print(v6)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v6_h = np.round(v6/2160, 4)
print(v6_h)

# Disabled check of the integration: compare v6 with 2160*df_x6['SP'].mean().round(4).

In [None]:
# Spot-price history of m4.xlarge on Linux/UNIX in us-east-1a.
df_x7 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x7) become the series values, their timestamps (y7) the index.
x7, y7 = df_x7['SP'].values, df_x7['DT'].values
ts7 = pd.Series(data=x7, index=y7)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v7 = np.round(ts7.integrate(), 4)
print(v7)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v7_h = np.round(v7/2160, 4)
print(v7_h)

# Disabled check of the integration: compare v7 with 2160*df_x7['SP'].mean().round(4).

In [None]:
# Spot-price history of m4.2xlarge on Linux/UNIX in us-east-1a.
df_x8 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.2xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x8) become the series values, their timestamps (y8) the index.
x8, y8 = df_x8['SP'].values, df_x8['DT'].values
ts8 = pd.Series(data=x8, index=y8)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v8 = np.round(ts8.integrate(), 4)
print(v8)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v8_h = np.round(v8/2160, 4)
print(v8_h)

# Disabled check of the integration: compare v8 with 2160*df_x8['SP'].mean().round(4).

In [None]:
# Spot-price history of m4.4xlarge on Linux/UNIX in us-east-1a.
df_x9 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.4xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x9) become the series values, their timestamps (y9) the index.
x9, y9 = df_x9['SP'].values, df_x9['DT'].values
ts9 = pd.Series(data=x9, index=y9)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v9 = np.round(ts9.integrate(), 4)
print(v9)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v9_h = np.round(v9/2160, 4)
print(v9_h)

# Disabled check of the integration: compare v9 with 2160*df_x9['SP'].mean().round(4).

In [None]:
# Spot-price history of m4.10xlarge on Linux/UNIX in us-east-1a.
df_x10 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.10xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x10) become the series values, their timestamps (y10) the index.
x10, y10 = df_x10['SP'].values, df_x10['DT'].values
ts10 = pd.Series(data=x10, index=y10)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v10 = np.round(ts10.integrate(), 4)
print(v10)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v10_h = np.round(v10/2160, 4)
print(v10_h)

# Disabled check of the integration: compare v10 with 2160*df_x10['SP'].mean().round(4).

In [None]:
# Spot-price history of m4.16xlarge on Linux/UNIX in us-east-1a.
df_x11 = df1.loc[
    (df1['AZ'] == 'us-east-1a')
    & (df1['IT'] == 'm4.16xlarge')
    & (df1['OS'] == 'Linux/UNIX')
]

# Prices (x11) become the series values, their timestamps (y11) the index.
x11, y11 = df_x11['SP'].values, df_x11['DT'].values
ts11 = pd.Series(data=x11, index=y11)

# Integral of the price curve over the whole window.
print('The total price you would have to pay for having this particular VM over the 3 month period is:')
v11 = np.round(ts11.integrate(), 4)
print(v11)
print(' ')

# Same total spread over 2160 hours.
print('The total cost per hour you would have to pay for having this particular VM for 3 months is:')
v11_h = np.round(v11/2160, 4)
print(v11_h)

# Disabled check of the integration: compare v11 with 2160*df_x11['SP'].mean().round(4).

In [None]:
# Bar chart of the total cost (v6..v11) of each m4 size in us-east-1a.

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

m4_x = ['m4.large', 'm4.xlarge', 'm4.2xlarge', 'm4.4xlarge', 'm4.10xlarge', 'm4.16xlarge']
m4SP_a = [v6,v7,v8,v9,v10,v11]

plt.bar(m4_x, m4SP_a, color='c')

plt.title('Total Price for m4 Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" +
         'Across the us-east-1a Availability Zone and Linux/UNIX Operating Systems')
plt.ylabel('Total Cost (USD)')
plt.yticks([250,500,750,1000,1250,1500,1750,2000,2250,2500])

plt.tight_layout()
#plt.savefig('m4a_1a.png')
#plt.show()

### Using For Loops to do Calculations

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph

In [None]:
# CURRENTLY MADE FOR PRES
# Prints the total cost of every chosen instance type for the chosen OS / AZ
# and draws one bar chart per OS.

Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'

for o in OS:
    temp = df1[df1.OS == o]

    # Reset per OS so a chart never carries over bars from a previous OS.
    temp_i = []
    temp_v = []

    for i in Instances:
        temp1 = temp[temp.IT == i]

        for a in AZ:
            temp2 = temp1[temp1.AZ == a]

            # Prices become the series values, their timestamps the index.
            x = temp2['SP'].values
            y = temp2['DT'].values
            ts = pd.Series(x,y)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Uncomment to also report the cost per hour over the 2160-hour window:
            #v_h = (v/2160).round(4)
            #print(v_h)

            temp_i.append(i)
            temp_v.append(v)

    plt.style.use(poster_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up
    plt.bar(temp_i, temp_v, color='k')

    # Build the title from the selections instead of leftover loop variables.
    family = Instances[0].split('.')[0]
    plt.title('Total Price for ' + family +
              ' Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" + 'Across the ' + ', '.join(AZ) +
              ' Availability Zone and ' + o + ' Operating Systems', fontsize=20)
    plt.ylabel('Total Cost (USD)')
    #plt.yticks([0,500,1000,1500,2000,2500,3000,3500],fontsize=20)
    plt.xticks(fontsize=20,rotation=15)

    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_' + a + '_2021PRESmod.png')
#plt.show()

##### must specify IT and OS, will do for all AZs: can output total and/or hourly cost; can create graph

In [None]:
# Outputs total cost
# For every chosen instance type, integrates the price history of each
# availability zone that has data, then draws a grouped bar chart with one
# group per instance type and one bar per zone.

Instances = ['c4.large', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'

for o in OS:
    temp = df1[df1.OS == o]

    # Flat, parallel records of every (instance, zone, cost) computed for this OS.
    temp_i = []
    temp_v = []
    temp_a = []
    # zone -> {instance type -> total cost}; this is what the chart is built from.
    cost_by_zone = {}

    for i in Instances:
        temp1 = temp[temp.IT == i]

        for a in temp1.AZ.unique():
            temp2 = temp1[temp1.AZ == a]

            # Prices become the series values, their timestamps the index.
            x = temp2['SP'].values
            y = temp2['DT'].values
            ts = pd.Series(x,y)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Uncomment to also report the cost per hour over the 2160-hour window:
            #v_h = (v/2160).round(4)
            #print(v_h)
            #print(' ')

            temp_i.append(i)
            temp_v.append(v)
            temp_a.append(a)
            cost_by_zone.setdefault(a, {})[i] = v

    plt.style.use(seaborn_style)
    plt.figure(figsize=(25,5))

    zones = sorted(cost_by_zone)
    ITx_indexes = np.arange(len(Instances))
    # Share 80% of each x slot between however many zones actually have data.
    width = 0.8 / max(len(zones), 1)

    for position, zone in enumerate(zones):
        # One height per instance type; NaN leaves a gap where a zone has no data.
        heights = [cost_by_zone[zone].get(instance, np.nan) for instance in Instances]
        # Centre the group of bars on the tick of its instance type.
        offset = (position - (len(zones) - 1) / 2) * width
        plt.bar(ITx_indexes + offset, heights, width=width, label=zone)

    plt.title('Total Price for ' + Instances[0].split('.')[0] +
              ' Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" +
              'Across the us-east-1 Availability Zones and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    plt.xticks(ticks=ITx_indexes, labels=Instances)#,rotation=25)

    plt.legend(loc = 'best')
    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_allAZ_after2017.png')
#plt.show()

#### Re-copied For Loop for comparing ITs by SIZE, not FAMILY/GENERATION (good for presentations)

In [None]:
# started with most ('insert size') ITs, narrowed them down by removing {low} outliers
# Prints the total cost of every chosen 2xlarge instance type for the chosen
# OS / AZ and draws one horizontal bar chart per OS.

Instances = ['c3.2xlarge', 'c4.2xlarge', 'c5.2xlarge', 'c5d.2xlarge', 'c6gn.2xlarge', 'd2.2xlarge', 'd3.2xlarge',
             'g4dn.2xlarge', 'i2.2xlarge', 'i3en.2xlarge', 'inf1.2xlarge', 'm3.2xlarge', 'm4.2xlarge', 'm5dn.2xlarge',
             'm6gd.2xlarge', 'r3.2xlarge', 'r4.2xlarge', 'r5a.2xlarge', 'r6gd.2xlarge','x1e.2xlarge', 'x2gd.2xlarge',
             'z1d.2xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'

for o in OS:
    temp = df1[df1.OS == o]

    # Reset per OS so a chart never carries over bars from a previous OS.
    temp_i = []
    temp_v = []

    for i in Instances:
        temp1 = temp[temp.IT == i]

        for a in AZ:
            temp2 = temp1[temp1.AZ == a]

            # Prices become the series values, their timestamps the index.
            x = temp2['SP'].values
            y = temp2['DT'].values
            ts = pd.Series(x,y)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Uncomment to also report the cost per hour over the 2160-hour window:
            #v_h = (v/2160).round(4)
            #print(v_h)

            temp_i.append(i)
            temp_v.append(v)

    plt.style.use(poster_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up
    plt.barh(temp_i, temp_v, color='#10873c')

    # Name the zone(s) from the selection instead of a leftover loop variable.
    plt.title('Total Price for Most 2x Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' +
              "\n" + 'Across the ' + ', '.join(AZ) + ' Availability Zone and ' + o + ' Operating Systems')
    plt.xlabel('Total Cost (USD)')
    #plt.xticks([0,100,200,300,400,500,600,700,800,900,1000,1100])

    plt.tight_layout()

    #plt.savefig('2x_' + a + '_after2017.png')
#plt.show()

#### Re-copied For Loop for making plots to go in presentations (sizing filters adjusted)

In [None]:
# Prints the total cost of every chosen instance type for the chosen OS / AZ
# and draws one bar chart per OS (presentation sizing).

Instances = ['r5n.large', 'r5n.xlarge', 'r5n.2xlarge', 'r5n.4xlarge', 'r5n.8xlarge',
             'r5n.12xlarge', 'r5n.16xlarge', 'r5n.24xlarge', 'r5n.metal'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'

for o in OS:
    temp = df1[df1.OS == o]

    # Reset per OS so a chart never carries over bars from a previous OS.
    temp_i = []
    temp_v = []

    for i in Instances:
        temp1 = temp[temp.IT == i]

        for a in AZ:
            temp2 = temp1[temp1.AZ == a]

            # Prices become the series values, their timestamps the index.
            x = temp2['SP'].values
            y = temp2['DT'].values
            ts = pd.Series(x,y)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Uncomment to also report the cost per hour over the 2160-hour window:
            #v_h = (v/2160).round(4)
            #print(v_h)

            temp_i.append(i)
            temp_v.append(v)

    plt.style.use(poster_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up
    plt.bar(temp_i, temp_v, color='#8a8a8a')

    # Build the title from the selections instead of leftover loop variables.
    family = Instances[0].split('.')[0]
    plt.title('Total Price for ' + family +
              ' Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" + 'Across the ' + ', '.join(AZ) +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=20)

    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_' + a + '_after2017_PRES.png')
#plt.show()

### Dividing up the times into 3 periods, using for loop for calculations

In [None]:
# Slice df1 into three consecutive windows on the DT timestamp.
timestamps = df1['DT']

# first period: 2021-03-04 up to (not including) 2021-04-01
df2 = df1[timestamps.ge('2021-03-04') & timestamps.lt('2021-04-01')]
# second period: all of April
df3 = df1[timestamps.ge('2021-04-01') & timestamps.lt('2021-05-01')]
# third period: everything from 2021-05-01 onwards
df4 = df1[timestamps.ge('2021-05-01')]

#### Only looking at COST PER HOUR

In [None]:
# GRAPH = BAR PLOT, HOURLY COST
# For each of the three time periods, prints the cost per hour of every chosen
# instance type (OS / AZ as selected) and draws them as a grouped bar chart.

Instances = ['z1d.large', 'z1d.xlarge', 'z1d.2xlarge', 'z1d.3xlarge', 'z1d.6xlarge', 'z1d.12xlarge', 'z1d.metal'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'

for o in OS:
    # Hourly costs of the first, second and third period, one entry per (instance, AZ).
    temp_vh_2 = []
    temp_vh_3 = []
    temp_vh_4 = []

    # (period data, hours used as divisor, wording for the message, legend label, result list)
    # NOTE(review): the three divisors add up to 2136 hours, while the
    # whole-window cells divide by 2160 - confirm the intended period lengths.
    periods = [
        (df2, 648, 'first', '03/04/2021 -  03/31/2021', temp_vh_2),
        (df3, 720, 'second', '04/01/2021 -  04/30/2021', temp_vh_3),
        (df4, 768, 'third', '05/01/2021 -  06/01/2021', temp_vh_4),
    ]

    for period_df, hours, ordinal, legend_label, hourly_costs in periods:
        temp = period_df[period_df.OS == o]

        for i in Instances:
            temp1 = temp[temp.IT == i]

            for a in AZ:
                temp2 = temp1[temp1.AZ == a]

                # Prices become the series values, their timestamps the index.
                x = temp2['SP'].values
                y = temp2['DT'].values
                ts = pd.Series(x,y)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

    # One tick label per (instance, AZ), in the order the costs were collected.
    temp_i = [i for i in Instances for a in AZ]

    plt.style.use(seaborn_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    # Bars of the three periods sit left of, on, and right of each tick.
    for offset, (_, _, _, legend_label, hourly_costs) in zip((-width, 0, width), periods):
        plt.bar(ITx_indexes + offset, hourly_costs, width=width, label = legend_label)

    plt.title('Hourly Price for ' + Instances[0].split('.')[0] +
        ' Instance Types AFTER the 2017 Change Over Specific Time Periods' + "\n" + 'Across the ' + ', '.join(AZ) +
        ' Availability Zone and ' + o + ' Operating Systems')
    # The bars are per-hour figures, so the axis is labelled accordingly
    # (it previously read 'Total Cost (USD)').
    plt.ylabel('Cost per Hour (USD)')
    plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=15)

    plt.legend()
    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_' + a + '_after2017_dif_periods_hour_BAR.png')
#plt.show()

#### Re-copied hourly For Loop (bar) for making plots to go in presentations (sizing filters adjusted)

In [None]:
# GRAPH = BAR PLOT, HOURLY COST, for presentations
# For each of the three time periods, prints the cost per hour of every chosen
# instance type (OS / AZ as selected) and draws them as a grouped bar chart
# with presentation-sized fonts.

Instances = ['r3.large', 'r3.xlarge', 'r3.2xlarge', 'r3.4xlarge', 'r3.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'

for o in OS:
    # Hourly costs of the first, second and third period, one entry per (instance, AZ).
    temp_vh_2 = []
    temp_vh_3 = []
    temp_vh_4 = []

    # (period data, hours used as divisor, wording for the message, legend label, bar colour, result list)
    # NOTE(review): the three divisors add up to 2136 hours, while the
    # whole-window cells divide by 2160 - confirm the intended period lengths.
    periods = [
        (df2, 648, 'first', '03/04/2021 -  03/31/2021', '#2222b5', temp_vh_2),
        (df3, 720, 'second', '04/01/2021 -  04/30/2021', '#22a358', temp_vh_3),
        (df4, 768, 'third', '05/01/2021 -  06/01/2021', '#cf1717', temp_vh_4),
    ]

    for period_df, hours, ordinal, legend_label, bar_color, hourly_costs in periods:
        temp = period_df[period_df.OS == o]

        for i in Instances:
            temp1 = temp[temp.IT == i]

            for a in AZ:
                temp2 = temp1[temp1.AZ == a]

                # Prices become the series values, their timestamps the index.
                x = temp2['SP'].values
                y = temp2['DT'].values
                ts = pd.Series(x,y)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

    # One tick label per (instance, AZ), in the order the costs were collected.
    temp_i = [i for i in Instances for a in AZ]

    plt.style.use(poster_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    # Bars of the three periods sit left of, on, and right of each tick.
    for offset, (_, _, _, legend_label, bar_color, hourly_costs) in zip((-width, 0, width), periods):
        plt.bar(ITx_indexes + offset, hourly_costs, width=width, label = legend_label, color = bar_color)

    plt.title('Hourly Price for ' + Instances[0].split('.')[0] +
        ' Instance Types A Few Years AFTER the 2017 Change Over Specific' + "\n" + 'Time Periods Across the ' + ', '.join(AZ) +
        ' Availability Zone and ' + o + ' Operating Systems', fontsize=20)
    # The bars are per-hour figures, so the axis is labelled accordingly
    # (it previously read 'Total Cost (USD)').
    plt.ylabel('Cost per Hour (USD)')
    #plt.yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55], fontsize=20)
    plt.xticks(ticks=ITx_indexes, labels=temp_i, fontsize=20)#, rotation=25)

    plt.legend(prop={"size":20})
    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_' + a + '_2021_dif_periods_hour_PRESmod.png')
#plt.show()

### Copied all AZs cell to try to modify it

In [None]:
# Outputs total cost

#Instances = ['c4.large', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge'] #choose what you want
#OS = ['Linux/UNIX'] #choose what you want

#temp_v = []

#for o in OS:
    #temp = df1[df1.OS == o]
    
    #for i in Instances:
        #temp1 = temp[temp.IT == i]
        
        #temp_a = []
            
        #for a in temp1.AZ.unique():
            #temp2 = temp1[temp1.AZ == a]
            
            #x = temp2['SP'].values
            #y = temp2['DT'].values
            #ts = pd.Series(x,y)
            
            #print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 #'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v = ts.integrate().round(4)
            #print(v)
            #print(' ')

            ##print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  ##+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            ##v_h = (v/2160).round(4)
            ##print(v_h)
            ##print(' ')
            
            #temp_v.append(v)
            #temp_a.append(a)
    
    #plt.figure(figsize=(25,5))
    #plt.style.use('seaborn')
    
    #ITx_indexes = np.arange(len(Instances))
    #width = 0.16
    
    #plt.bar(ITx_indexes-3*width, temp_v, width=width, label=temp_a[0])
    #plt.bar(ITx_indexes-2*width, temp_v, width=width, label=temp_a[1])
    #plt.bar(ITx_indexes-width, temp_v, width=width, label=temp_a[2])
    #plt.bar(ITx_indexes, temp_v, width=width, label=temp_a[3])
    #plt.bar(ITx_indexes+width, temp_v, width=width, label=temp_a[4])
    #plt.bar(ITx_indexes+2*width, temp_v, width=width, label=temp_a[5])
            
    #plt.title('Total Price for ' + i.split('.')[0] +
            #' Instance Types AFTER the 2017 Change from 3/04/2021 to 6/01/2021' + "\n" +
            #'Across the us-east-1 Availability Zones and ' + o + ' Operating Systems')
    #plt.ylabel('Total Cost (USD)')
    #plt.xticks(ticks=ITx_indexes, labels=Instances)#,rotation=25)

    #plt.legend(loc = 'best')
    #plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_allAZ_after2017.png')
#plt.show()

In [None]:
#Instances

In [None]:
#print(temp_v)

In [None]:
#print(temp_a)

In [None]:
#ITx_indexes

## Part 2: Obtaining "before" values using the integration function

In [None]:
# Load the older ("before") price data; the file is comma-separated with a header row.
df5 = pd.read_csv('Data/old3_final.csv', sep=',')

In [None]:
# Remove the 'code' and 'duration' columns in place. errors='ignore' makes the
# cell safe to re-run: columns that are already gone are simply skipped.
df5.drop(columns=['code','duration'], inplace=True, errors='ignore')

In [None]:
# Interpret 'time' as milliseconds since the epoch and store it as datetimes.
df5['time'] = df5['time'].pipe(pd.to_datetime, unit='ms')

In [None]:
# Order the rows in place by zone, instance, OS and finally time,
# so each (zone, instance, OS) group is chronological.
df5.sort_values(['zone','instance','os', 'time'], inplace=True)

In [None]:
# Keep these five columns, in this order.
df5 = df5.loc[:, ['zone','instance','os','price','time']]

In [None]:
# Rows whose zone name contains "us-east-1"; df6 is a second name for the same frame.
in_us_east_1 = df5['zone'].str.contains("us-east-1")
filtZ1 = df5.loc[in_us_east_1]
df6 = filtZ1

### us-east-1 AZs only

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df6

In [None]:
# Prints the total cost of every chosen instance type for the chosen OS / AZ
# from the older data in df6 and draws one bar chart per OS.

Instances = ['c3.large', 'c3.xlarge', 'c3.2xlarge', 'c3.4xlarge', 'c3.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1a'] #choose what you want

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names, so use whichever spelling this installation offers.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'

for o in OS:
    temp = df6[df6.os == o]

    # Reset per OS so a chart never carries over bars from a previous OS.
    temp_i = []
    temp_v = []

    for i in Instances:
        temp1 = temp[temp.instance == i]

        for a in AZ:
            temp2 = temp1[temp1.zone == a]

            # Prices become the series values, their timestamps the index.
            x = temp2['price'].values
            y = temp2['time'].values
            ts = pd.Series(x,y)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Uncomment to also report the cost per hour over a 2160-hour window:
            #v_h = (v/2160).round(4)
            #print(v_h)

            temp_i.append(i)
            temp_v.append(v)

    plt.style.use(seaborn_style)
    plt.figure() # a separate figure per OS, created after the style so it picks the style up
    plt.bar(temp_i, temp_v, color='#543636')

    # Build the title from the selections instead of leftover loop variables.
    family = Instances[0].split('.')[0]
    plt.title('Total Price for ' + family +
              ' Instance Types Right BEFORE the 2017 Change from 7/01/2017 to 9/30/2017' + "\n" + 'Across the ' + ', '.join(AZ) +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400])

    plt.tight_layout()

    #plt.savefig(i.split('.')[0] + '_' + a + '_before2017.png')
#plt.show()

### us-east-2 AZs only

In [None]:
# df7: the rows of df5 whose availability-zone name contains "us-east-2".
filtZ2 = df5[df5['zone'].str.contains("us-east-2", regex=False)]
df7 = filtZ2

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df7

In [None]:
# Bar chart of the total spot cost per instance type for the chosen OS / AZ (data: df7).
Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-2c'] #choose what you want

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Fresh accumulators per OS, so one OS's bars never leak into the next figure.
    temp_i = []
    temp_v = []
    os_rows = df7[df7.os == o]

    for i in Instances:
        for a in AZ:
            rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
            # Prices indexed by timestamp: the form ts.integrate() is called on.
            ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                  'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Optional hourly cost (2160 hours in the 3 month period):
            #print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  #+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v_h = (v/2160).round(4)
            #print(v_h)

            # Spell out the AZ only when several are selected, to keep bar labels unique.
            temp_i.append(i if len(AZ) == 1 else i + ' (' + a + ')')
            temp_v.append(v)

    plt.figure()  # one figure per OS
    plt.bar(temp_i, temp_v, color='#db2354')

    plt.title('Total Price for ' + family +
              ' Instance Types Right BEFORE the 2017 Change from 7/01/2017 to 9/30/2017' + "\n" + 'Across the ' + zones +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400,1500])

    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_before2017PRES.png')
#plt.show()

### us-west-2 AZs only

In [None]:
# df8: the rows of df5 whose availability-zone name contains "us-west-2".
filtZ3 = df5[df5['zone'].str.contains("us-west-2", regex=False)]
df8 = filtZ3

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df8

In [None]:
# Bar chart of the total spot cost per instance type for the chosen OS / AZ (data: df8).
Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-west-2c'] #choose what you want

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Fresh accumulators per OS, so one OS's bars never leak into the next figure.
    temp_i = []
    temp_v = []
    os_rows = df8[df8.os == o]

    for i in Instances:
        for a in AZ:
            rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
            # Prices indexed by timestamp: the form ts.integrate() is called on.
            ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                  'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Optional hourly cost (2160 hours in the 3 month period):
            #print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  #+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v_h = (v/2160).round(4)
            #print(v_h)

            # Spell out the AZ only when several are selected, to keep bar labels unique.
            temp_i.append(i if len(AZ) == 1 else i + ' (' + a + ')')
            temp_v.append(v)

    plt.figure()  # one figure per OS
    plt.bar(temp_i, temp_v, color='#aeb329')

    plt.title('Total Price for ' + family +
              ' Instance Types Right BEFORE the 2017 Change from 7/01/2017 to 9/30/2017' + "\n" + 'Across the ' + zones +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,2500,5000,7500,10000,12500,15000,17500,20000,22500])

    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_before2017.png')
#plt.show()

## Part 3: More "after" data

In [None]:
# Load the "after" data file (comma-separated) into df10.
df10 = pd.read_csv('Data/new3_final.csv', sep=',')

In [None]:
# Remove the unused columns. errors='ignore' keeps this cell safe to re-run: a second
# run finds the columns already gone and does nothing instead of raising KeyError.
df10 = df10.drop(columns=['code','duration'], errors='ignore')

In [None]:
# Interpret 'time' as milliseconds since the epoch and store it as datetimes.
df10 = df10.assign(time=pd.to_datetime(df10['time'], unit='ms'))

In [None]:
# Order rows by zone, then instance, then os, then time.
df10 = df10.sort_values(by=['zone','instance','os', 'time'])

In [None]:
# Put the columns in the order used throughout the analysis.
df10 = df10.loc[:, ['zone','instance','os','price','time']]

In [None]:
# df11: the rows of df10 whose availability-zone name contains "us-east-1".
filtZ4 = df10[df10['zone'].str.contains("us-east-1", regex=False)]
df11 = filtZ4

### us-east-1 AZs only

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df11

In [None]:
# Bar chart of the total spot cost per instance type for the chosen OS / AZ (data: df11).
Instances = ['c3.large', 'c3.xlarge', 'c3.2xlarge', 'c3.4xlarge', 'c3.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1a'] #choose what you want

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Fresh accumulators per OS, so one OS's bars never leak into the next figure.
    temp_i = []
    temp_v = []
    os_rows = df11[df11.os == o]

    for i in Instances:
        for a in AZ:
            rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
            # Prices indexed by timestamp: the form ts.integrate() is called on.
            ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                  'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Optional hourly cost (2160 hours in the 3 month period):
            #print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  #+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v_h = (v/2160).round(4)
            #print(v_h)

            # Spell out the AZ only when several are selected, to keep bar labels unique.
            temp_i.append(i if len(AZ) == 1 else i + ' (' + a + ')')
            temp_v.append(v)

    plt.figure()  # one figure per OS
    plt.bar(temp_i, temp_v, color='#828587')

    plt.title('Total Price for ' + family +
              ' Instance Types Right AFTER the 2017 Change from 1/01/2018 to 3/31/2018' + "\n" + 'Across the ' + zones +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750])

    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_2018.png')
#plt.show()

### us-east-2 AZs only

In [None]:
# df12: the rows of df10 whose availability-zone name contains "us-east-2".
filtZ5 = df10[df10['zone'].str.contains("us-east-2", regex=False)]
df12 = filtZ5

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df12

In [None]:
# Bar chart of the total spot cost per instance type for the chosen OS / AZ (data: df12).
Instances = ['t2.micro', 't2.small', 't2.medium', 't2.large', 't2.xlarge', 't2.2xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-2c'] #choose what you want

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Fresh accumulators per OS, so one OS's bars never leak into the next figure.
    temp_i = []
    temp_v = []
    os_rows = df12[df12.os == o]

    for i in Instances:
        for a in AZ:
            rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
            # Prices indexed by timestamp: the form ts.integrate() is called on.
            ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                  'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Optional hourly cost (2160 hours in the 3 month period):
            #print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  #+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v_h = (v/2160).round(4)
            #print(v_h)

            # Spell out the AZ only when several are selected, to keep bar labels unique.
            temp_i.append(i if len(AZ) == 1 else i + ' (' + a + ')')
            temp_v.append(v)

    plt.figure()  # one figure per OS
    plt.bar(temp_i, temp_v, color='#6ad1e6')

    plt.title('Total Price for ' + family +
              ' Instance Types Right AFTER the 2017 Change from 1/01/2018 to 3/31/2018' + "\n" + 'Across the ' + zones +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,25,50,75,100,125,150,175,200,225])

    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_2018.png')
#plt.show()

### us-west-2 AZs only

In [None]:
# df13: the rows of df10 whose availability-zone name contains "us-west-2".
filtZ6 = df10[df10['zone'].str.contains("us-west-2", regex=False)]
df13 = filtZ6

##### must specify IT, OS, and AZ: can output total and/or hourly cost; can create graph: using df13

In [None]:
# Bar chart of the total spot cost per instance type for the chosen OS / AZ (data: df13).
Instances = ['x1e.xlarge', 'x1e.2xlarge', 'x1e.4xlarge', 'x1e.8xlarge', 'x1e.16xlarge', 'x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-west-2c'] #choose what you want

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Fresh accumulators per OS, so one OS's bars never leak into the next figure.
    temp_i = []
    temp_v = []
    os_rows = df13[df13.os == o]

    for i in Instances:
        for a in AZ:
            rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
            # Prices indexed by timestamp: the form ts.integrate() is called on.
            ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

            print('The total price you would have to pay for having the ' + i + ' VM over the 3 month period' + "\n" +
                  'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            v = ts.integrate().round(4)
            print(v)
            print(' ')

            # Optional hourly cost (2160 hours in the 3 month period):
            #print('The total cost per hour you would have to pay for having the ' + i + ' VM over the 3 month period'
                  #+ "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            #v_h = (v/2160).round(4)
            #print(v_h)

            # Spell out the AZ only when several are selected, to keep bar labels unique.
            temp_i.append(i if len(AZ) == 1 else i + ' (' + a + ')')
            temp_v.append(v)

    plt.figure()  # one figure per OS
    plt.bar(temp_i, temp_v, color='#55a81e')

    plt.title('Total Price for ' + family +
              ' Instance Types Right AFTER the 2017 Change from 1/01/2018 to 3/31/2018' + "\n" + 'Across the ' + zones +
              ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    #plt.xticks(rotation=15)
    #plt.yticks([0,5000,10000,15000,20000,25000,30000,35000,40000,45000,50000,55000])

    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_2018.png')
#plt.show()

## Part 4: Comparing "before" & "after" values simultaneously

##### must specify IT, OS, and AZ: outputs total cost; can create graph: COMPARISON

In [None]:
# GRAPH = BAR PLOT, Comparing Costs from Before and After (2017 & 2018)
# For every selected instance type / AZ, integrates the price history of each period
# and draws the two totals side by side.

Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-west-2c'] #choose what you want

# (period label, data) pairs, processed in this order.
periods = [('2017', df5), ('2018', df10)]

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Everything is rebuilt per OS so the bar lists always match the tick positions
    # (shared accumulators made plt.bar fail as soon as a second OS was selected).
    temp_i = [i if len(AZ) == 1 else i + ' (' + a + ')' for i in Instances for a in AZ]
    totals = {}

    for label, frame in periods:
        os_rows = frame[frame.os == o]
        totals[label] = []

        for i in Instances:
            for a in AZ:
                rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
                # Prices indexed by timestamp: the form ts.integrate() is called on.
                ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

                v = ts.integrate().round(4)

                print('The total cost you would have to pay for having the ' + i +
                      ' VM over the ' + label + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                print(v)
                print(' ')

                totals[label].append(v)

    temp_v_1 = totals['2017']
    temp_v_2 = totals['2018']

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    plt.figure()  # one figure per OS
    plt.bar(ITx_indexes-0.5*width, temp_v_1, width=width, color='#7900db', label = '07/01/2017 -  09/30/2017')
    plt.bar(ITx_indexes+0.5*width, temp_v_2, width=width, color='#289962', label = '01/01/2018 -  03/31/2018')

    plt.title('Comparing Total Price for ' + family +
        ' Instance Types Right BEFORE & Right AFTER the 2017' + "\n" + 'Change Across the ' + zones +
        ' Availability Zone and ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=15)
    #plt.yticks([0,2500,5000,7500,10000,12500,15000,17500,20000,22500,25000])

    plt.legend(loc='best')
    plt.tight_layout()

    #plt.savefig(family + '_' + zones + '_b4Af_CompBAR.png')
#plt.show()

##### Recopied from above --> PRESENTATIONS

In [None]:
# GRAPH = BAR PLOT, Comparing Costs from Before and After --> PRES (2017 & 2018)
# Presentation-sized variant: larger fonts, fixed y ticks, and the figure is saved.

Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# (period label, data) pairs, processed in this order.
periods = [('2017', df6), ('2018', df11)]

# Title pieces are derived from the selections themselves rather than from loop
# variables that merely happen to survive after the loops have finished.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))
zones = ', '.join(AZ)

# Matplotlib 3.6 renamed 'seaborn-poster' to 'seaborn-v0_8-poster'; use whichever exists.
plt.style.use('seaborn-poster' if 'seaborn-poster' in plt.style.available else 'seaborn-v0_8-poster')

for o in OS:
    # Everything is rebuilt per OS so the bar lists always match the tick positions
    # (shared accumulators made plt.bar fail as soon as a second OS was selected).
    temp_i = [i if len(AZ) == 1 else i + ' (' + a + ')' for i in Instances for a in AZ]
    totals = {}

    for label, frame in periods:
        os_rows = frame[frame.os == o]
        totals[label] = []

        for i in Instances:
            for a in AZ:
                rows = os_rows[(os_rows.instance == i) & (os_rows.zone == a)]
                # Prices indexed by timestamp: the form ts.integrate() is called on.
                ts = pd.Series(data=rows['price'].values, index=rows['time'].values)

                v = ts.integrate().round(4)

                print('The total cost you would have to pay for having the ' + i +
                      ' VM over the ' + label + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                print(v)
                print(' ')

                totals[label].append(v)

    temp_v_1 = totals['2017']
    temp_v_2 = totals['2018']

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    plt.figure()  # one figure per OS
    plt.bar(ITx_indexes-0.5*width, temp_v_1, width=width, color='#d11b1b', label = '07/01/2017 -  09/30/2017')
    plt.bar(ITx_indexes+0.5*width, temp_v_2, width=width, color='#27a164', label = '01/01/2018 -  03/31/2018')

    plt.title('Comparing Total Price for ' + family +
        ' Instance Types Right BEFORE & Right AFTER the 2017' + "\n" + 'Change Across the ' + zones +
        ' Availability Zone and ' + o + ' Operating Systems',fontsize=20)
    plt.ylabel('Total Cost (USD)')
    plt.xticks(ticks=ITx_indexes, labels=temp_i, fontsize=20)#, rotation=15)
    plt.yticks([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500], fontsize=20)

    plt.legend(prop={"size":20})
    plt.tight_layout()

    # ', ' only appears when several families / AZs are selected; keep file names tidy then.
    plt.savefig(family.replace(', ', '-') + '_' + zones.replace(', ', '-') + '_17-18_PRESmod.png')
plt.show()

### Comparison using means (across all AZs): 2017, 2018, 2021

Reimporting raw data and sorting by date-time only

In [None]:
# Load the old data file (comma-separated) into df15.
df15 = pd.read_csv('Data/old3_final.csv', sep=',')

In [None]:
# Remove the unused columns. errors='ignore' keeps this cell safe to re-run: a second
# run finds the columns already gone and does nothing instead of raising KeyError.
df15 = df15.drop(columns=['code','duration'], errors='ignore')

In [None]:
# Interpret 'time' as milliseconds since the epoch and store it as datetimes.
df15 = df15.assign(time=pd.to_datetime(df15['time'], unit='ms'))

In [None]:
# Order rows by timestamp ONLY (zones and instance types stay interleaved).
df15 = df15.sort_values(by=['time'])

In [None]:
# Put the columns in the order used throughout the analysis.
df15 = df15.loc[:, ['zone','instance','os','price','time']]

In [None]:
# df16: the rows of df15 whose availability-zone name contains 'us-east-1'.
df16 = df15[df15['zone'].str.contains('us-east-1', regex=False)]
#df16

In [None]:
# Load the new data file (comma-separated) again, this time into df17.
df17 = pd.read_csv('Data/new3_final.csv', sep=',')

In [None]:
# Remove the unused columns. errors='ignore' keeps this cell safe to re-run: a second
# run finds the columns already gone and does nothing instead of raising KeyError.
df17 = df17.drop(columns=['code','duration'], errors='ignore')

In [None]:
# Interpret 'time' as milliseconds since the epoch and store it as datetimes.
df17 = df17.assign(time=pd.to_datetime(df17['time'], unit='ms'))

In [None]:
# Order rows by timestamp ONLY (zones and instance types stay interleaved).
df17 = df17.sort_values(by=['time'])

In [None]:
# Put the columns in the order used throughout the analysis.
df17 = df17.loc[:, ['zone','instance','os','price','time']]

In [None]:
# df18: the rows of df17 whose availability-zone name contains 'us-east-1'.
df18 = df17[df17['zone'].str.contains('us-east-1', regex=False)]
#df18

In [None]:
# Load the tab-separated spot_0601 file; it has no header row.
df19 = pd.read_csv('Data/spot_0601.tsv', delimiter='\t', header=None)

In [None]:
# Name the six raw columns. Skipped when the names are already in place: assigning six
# names again after 'SPH' has been dropped would fail with a length mismatch.
if 'zone' not in df19.columns:
    df19.columns=['SPH','zone','instance','os','price','time'] #setting column names

In [None]:
# Remove the SPH column. errors='ignore' keeps this cell safe to re-run: a second run
# finds the column already gone and does nothing instead of raising KeyError.
df19 = df19.drop(columns=['SPH'], errors='ignore')

In [None]:
# Parse the 'time' column into datetimes.
df19 = df19.assign(time=pd.to_datetime(df19['time']))

In [None]:
# Order rows by timestamp ONLY (zones and instance types stay interleaved).
df19 = df19.sort_values(by=['time'])

In [None]:
# df20: Linux/UNIX rows of df19 whose availability-zone name contains 'us-east-1'.
df20 = df19[df19['zone'].str.contains('us-east-1', regex=False) & (df19['os'] == 'Linux/UNIX')]
#df20

- df16: 2017 data, us-east-1 AZs
- df18: 2018 data, us-east-1 AZs
- df20: 2021 data, us-east-1 AZs

In [None]:
# Mean Integration: Averaging across all AZs, 2017, 2018, 2021
# For every instance type, integrates each us-east-1 AZ's price history separately and
# plots the mean of those per-AZ totals, one bar per period.

Instances = ['z1d.large', 'z1d.xlarge', 'z1d.2xlarge', 'z1d.3xlarge', 'z1d.6xlarge', 'z1d.12xlarge', 'z1d.metal'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

# (period label, data) pairs, processed in this order.
periods = [('2017', df16), ('2018', df18), ('2021', df20)]

# Title piece derived from the selection rather than from a leftover loop variable.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

for o in OS:
    # Everything is rebuilt per OS so the bar lists always match the tick positions.
    temp_i = list(Instances)
    totals = {}

    for label, frame in periods:
        os_rows = frame[frame.os == o]
        totals[label] = []

        for i in Instances:
            rows = os_rows[os_rows.instance == i]

            # Integrate each AZ on its own, then average the totals. Integrating the pooled
            # rows joins prices from different AZs into one curve, and .mean() on that single
            # number averages nothing.
            zone_totals = [
                pd.Series(data=zone_rows['price'].values, index=zone_rows['time'].values).integrate()
                for _, zone_rows in rows.groupby('zone')
            ]
            # No observations at all for this instance type: record a zero-height bar.
            v = np.round(np.mean(zone_totals), 4) if zone_totals else 0.0

            print('On average, the total cost you would have to pay for having the ' + i +
                  ' VM over the ' + label + ' time period' + "\n" + 'across all us-east-1 availability zones and the ' + o +
                  ' operating system is:')
            print(v)
            print(' ')

            totals[label].append(v)

    temp_v_1 = totals['2017']
    temp_v_2 = totals['2018']
    temp_v_3 = totals['2021']

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    plt.figure()  # one figure per OS
    plt.bar(ITx_indexes-width, temp_v_1, width=width, label = '07/01/2017 -  09/30/2017')
    plt.bar(ITx_indexes, temp_v_2, width=width, label = '01/01/2018 -  03/31/2018')
    plt.bar(ITx_indexes+width, temp_v_3, width=width, label = '03/04/2021 -  06/01/2021')

    plt.title('Comparing the Total Price for ' + family +
        ' Instance Types Across Certain Time Periods,' + "\n" +
              'Averaged Across All us-east-1 Availability Zones, Within ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=20)
    #plt.yticks([0,10000,20000,30000,40000,50000,60000,70000,80000,90000,100000,110000])

    plt.legend(loc='best')
    plt.tight_layout()

    #plt.savefig(family + '_17-18-21_CompBAR.png')
#plt.show()

##### Recopied above for PRESENTATIONS!

In [None]:
# Mean Integration: Averaging across all AZs, 2017, 2018, 2021
# Presentation-sized variant. For every instance type, integrates each us-east-1 AZ's
# price history separately and plots the mean of those per-AZ totals, one bar per period.

Instances = ['r4.large', 'r4.xlarge', 'r4.2xlarge', 'r4.4xlarge', 'r4.8xlarge', 'r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

# (period label, data) pairs, processed in this order.
periods = [('2017', df16), ('2018', df18), ('2021', df20)]

# Title piece derived from the selection rather than from a leftover loop variable.
family = ', '.join(dict.fromkeys(name.split('.')[0] for name in Instances))

# Matplotlib 3.6 renamed 'seaborn-poster' to 'seaborn-v0_8-poster'; use whichever exists.
plt.style.use('seaborn-poster' if 'seaborn-poster' in plt.style.available else 'seaborn-v0_8-poster')

for o in OS:
    # Everything is rebuilt per OS so the bar lists always match the tick positions.
    temp_i = list(Instances)
    totals = {}

    for label, frame in periods:
        os_rows = frame[frame.os == o]
        totals[label] = []

        for i in Instances:
            rows = os_rows[os_rows.instance == i]

            # Integrate each AZ on its own, then average the totals. Integrating the pooled
            # rows joins prices from different AZs into one curve, and .mean() on that single
            # number averages nothing.
            zone_totals = [
                pd.Series(data=zone_rows['price'].values, index=zone_rows['time'].values).integrate()
                for _, zone_rows in rows.groupby('zone')
            ]
            # No observations at all for this instance type: record a zero-height bar.
            v = np.round(np.mean(zone_totals), 4) if zone_totals else 0.0

            print('On average, the total cost you would have to pay for having the ' + i +
                  ' VM over the ' + label + ' time period' + "\n" + 'across all us-east-1 availability zones and the ' + o +
                  ' operating system is:')
            print(v)
            print(' ')

            totals[label].append(v)

    temp_v_1 = totals['2017']
    temp_v_2 = totals['2018']
    temp_v_3 = totals['2021']

    ITx_indexes = np.arange(len(temp_i))
    width = 0.25

    plt.figure()  # one figure per OS
    plt.bar(ITx_indexes-width, temp_v_1, width=width, label = '07/01/2017 -  09/30/2017', color='#e07e14')
    plt.bar(ITx_indexes, temp_v_2, width=width, label = '01/01/2018 -  03/31/2018', color='#2580db')
    plt.bar(ITx_indexes+width, temp_v_3, width=width, label = '03/04/2021 -  06/01/2021', color='#d41eb2')

    plt.title('Comparing the Total Price for ' + family +
        ' Instance Types Across Certain Time Periods,' + "\n" +
              'Averaged Across All us-east-1 Availability Zones, Within ' + o + ' Operating Systems')
    plt.ylabel('Total Cost (USD)')
    plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=15)
    #plt.yticks([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500])

    plt.legend(loc='best')
    plt.tight_layout()

    #plt.savefig(family + '_17-18-21_PRES.png')
#plt.show()

#### Individual Time Series Plotting

##### 2017 & 2018 data

- us-east-1a: c4.large, d2.xlarge, m4.large, m4.xlarge, m4.2xlarge, m4.4xlarge
- us-east-1b: c4.large, c4.xlarge, d2.xlarge, i2.4xlarge, i3.xlarge, i3.2xlarge, i3.4xlarge, m3.large
- us-east-1c: m1.small, m3.medium
- us-east-1d: d2.2xlarge, i3.xlarge, m1.small, m3.medium
- us-east-1e: i3.xlarge, m3.medium
- us-east-1f: c4.large, i3.xlarge

- us-east-2a: c4.large, c4.xlarge, c4.2xlarge, c4.4xlarge, i3.2xlarge, m4.large, m4.xlarge, r3.large, r3.xlarge, r4.large, r4.xlarge, r4.2xlarge
- us-east-2b: c4.large, c4.xlarge, c4.2xlarge, i3.2xlarge, m4.large, m4.xlarge, r3.large, r4.large, r4.xlarge, r4.2xlarge, r4.4xlarge
- us-east-2c: c4.large, c4.xlarge, c4.2xlarge, c4.4xlarge, r3.large, r3.xlarge, r3.2xlarge, r4.large

- us-west-2a: c3.large, c3.2xlarge, c4.large, c4.xlarge, i3.2xlarge, i3.4xlarge, m1.small, m3.medium, m4.large, m4.xlarge, m4.2xlarge, m4.4xlarge, r3.large, r4.xlarge, r4.2xlarge
- us-west-2b: c3.large, c4.large, c4.xlarge, c4.2xlarge, i3.large, i3.xlarge, i3.2xlarge, m1.small, m3.medium, m4.large, m4.xlarge, m4.2xlarge, m4.4xlarge, r3.large, r4.large, r4.xlarge, r4.2xlarge, r4.4xlarge
- us-west-2c: c3.large, c4.large, m1.small, m1.medium, m3.medium, m4.large, m4.xlarge, m4.2xlarge, r3.large, r3.xlarge, r4.large, r4.xlarge, r4.2xlarge, r4.4xlarge

In [None]:
# Spot price over time for one instance type / AZ / OS: 2017 (df5) and 2018 (df10).

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

filt17 = df5.loc[(df5['zone'] == 'us-west-2c') & (df5['instance'] == 'r4.4xlarge') & (df5['os'] == 'Linux/UNIX')]
filt18 = df10.loc[(df10['zone'] == 'us-west-2c') & (df10['instance'] == 'r4.4xlarge') & (df10['os'] == 'Linux/UNIX')]

# plt.plot accepts datetime x-values directly; plot_date is deprecated in recent Matplotlib.
plt.plot(filt17['time'], filt17['price'], linestyle='--', marker='.', color='#e62072', label='2017')
plt.plot(filt18['time'], filt18['price'], linestyle='--', marker='.', color='#0dba50', label='2018')

# Slant the date labels and show them as e.g. "Jul 01, 2017".
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the r4.4xlarge Instance Type Across the ' + "\n" +
          'us-west-2c Availability Zone and Linux/UNIX Products')
plt.ylabel('Spot Price (USD)')
#plt.ylim(0,1)
#plt.yticks([0,1,2,3,4,5,6,7,8,9,10,11])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('r44xlarge_2cw_ts_proj.png')
#plt.show()

##### Recopied Above --> Use for Presentations!

In [None]:
# Presentation-sized spot price time series: 2017 (df6) and 2018 (df11).

# Matplotlib 3.6 renamed 'seaborn-poster' to 'seaborn-v0_8-poster'; use whichever exists.
plt.style.use('seaborn-poster' if 'seaborn-poster' in plt.style.available else 'seaborn-v0_8-poster')

filt17 = df6.loc[(df6['zone'] == 'us-east-1e') & (df6['instance'] == 'r4.2xlarge') & (df6['os'] == 'Linux/UNIX')]
filt18 = df11.loc[(df11['zone'] == 'us-east-1e') & (df11['instance'] == 'r4.2xlarge') & (df11['os'] == 'Linux/UNIX')]

# plt.plot accepts datetime x-values directly; plot_date is deprecated in recent Matplotlib.
plt.plot(filt17['time'], filt17['price'], linestyle='--', marker='.', color='#e01017', label='2017')
plt.plot(filt18['time'], filt18['price'], linestyle='--', marker='.', color='#26a341', label='2018')

# Slant the date labels and show them as e.g. "Jul 01, 2017".
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the r4.2xlarge Instance Type Across the ' + "\n" +
          'us-east-1e Availability Zone and Linux/UNIX Products (2017 & 2018)')
plt.ylabel('Spot Price (USD)')
#plt.ylim(0,1)
#plt.yticks([0,0.02,0.04,0.06,0.08,0.1,0.12,0.14,0.16,0.18,0.2,0.22,0.24,0.26])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('r4.2xlarge_1e_ts_17-18_PRES.png')
#plt.show()

In [None]:
# Time Series as a subplot
# Two stacked panels sharing the date axis: 2017 (df5) on top, 2018 (df10) below.

# Matplotlib 3.6 renamed the 'seaborn' style to 'seaborn-v0_8'; use whichever exists.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)

# ax.plot accepts datetime x-values directly; plot_date is deprecated in recent Matplotlib.
filt17 = df5.loc[(df5['zone'] == 'us-east-1a') & (df5['instance'] == 'c4.large') & (df5['os'] == 'Linux/UNIX')]
ax1.plot(filt17['time'], filt17['price'], linestyle='--', marker='.', color='#291bc2')

filt18 = df10.loc[(df10['zone'] == 'us-east-1a') & (df10['instance'] == 'c4.large') & (df10['os'] == 'Linux/UNIX')]
ax2.plot(filt18['time'], filt18['price'], linestyle='--', marker='.', color='#17b061')

# Slant the date labels and show them as e.g. "Jul 01, 2017". The formatter is set on
# the bottom panel, which is the axes plt.gca() referred to here.
fig.autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
ax2.xaxis.set_major_formatter(date_format)

ax1.set_title('Time Series Plot of the c4.large Instance Type Across the ' + "\n" +
          'us-east-1a Availability Zone and Linux/UNIX Products')
ax1.set_ylabel('Spot Price (USD)')

ax2.set_xlabel('Date')
ax2.set_ylabel('Spot Price (USD)')  # label typo ('PRice') corrected

plt.tight_layout()
#fig.savefig('ts_proj_1.png')
#plt.show()

#### 2021 Time Series: Final Pres

In [None]:
# Presentation-sized time series of several r5 sizes in us-east-1e (Linux/UNIX), from df1.

# Matplotlib 3.6 renamed 'seaborn-poster' to 'seaborn-v0_8-poster'; use whichever exists.
plt.style.use('seaborn-poster' if 'seaborn-poster' in plt.style.available else 'seaborn-v0_8-poster')

r5_sizes = ['r5.large', 'r5.xlarge', 'r5.2xlarge', 'r5.8xlarge', 'r5.12xlarge', 'r5.24xlarge']
r5_colors = ['#2b2bc4', '#23ad68', '#c21515', '#8830c7', '#d0d615', 'k']

# One filtered frame per size, in the same order as r5_sizes.
filt21_1, filt21_2, filt21_3, filt21_4, filt21_5, filt21_6 = (
    df1.loc[(df1['AZ'] == 'us-east-1e') & (df1['IT'] == size) & (df1['OS'] == 'Linux/UNIX')]
    for size in r5_sizes
)

# plt.plot accepts datetime x-values directly; plot_date is deprecated in recent Matplotlib.
for frame, size, shade in zip((filt21_1, filt21_2, filt21_3, filt21_4, filt21_5, filt21_6), r5_sizes, r5_colors):
    plt.plot(frame['DT'], frame['SP'], linestyle='--', marker='.', label=size, color=shade)

# Slant the date labels and show them as e.g. "Mar 04, 2021".
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of Some r5 Instance Types Across the ' + "\n" +
          'us-east-1e Availability Zone and Linux/UNIX Products (2021)')
plt.ylabel('Spot Price (USD)')
#plt.ylim(0,1)
#plt.yticks([0,0.2,0.4,0.6,0.8,1,1.2,1.4,1.6,1.8,2,2.2,2.4])

#plt.legend(loc=(0.77,0.52))
plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('r5_1e_ts_21_PRES.png')
#plt.show()

### Numerical Statistics

##### Five-Number Summaries

In [None]:
# Linux/UNIX rows of df6, one frame per us-east-1 availability zone (a through f).
a1_6, b1_6, c1_6, d1_6, e1_6, f1_6 = (
    df6.loc[(df6['zone'] == 'us-east-1' + letter) & (df6['os'] == 'Linux/UNIX')]
    for letter in 'abcdef'
)

#2017 us-1

In [None]:
# Linux/UNIX rows of df11, one frame per us-east-1 availability zone (a through f).
a1_11, b1_11, c1_11, d1_11, e1_11, f1_11 = (
    df11.loc[(df11['zone'] == 'us-east-1' + letter) & (df11['os'] == 'Linux/UNIX')]
    for letter in 'abcdef'
)

#2018 us-1

In [None]:
#2021 us-1
# All Linux/UNIX rows of df1, regardless of availability zone.
df1_LU = df1[df1['OS'] == 'Linux/UNIX']

# One Linux/UNIX-only frame per us-east-1 availability zone (suffixes a-f), taken from df1.
a1_1, b1_1, c1_1, d1_1, e1_1, f1_1 = (
    df1.loc[(df1['AZ'] == 'us-east-1' + letter) & (df1['OS'] == 'Linux/UNIX')]
    for letter in 'abcdef'
)

In [None]:
#BoxPlot: us-1
#   Horizontal boxplots of the spot price, one box per us-east-1 availability zone,
#   drawn on a log-scaled x-axis.

plt.style.use('seaborn')

# The 2018 frames are active; the commented line selects the 2021 frames instead
# (the title below would then need to be changed by hand).
AZs = [zone_df['price'] for zone_df in (a1_11, b1_11, c1_11, d1_11, e1_11, f1_11)]
#AZs = [a1_1['SP'], b1_1['SP'], c1_1['SP'], d1_1['SP'], e1_1['SP'], f1_1['SP']]

plt.boxplot(AZs, vert=False)

plt.title("\n".join([
    'Boxplots of Spot Prices (USD) Right AFTER the 2017 Change',
    '(01/01/2018 - 03/31/2018) Across All us-east-1 Availability Zones',
    '(Log-Scaled)',
]))
plt.xlabel("Spot Price")
plt.yticks(list(range(1, 7)), ['us-east-1' + letter for letter in 'abcdef'])

# Fixed limits keep the different boxplot figures comparable on the log axis
plt.xlim(1e-3, 1e3)
plt.xscale("log")

plt.tight_layout()
#plt.savefig('18_us1_5ns_BOXlog.png')
#plt.show()

In [None]:
#2017 (us-2)
# Linux/UNIX-only frames per availability zone: us-east-2a/b/c from df7, us-west-2a/b/c from df8.
a2_7, b2_7, c2_7 = (
    df7.loc[(df7['zone'] == 'us-east-2' + letter) & (df7['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)
a2w_8, b2w_8, c2w_8 = (
    df8.loc[(df8['zone'] == 'us-west-2' + letter) & (df8['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)

In [None]:
#2018 (us-2)
# Linux/UNIX-only frames per availability zone: us-east-2a/b/c from df12, us-west-2a/b/c from df13.
a2_12, b2_12, c2_12 = (
    df12.loc[(df12['zone'] == 'us-east-2' + letter) & (df12['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)
a2w_13, b2w_13, c2w_13 = (
    df13.loc[(df13['zone'] == 'us-west-2' + letter) & (df13['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)

In [None]:
#BoxPlot: us-2
#   Horizontal boxplots of the spot price, one box per us-east-2 / us-west-2 availability zone,
#   drawn on a log-scaled x-axis.

plt.style.use('seaborn')

# The 2017 frames are active; the commented line selects the 2018 frames instead
# (the title below would then need to be changed by hand).
AZs = [zone_df['price'] for zone_df in (a2_7, b2_7, c2_7, a2w_8, b2w_8, c2w_8)]
#AZs = [a2_12['price'], b2_12['price'], c2_12['price'], a2w_13['price'], b2w_13['price'], c2w_13['price']]

plt.boxplot(AZs, vert=False)

plt.title("\n".join([
    'Boxplots of Spot Prices (USD) Right BEFORE the 2017 Change',
    '(07/01/2017 - 09/30/2017) Across All us-2 Availability Zones',
    '(Log-Scaled)',
]))
plt.xlabel("Spot Price")
plt.yticks(
    list(range(1, 7)),
    [region + letter for region in ('us-east-2', 'us-west-2') for letter in 'abc'],
)

# Fixed limits keep the different boxplot figures comparable on the log axis
plt.xlim(1e-3, 1e3)
plt.xscale("log")

plt.tight_layout()
#plt.savefig('17_us2_5ns_BOXlog.png')
#plt.show()

##### Boxplot cell copied for presentations

In [None]:
#BoxPlot: PRES
#   Presentation-styled copy of the boxplot cell.
#   Exactly one `AZs = ...` line must be active, and it has to agree with the title and
#   y tick labels below; otherwise the cell would plot whatever `AZs` was left in the kernel.

plt.style.use('seaborn-poster')

#AZs = [a1_6['price'], b1_6['price'], c1_6['price'], d1_6['price'], e1_6['price'], f1_6['price']]
#AZs = [a1_11['price'], b1_11['price'], c1_11['price'], d1_11['price'], e1_11['price'], f1_11['price']]
#AZs = [a1_1['SP'], b1_1['SP'], c1_1['SP'], d1_1['SP'], e1_1['SP'], f1_1['SP']]

#AZs = [a2_7['price'], b2_7['price'], c2_7['price'], a2w_8['price'], b2w_8['price'], c2w_8['price']]
# Active selection: the 2018 us-2 frames, matching the title/labels of this cell.
AZs = [a2_12['price'], b2_12['price'], c2_12['price'], a2w_13['price'], b2w_13['price'], c2w_13['price']]

plt.boxplot(AZs, vert=False)

plt.title('Boxplots of Spot Prices (USD) Right AFTER the 2017 Change' + "\n" +
          '(01/01/2018 - 03/31/2018) Across All us-2 Availability Zones' + "\n" + '(Log-Scaled)')
plt.yticks([1,2,3,4,5,6], ['us-east-2a','us-east-2b','us-east-2c','us-west-2a','us-west-2b','us-west-2c'])
plt.xlim(10**-3, 10**3)
plt.xscale("log")
plt.xlabel("Spot Price")

plt.tight_layout()
#plt.savefig('18_us2_5ns_BOXlogPRES.png')
#plt.show()

##### Boxplots for each year, averaged across all AZs

Separate boxplots for each region: us-east-1, us-east-2, and us-west-2.

In [None]:
# Linux/UNIX-only rows of the 2021 us-east-1 data (df1)
df1_LU = df1[df1['OS'] == 'Linux/UNIX']

- df6: 2017 data, us-east-1
- df11: 2018 data, us-east-1
- df1_LU: 2021 data, us-east-1

In [None]:
#BoxPlot: 2017, 2018, 2021 --> averaged across us-east-1
#   One horizontal box per time period, pooling every us-east-1 availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

# The title promises Linux/UNIX only, so the 2017/2018 frames are filtered on 'os' here
# in the same way df1_LU already is for 2021 (a no-op if they were filtered upstream).
us_east_1 = [
    df6.loc[df6['os'] == 'Linux/UNIX', 'price'],
    df11.loc[df11['os'] == 'Linux/UNIX', 'price'],
    df1_LU['SP'],
]

plt.boxplot(us_east_1, vert=False)

plt.title('Boxplots of Spot Prices (USD) Across Certain Time Periods, Averaged Across' + "\n" +
              'All us-east-1 Availability Zones, Within Linux/UNIX Operating Systems' +
          "\n" + '(Log-Scaled)')
plt.yticks([1,2,3], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018',
                     '03/04/2021 -' + "\n" + '06/01/2021'])
plt.xlim(10**-3, 10**3)
plt.xscale("log")
plt.xlabel("Spot Price")

plt.tight_layout()
#plt.savefig('17-18-21_us1_5ns_BOXlog.png')
#plt.show()

- df7: 2017 data, us-east-2
- df12: 2018 data, us-east-2

In [None]:
#BoxPlot: 2017, 2018 --> averaged across us-east-2
#   One horizontal box per time period, pooling every us-east-2 availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

# The title promises Linux/UNIX only, so both frames are filtered on 'os' before plotting
# (a no-op if they were already filtered upstream).
us_east_2 = [
    df7.loc[df7['os'] == 'Linux/UNIX', 'price'],
    df12.loc[df12['os'] == 'Linux/UNIX', 'price'],
]

plt.boxplot(us_east_2, vert=False)

plt.title('Boxplots of Spot Prices (USD) Across Certain Time Periods, Averaged Across' + "\n" +
              'All us-east-2 Availability Zones, Within Linux/UNIX Operating Systems' +
          "\n" + '(Log-Scaled)')
plt.yticks([1,2], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018'])
plt.xlim(10**-3, 10**3)
plt.xscale("log")
plt.xlabel("Spot Price")

plt.tight_layout()
#plt.savefig('17-18_use2_5ns_BOXlog.png')
#plt.show()

- df8: 2017 data, us-west-2
- df13: 2018 data, us-west-2

In [None]:
#BoxPlot: 2017, 2018 --> averaged across us-west-2
#   One horizontal box per time period, pooling every us-west-2 availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

# The title promises Linux/UNIX only, so both frames are filtered on 'os' before plotting
# (a no-op if they were already filtered upstream).
us_west_2 = [
    df8.loc[df8['os'] == 'Linux/UNIX', 'price'],
    df13.loc[df13['os'] == 'Linux/UNIX', 'price'],
]

# Plot the us-west-2 series built above (this cell previously re-plotted us_east_2).
plt.boxplot(us_west_2, vert=False)

plt.title('Boxplots of Spot Prices (USD) Across Certain Time Periods, Averaged Across' + "\n" +
              'All us-west-2 Availability Zones, Within Linux/UNIX Operating Systems' +
          "\n" + '(Log-Scaled)')
plt.yticks([1,2], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018'])
plt.xlim(10**-3, 10**3)
plt.xscale("log")
plt.xlabel("Spot Price")

plt.tight_layout()
#plt.savefig('17-18_usw2_5ns_BOXlog.png')
#plt.show()

##### Standard Deviations

In [None]:
#2017 stds (by AZ)
# Standard deviation of 'price' for each us-east-1 zone frame, in zone order a-f.
sd_6s = [zone_df['price'].std() for zone_df in (a1_6, b1_6, c1_6, d1_6, e1_6, f1_6)]

# Individual names kept alongside the list
sd_a1_6, sd_b1_6, sd_c1_6, sd_d1_6, sd_e1_6, sd_f1_6 = sd_6s

In [None]:
#2018 stds (by AZ)
# Standard deviation of 'price' for each us-east-1 zone frame, in zone order a-f.
sd_11s = [zone_df['price'].std() for zone_df in (a1_11, b1_11, c1_11, d1_11, e1_11, f1_11)]

# Individual names kept alongside the list
sd_a1_11, sd_b1_11, sd_c1_11, sd_d1_11, sd_e1_11, sd_f1_11 = sd_11s

In [None]:
#2021 stds (by AZ)
# Standard deviation of 'SP' for each us-east-1 zone frame, in zone order a-f.
sd_1s = [zone_df['SP'].std() for zone_df in (a1_1, b1_1, c1_1, d1_1, e1_1, f1_1)]

# Individual names kept alongside the list
sd_a1_1, sd_b1_1, sd_c1_1, sd_d1_1, sd_e1_1, sd_f1_1 = sd_1s

In [None]:
#Barplot: 2017, 2018 -> us-east-1
#   Paired bars of the per-AZ standard deviations (sd_6s vs. sd_11s).
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-1' + letter for letter in 'abcdef']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, sd_6s, width=width, label='07/01/2017 - 09/30/2017')
plt.bar(AZ_indexes + width / 2, sd_11s, width=width, label='01/01/2018 - 03/31/2018')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Standard Deviations of Spot Prices Across All us-east-1 Availability Zones',
    'Right BEFORE and Right AFTER the 2017 Change to the Spot Market',
]))
#plt.yticks([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('sd_us1_17-18_BAR')
#plt.show()

In [None]:
#Barplot: 2021 -> us-east-1
#   One bar per availability zone showing the standard deviations in sd_1s.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-1' + letter for letter in 'abcdef']

width = 0.65

# The zone names are passed directly as the categorical x positions
plt.bar(AZs, sd_1s, width=width, color='k')

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Standard Deviations of Spot Prices Across All us-east-1 Availability Zones',
    'A Few Years After the 2017 Change to the Spot Market (03/04/2021 - 06/01/2021)',
]))
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4])

plt.tight_layout()
#plt.savefig('sd_us1_21_BAR')
#plt.show()

In [None]:
#2017 (us-2)
# Linux/UNIX-only frames per availability zone: us-east-2a/b/c from df7, us-west-2a/b/c from df8.
a2_7, b2_7, c2_7 = (
    df7.loc[(df7['zone'] == 'us-east-2' + letter) & (df7['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)
a2w_8, b2w_8, c2w_8 = (
    df8.loc[(df8['zone'] == 'us-west-2' + letter) & (df8['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)

In [None]:
#2017 stds (us-2)
# Standard deviation of 'price' per zone frame: us-east-2a/b/c first, then us-west-2a/b/c.
sd_7s8s = [zone_df['price'].std() for zone_df in (a2_7, b2_7, c2_7, a2w_8, b2w_8, c2w_8)]

# Individual names kept alongside the list
sd_a2_7, sd_b2_7, sd_c2_7, sd_a2w_8, sd_b2w_8, sd_c2w_8 = sd_7s8s

In [None]:
#2018 (us-2)
# Linux/UNIX-only frames per availability zone: us-east-2a/b/c from df12, us-west-2a/b/c from df13.
a2_12, b2_12, c2_12 = (
    df12.loc[(df12['zone'] == 'us-east-2' + letter) & (df12['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)
a2w_13, b2w_13, c2w_13 = (
    df13.loc[(df13['zone'] == 'us-west-2' + letter) & (df13['os'] == 'Linux/UNIX')]
    for letter in 'abc'
)

In [None]:
#2018 stds (us-2)
# Standard deviation of 'price' per zone frame: us-east-2a/b/c first, then us-west-2a/b/c.
sd_12s13s = [zone_df['price'].std() for zone_df in (a2_12, b2_12, c2_12, a2w_13, b2w_13, c2w_13)]

# Individual names kept alongside the list
sd_a2_12, sd_b2_12, sd_c2_12, sd_a2w_13, sd_b2w_13, sd_c2w_13 = sd_12s13s

In [None]:
#Barplot: 2017, 2018 -> us-2
#   Paired bars of the per-AZ standard deviations (sd_7s8s vs. sd_12s13s).
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = [region + letter for region in ('us-east-2', 'us-west-2') for letter in 'abc']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, sd_7s8s, width=width, label='07/01/2017 - 09/30/2017', color='#e01919')
plt.bar(AZ_indexes + width / 2, sd_12s13s, width=width, label='01/01/2018 - 03/31/2018', color='#ed980e')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Standard Deviations of Spot Prices Across All us-2 Availability Zones',
    'Right BEFORE and Right AFTER the 2017 Change to the Spot Market',
]))
#plt.yticks([0,1,2,3,4,5,6,7,8,9])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('sd_us2_17-18_BAR')
#plt.show()

#### Comparing Standard Deviations & Means: Are they proportional?

##### us-east-1

In [None]:
#2017 stds (by AZ) -> us-east-1
# Standard deviation of 'price' per zone frame (a-f), rounded to 4 decimal places.
sd_6s = [zone_df['price'].std().round(4) for zone_df in (a1_6, b1_6, c1_6, d1_6, e1_6, f1_6)]

# Individual names kept alongside the list
sd_a1_6, sd_b1_6, sd_c1_6, sd_d1_6, sd_e1_6, sd_f1_6 = sd_6s

In [None]:
#2017 means (by AZ) -> us-east-1
# Mean of 'price' per zone frame (a-f), rounded to 4 decimal places.
mean_6s = [zone_df['price'].mean().round(4) for zone_df in (a1_6, b1_6, c1_6, d1_6, e1_6, f1_6)]

# Individual names kept alongside the list
mean_a1_6, mean_b1_6, mean_c1_6, mean_d1_6, mean_e1_6, mean_f1_6 = mean_6s

In [None]:
#Barplot: 2017 -> us-east-1
#   Mean (mean_6s) next to standard deviation (sd_6s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-1' + letter for letter in 'abcdef']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_6s, width=width, label='Mean', color='#2dad58')
plt.bar(AZ_indexes + width / 2, sd_6s, width=width, label='Standard Deviation', color='#e01919')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-east-1 Availability Zones',
    'Right BEFORE the 2017 Change to the Spot Market (07/01/2017 - 09/30/2017)',
]))
#plt.yticks([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_us1_2017_BAR')
#plt.show()

In [None]:
#2018 stds (by AZ) -> us-east-1
# Standard deviation of 'price' per zone frame (a-f), rounded to 4 decimal places.
sd_11s = [zone_df['price'].std().round(4) for zone_df in (a1_11, b1_11, c1_11, d1_11, e1_11, f1_11)]

# Individual names kept alongside the list
sd_a1_11, sd_b1_11, sd_c1_11, sd_d1_11, sd_e1_11, sd_f1_11 = sd_11s

In [None]:
#2018 means (by AZ) -> us-east-1
# Mean of 'price' per zone frame (a-f), rounded to 4 decimal places.
mean_11s = [zone_df['price'].mean().round(4) for zone_df in (a1_11, b1_11, c1_11, d1_11, e1_11, f1_11)]

# Individual names kept alongside the list
mean_a1_11, mean_b1_11, mean_c1_11, mean_d1_11, mean_e1_11, mean_f1_11 = mean_11s

In [None]:
#Barplot: 2018 -> us-east-1
#   Mean (mean_11s) next to standard deviation (sd_11s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-1' + letter for letter in 'abcdef']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_11s, width=width, label='Mean', color='#8822d6')
plt.bar(AZ_indexes + width / 2, sd_11s, width=width, label='Standard Deviation', color='#de1b69')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-east-1 Availability Zones',
    'Right AFTER the 2017 Change to the Spot Market (01/01/2018 - 03/31/2018)',
]))
#plt.yticks([0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75])

# Legend anchored by hand at a fixed axes-fraction position rather than 'best'
plt.legend(loc=(0.76, 0.91))
plt.tight_layout()
#plt.savefig('mean-sd_us1_2018_BAR')
#plt.show()

In [None]:
#2021 stds (by AZ) -> us-east-1
# Standard deviation of 'SP' per zone frame (a-f), rounded to 4 decimal places.
sd_1s = [zone_df['SP'].std().round(4) for zone_df in (a1_1, b1_1, c1_1, d1_1, e1_1, f1_1)]

# Individual names kept alongside the list
sd_a1_1, sd_b1_1, sd_c1_1, sd_d1_1, sd_e1_1, sd_f1_1 = sd_1s

In [None]:
#2021 means (by AZ) -> us-east-1
# Mean of 'SP' per zone frame (a-f), rounded to 4 decimal places.
mean_1s = [zone_df['SP'].mean().round(4) for zone_df in (a1_1, b1_1, c1_1, d1_1, e1_1, f1_1)]

# Individual names kept alongside the list
mean_a1_1, mean_b1_1, mean_c1_1, mean_d1_1, mean_e1_1, mean_f1_1 = mean_1s

In [None]:
#Barplot: 2021 -> us-east-1
#   Mean (mean_1s) next to standard deviation (sd_1s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-1' + letter for letter in 'abcdef']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_1s, width=width, label='Mean', color='#3a5cf2')
plt.bar(AZ_indexes + width / 2, sd_1s, width=width, label='Standard Deviation', color='#f26722')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-east-1 Availability Zones',
    'A Few Years After the 2017 Change to the Spot Market (03/04/2021 - 06/01/2021)',
]))
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_us1_2021_BAR')
#plt.show()

##### us-east-2

In [None]:
#2017 stds (us-east-2)
# Standard deviation of 'price' per zone frame (a-c), rounded to 4 decimal places.
sd_7s = [zone_df['price'].std().round(4) for zone_df in (a2_7, b2_7, c2_7)]

# Individual names kept alongside the list
sd_a2_7, sd_b2_7, sd_c2_7 = sd_7s

In [None]:
#2017 means (us-east-2)
# Mean of 'price' per zone frame (a-c), rounded to 4 decimal places.
mean_7s = [zone_df['price'].mean().round(4) for zone_df in (a2_7, b2_7, c2_7)]

# Individual names kept alongside the list
mean_a2_7, mean_b2_7, mean_c2_7 = mean_7s

In [None]:
#Barplot: 2017 -> us-east-2
#   Mean (mean_7s) next to standard deviation (sd_7s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-2' + letter for letter in 'abc']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_7s, width=width, label='Mean', color='#7d7d7d')
plt.bar(AZ_indexes + width / 2, sd_7s, width=width, label='Standard Deviation', color='#000000')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-east-2 Availability Zones',
    'Right BEFORE the 2017 Change to the Spot Market (07/01/2017 - 09/30/2017)',
]))
#plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5,5,5.5,6,6.5,7,7.5,8,8.5])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_use2_2017_BAR')
#plt.show()

In [None]:
#2018 stds (us-east-2)
# Standard deviation of 'price' per zone frame (a-c), rounded to 4 decimal places.
sd_12s = [zone_df['price'].std().round(4) for zone_df in (a2_12, b2_12, c2_12)]

# Individual names kept alongside the list
sd_a2_12, sd_b2_12, sd_c2_12 = sd_12s

In [None]:
#2018 means (us-east-2)
# Mean of 'price' per zone frame (a-c), rounded to 4 decimal places.
mean_12s = [zone_df['price'].mean().round(4) for zone_df in (a2_12, b2_12, c2_12)]

# Individual names kept alongside the list
mean_a2_12, mean_b2_12, mean_c2_12 = mean_12s

In [None]:
#Barplot: 2018 -> us-east-2
#   Mean (mean_12s) next to standard deviation (sd_12s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-east-2' + letter for letter in 'abc']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_12s, width=width, label='Mean', color='#cfdb25')
plt.bar(AZ_indexes + width / 2, sd_12s, width=width, label='Standard Deviation', color='#f0a51a')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-east-2 Availability Zones',
    'Right AFTER the 2017 Change to the Spot Market (01/01/2018 - 03/31/2018)',
]))
#plt.yticks([0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_use2_2018_BAR')
#plt.show()

##### us-west-2

In [None]:
#2017 stds (us-west-2)
# Standard deviation of 'price' per zone frame (a-c), rounded to 4 decimal places.
sd_8s = [zone_df['price'].std().round(4) for zone_df in (a2w_8, b2w_8, c2w_8)]

# Individual names kept alongside the list
sd_a2w_8, sd_b2w_8, sd_c2w_8 = sd_8s

In [None]:
#2017 means (us-west-2)
# Mean of 'price' per zone frame (a-c), rounded to 4 decimal places.
mean_8s = [zone_df['price'].mean().round(4) for zone_df in (a2w_8, b2w_8, c2w_8)]

# Individual names kept alongside the list
mean_a2w_8, mean_b2w_8, mean_c2w_8 = mean_8s

In [None]:
#Barplot: 2017 -> us-west-2
#   Mean (mean_8s) next to standard deviation (sd_8s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-west-2' + letter for letter in 'abc']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_8s, width=width, label='Mean', color='#16cc3e')
plt.bar(AZ_indexes + width / 2, sd_8s, width=width, label='Standard Deviation', color='#7119a8')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-west-2 Availability Zones',
    'Right BEFORE the 2017 Change to the Spot Market (07/01/2017 - 09/30/2017)',
]))
#plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5,5,5.5,6,6.5,7,7.5])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_usw2_2017_BAR')
#plt.show()

In [None]:
#2018 stds (us-west-2)
# Standard deviation of 'price' per zone frame (a-c), rounded to 4 decimal places.
sd_13s = [zone_df['price'].std().round(4) for zone_df in (a2w_13, b2w_13, c2w_13)]

# Individual names kept alongside the list
sd_a2w_13, sd_b2w_13, sd_c2w_13 = sd_13s

In [None]:
#2018 means (us-west-2)
# Mean of 'price' per zone frame (a-c), rounded to 4 decimal places.
mean_13s = [zone_df['price'].mean().round(4) for zone_df in (a2w_13, b2w_13, c2w_13)]

# Individual names kept alongside the list
mean_a2w_13, mean_b2w_13, mean_c2w_13 = mean_13s

In [None]:
#Barplot: 2018 -> us-west-2
#   Mean (mean_13s) next to standard deviation (sd_13s) for each availability zone.
#   (simply change style to make for a presentation)

plt.style.use('seaborn')

AZs = ['us-west-2' + letter for letter in 'abc']
AZ_indexes = np.arange(len(AZs))

width = 0.25

# Each series is shifted half a bar width so the pair sits centred on its tick
plt.bar(AZ_indexes - width / 2, mean_13s, width=width, label='Mean', color='#1cb8ab')
plt.bar(AZ_indexes + width / 2, sd_13s, width=width, label='Standard Deviation', color='#de1bd4')

plt.xticks(AZ_indexes, AZs)

plt.ylabel('Spot Price (USD)')
plt.title("\n".join([
    'Means and Standard Deviations of Spot Prices Across All us-west-2 Availability Zones',
    'Right AFTER the 2017 Change to the Spot Market (01/01/2018 - 03/31/2018)',
]))
#plt.yticks([0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75,3])

plt.legend(loc='best')
plt.tight_layout()
#plt.savefig('mean-sd_usw2_2018_BAR')
#plt.show()

#### Further Standard Deviation/Mean Stuff

##### 2017: us-east-1

In [None]:
# Means & SDs of the spot price per availability zone, for each chosen instance type
#   2017: gen 2 and above, must have at least 4 dif sizes
# For every (operating system, instance type) pair this prints the per-zone mean and
# standard deviation and draws one bar chart in its own figure. The three accumulator
# lists are reset together per pair, so several instance types / OSes can be listed
# without the bar heights and the zone labels getting out of step.

Instances = ['r4.16xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

plt.style.use('seaborn')

width = 0.25

for o in OS:
    temp = df6[df6.os == o]

    for i in Instances:
        temp1 = temp[temp.instance == i]

        # Fresh accumulators for this (OS, instance type) pair
        temp_mean = []
        temp_sd = []
        temp_a = []

        for a in temp1.zone.unique():
            temp2 = temp1[temp1.zone == a]

            mean = temp2['price'].values.mean().round(4)
            # NOTE(review): `.values.std()` is NumPy's std (ddof=0 by default), while the
            # per-AZ cells call pandas `.std()` (ddof=1 by default) -- confirm which is intended.
            sd = temp2['price'].values.std().round(4)

            print('The average price you would have to pay for having the ' + i + ' VM over the 3 month period' +
                  "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            print(mean)
            print(' ')

            print('The standard deviation of the spot price for having the ' + i +
                  ' VM over the 3 month period' + "\n" +'within the ' + a + ' availability zone and the ' +
                  o + ' operating system is:')
            print(sd)
            print(' ')

            temp_mean.append(mean)
            temp_sd.append(sd)
            temp_a.append(a)

        # Nothing matched this OS / instance type: say so instead of drawing an empty chart
        if not temp_a:
            print('No rows found for the ' + i + ' instance type and the ' + o +
                  ' operating system; skipping its plot.')
            continue

        # One figure per pair so successive charts never overlay each other
        plt.figure()

        AZx_indexes = np.arange(len(temp_a))

        plt.bar(AZx_indexes-0.5*width, temp_mean, width=width, label='Mean', color='#6f19e0')
        plt.bar(AZx_indexes+0.5*width, temp_sd, width=width, label='Standard Deviation', color='#8c8c8c')

        plt.title('Means and Standard Deviations of Spot Prices for the ' + i + ' Instance' + "\n" +
                  'Type Right BEFORE the 2017 Change (07/01/2017 - 09/30/2017) Across' + "\n" +
                  'the us-east-1 Availability Zones and ' + o + ' Operating Systems')
        plt.ylabel('Price (USD)')
        #plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5,5])
        plt.xticks(ticks=AZx_indexes, labels=temp_a)#,rotation=25)

        plt.legend(loc='best')
        #plt.legend(loc = (.745,.895))
        plt.tight_layout()

        #plt.savefig(i + '_us1_meanSD_2017.png')
#plt.show()

##### 2018: us-east-1

In [None]:
# Means & SDs of the spot price per availability zone, for each chosen instance type
#   2018: gen 2 and above, must have at least 4 dif sizes
# For every (operating system, instance type) pair this prints the per-zone mean and
# standard deviation and draws one bar chart in its own figure. The three accumulator
# lists are reset together per pair, so several instance types / OSes can be listed
# without the bar heights and the zone labels getting out of step.

Instances = ['x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

plt.style.use('seaborn')

width = 0.25

for o in OS:
    temp = df11[df11.os == o]

    for i in Instances:
        temp1 = temp[temp.instance == i]

        # Fresh accumulators for this (OS, instance type) pair
        temp_mean = []
        temp_sd = []
        temp_a = []

        for a in temp1.zone.unique():
            temp2 = temp1[temp1.zone == a]

            mean = temp2['price'].values.mean().round(4)
            # NOTE(review): `.values.std()` is NumPy's std (ddof=0 by default), while the
            # per-AZ cells call pandas `.std()` (ddof=1 by default) -- confirm which is intended.
            sd = temp2['price'].values.std().round(4)

            print('The average price you would have to pay for having the ' + i + ' VM over the 3 month period' +
                  "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            print(mean)
            print(' ')

            print('The standard deviation of the spot price for having the ' + i +
                  ' VM over the 3 month period' + "\n" +'within the ' + a + ' availability zone and the ' +
                  o + ' operating system is:')
            print(sd)
            print(' ')

            temp_mean.append(mean)
            temp_sd.append(sd)
            temp_a.append(a)

        # Nothing matched this OS / instance type: say so instead of drawing an empty chart
        if not temp_a:
            print('No rows found for the ' + i + ' instance type and the ' + o +
                  ' operating system; skipping its plot.')
            continue

        # One figure per pair so successive charts never overlay each other
        plt.figure()

        AZx_indexes = np.arange(len(temp_a))

        plt.bar(AZx_indexes-0.5*width, temp_mean, width=width, label='Mean', color='#23ad4f')
        plt.bar(AZx_indexes+0.5*width, temp_sd, width=width, label='Standard Deviation', color='#231ebd')

        plt.title('Means and Standard Deviations of Spot Prices for the ' + i + ' Instance' + "\n" +
                  'Type Right AFTER the 2017 Change (01/01/2018 - 03/31/2018) Across' + "\n" +
                  'the us-east-1 Availability Zones and ' + o + ' Operating Systems')
        plt.ylabel('Price (USD)')
        #plt.yticks([0,2.5,5,7.5,10,12.5,15,17.5,20,22.5,25,27.5])
        plt.xticks(ticks=AZx_indexes, labels=temp_a)#,rotation=25)

        plt.legend(loc='best')
        #plt.legend(loc = (.51,.88))
        plt.tight_layout()

        #plt.savefig(i + '_us1_meanSD_2018.png')
#plt.show()

##### 2021: us-east-1

In [None]:
# Means & SDs of the spot price per availability zone, for each chosen instance type
#   2021: gen 2 and above, must have at least 4 dif sizes
# For every (operating system, instance type) pair this prints the per-zone mean and
# standard deviation and draws one bar chart in its own figure. The three accumulator
# lists are reset together per pair, so several instance types / OSes can be listed
# without the bar heights and the zone labels getting out of step.

Instances = ['x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

plt.style.use('seaborn')

width = 0.25

for o in OS:
    temp = df1[df1.OS == o]

    for i in Instances:
        temp1 = temp[temp.IT == i]

        # Fresh accumulators for this (OS, instance type) pair
        temp_mean = []
        temp_sd = []
        temp_a = []

        for a in temp1.AZ.unique():
            temp2 = temp1[temp1.AZ == a]

            mean = temp2['SP'].values.mean().round(4)
            # NOTE(review): `.values.std()` is NumPy's std (ddof=0 by default), while the
            # per-AZ cells call pandas `.std()` (ddof=1 by default) -- confirm which is intended.
            sd = temp2['SP'].values.std().round(4)

            print('The average price you would have to pay for having the ' + i + ' VM over the 3 month period' +
                  "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
            print(mean)
            print(' ')

            print('The standard deviation of the spot price for having the ' + i +
                  ' VM over the 3 month period' + "\n" +'within the ' + a + ' availability zone and the ' +
                  o + ' operating system is:')
            print(sd)
            print(' ')

            temp_mean.append(mean)
            temp_sd.append(sd)
            temp_a.append(a)

        # Nothing matched this OS / instance type: say so instead of drawing an empty chart
        if not temp_a:
            print('No rows found for the ' + i + ' instance type and the ' + o +
                  ' operating system; skipping its plot.')
            continue

        # One figure per pair so successive charts never overlay each other
        plt.figure()

        AZx_indexes = np.arange(len(temp_a))

        plt.bar(AZx_indexes-0.5*width, temp_mean, width=width, label='Mean', color='#d2e317')
        plt.bar(AZx_indexes+0.5*width, temp_sd, width=width, label='Standard Deviation', color='#f5ba18')

        plt.title('Means and Standard Deviations of Spot Prices for the ' + i + ' Instance' + "\n" +
                  'Type A Few Years After the 2017 Change (03/04/2021 - 06/01/2021) Across' + "\n" +
                  'the us-east-1 Availability Zones and ' + o + ' Operating Systems')
        plt.ylabel('Price (USD)')
        #plt.yticks([0,1,2,3,4,5,6,7,8,9])
        plt.xticks(ticks=AZx_indexes, labels=temp_a)#,rotation=25)

        plt.legend(loc='best')
        #plt.legend(loc = (.75,.89))
        plt.tight_layout()

        #plt.savefig(i + '_us1_meanSD_2021.png')
#plt.show()

#### std/mean proportions: boxplots

In [None]:
# std/mean proportions: an entire family/gen
#   Computes the standard-deviation-to-mean ratio of the spot price for every
#   (operating system, instance type, availability zone) group in each of the three
#   data sets, then draws one box per data set.

Instances = ['x1e.xlarge', 'x1e.2xlarge', 'x1e.4xlarge', 'x1e.8xlarge', 'x1e.16xlarge', 'x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want


def _sd_mean_ratios(frame, os_col, type_col, zone_col, price_col, year, os_names, instance_types):
    '''Return the std/mean ratio of the price for each (OS, instance type, zone) group.

    @param frame: DataFrame holding one data set
    @param os_col: name of the operating-system column in `frame`
    @param type_col: name of the instance-type column in `frame`
    @param zone_col: name of the availability-zone column in `frame`
    @param price_col: name of the spot-price column in `frame`
    @param year: year label (string) inserted into the printed message
    @param os_names: operating systems to include
    @param instance_types: instance types to include
    @return list of ratios rounded to 4 decimals, ordered by OS, then instance type,
            then by first appearance of each zone

    Each ratio is also printed together with a sentence identifying its group.
    '''
    ratios = []

    for o in os_names:
        by_os = frame[frame[os_col] == o]

        for i in instance_types:
            by_type = by_os[by_os[type_col] == i]

            for a in by_type[zone_col].unique():
                prices = by_type[by_type[zone_col] == a][price_col].values

                # Divide the unrounded statistics and round only the final ratio; rounding
                # mean and sd to 4 decimals first distorts the ratio for low-priced types.
                prop = (prices.std() / prices.mean()).round(4)

                print('The standard deviation to mean ratio for having the ' + i + ' VM over the 3 month period in ' + year +
                      "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
                print(prop)
                print(' ')

                ratios.append(prop)

    return ratios


# Same computation for the 2017, 2018 and 2021 data sets; only the column names differ
results_6 = _sd_mean_ratios(df6, 'os', 'instance', 'zone', 'price', '2017', OS, Instances)
results_11 = _sd_mean_ratios(df11, 'os', 'instance', 'zone', 'price', '2018', OS, Instances)
results_1 = _sd_mean_ratios(df1_LU, 'OS', 'IT', 'AZ', 'SP', '2021', OS, Instances)

r_df6 = pd.Series(results_6)
r_df11 = pd.Series(results_11)
r_df1LU = pd.Series(results_1)

boxes = [r_df6, r_df11, r_df1LU]

plt.style.use('seaborn')

plt.boxplot(boxes)

plt.title('Boxplots of the Standard Deviation to Mean Ratio of x1e Instance Types, Across Certain' +
          "\n" + 'Time Periods, Within All us-east-1 Availability Zones and Linux/UNIX Operating Systems')
plt.xticks([1,2,3], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018',
                    '03/04/2021 -' + "\n" + '06/01/2021'])
# Alternative y tick sets for differently scaled families; enable the one that fits the data
#plt.yticks([0,0.001,0.002,0.003,0.004,0.005,0.006,0.007,0.008,0.009,0.010,0.011,0.012,0.013,0.014,0.015])
#plt.yticks([0,0.002,0.004,0.006,0.008,0.010,0.0120,0.0140,0.0160,0.0180,0.020,0.0220,0.024])
#plt.yticks([0,0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05,0.055])
#plt.yticks([0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18])
#plt.yticks([0,0.025,0.05,0.075,0.1,0.125,0.15,0.175,0.2,0.225,0.25,0.275,0.3,0.325,0.35,0.375])
#plt.yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5])
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3])
#plt.yticks([0,0.2,0.4,0.6,0.8,1,1.2,1.4,1.6,1.8,2,2.2,2.4,2.6,2.8])
#plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5])

plt.tight_layout()
#plt.savefig('sd_mean_PROP_x1e_us1_BOX.png')
#plt.show()

In [None]:
# std/mean proportions: one specific IT at a time
#
# For every availability zone of the selected instance type(s), compute the
# std/mean ratio of the spot price in each of the three periods, print it, and
# compare the three periods with one boxplot per period.

Instances = ['x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

# One accumulator per period, created up front so none of them depends on
# which loop happened to run first.
results_6 = []   # ratios from df6 (printed as 2017)
results_11 = []  # ratios from df11 (printed as 2018)
results_1 = []   # ratios from df1_LU (printed as 2021)

# (year used in the printout, frame, OS column, instance column, zone column,
#  price column, accumulator). df1_LU uses the short OS/IT/AZ/SP column names.
periods = [
    ('2017', df6, 'os', 'instance', 'zone', 'price', results_6),
    ('2018', df11, 'os', 'instance', 'zone', 'price', results_11),
    ('2021', df1_LU, 'OS', 'IT', 'AZ', 'SP', results_1),
]

for year, frame, os_col, it_col, az_col, sp_col, ratios in periods:
    for o in OS:
        by_os = frame[frame[os_col] == o]

        for i in Instances:
            by_it = by_os[by_os[it_col] == i]

            for a in by_it[az_col].unique():
                prices = by_it[by_it[az_col] == a][sp_col].values

                mean = prices.mean().round(4)
                sd = prices.std().round(4)  # ndarray.std default: population std (ddof=0)

                prop = (sd/mean).round(4)

                print('The standard deviation to mean ratio for having the ' + i + ' VM over the 3 month period in ' + year +
                      "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
                print(prop)
                print(' ')

                ratios.append(prop)

r_df6 = pd.Series(results_6)
r_df11 = pd.Series(results_11)
r_df1LU = pd.Series(results_1)

boxes = [r_df6, r_df11, r_df1LU]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Drawn once, after every period has been processed.
plt.boxplot(boxes)

plt.title('Boxplots of the Standard Deviation to Mean Ratio of the ' + Instances[-1] +  ' Instance Type, Across' + "\n" +
          'Certain Time Periods, Within All us-east-1 Availability Zones and Linux/UNIX Operating Systems')
plt.xticks([1,2,3], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018',
                    '03/04/2021 -' + "\n" + '06/01/2021'])
# Optional y-tick presets; uncomment the one that fits the plotted range.
#plt.yticks([0,0.001,0.002,0.003,0.004,0.005,0.006,0.007,0.008,0.009,0.010,0.011,0.012,0.013,0.014,0.015])
#plt.yticks([0,0.002,0.004,0.006,0.008,0.010,0.0120,0.0140,0.0160,0.0180,0.020,0.0220,0.024])
#plt.yticks([0,0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05,0.055])
#plt.yticks([0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11])
#plt.yticks([0,0.025,0.05,0.075,0.1,0.125,0.15,0.175,0.2,0.225,0.25,0.275,0.3,0.325,0.35,0.375])
#plt.yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.70,0.75,0.8])
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3])
#plt.yticks([0,0.2,0.4,0.6,0.8,1,1.2,1.4,1.6,1.8,2,2.2,2.4,2.6,2.8])
#plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5])

plt.tight_layout()
#plt.savefig(Instances[-1] + '_sd_mean_PROP_us1_BOX.png')
#plt.show()

##### Same cell as above, copied and restyled for presentations

In [None]:
# std/mean proportions: PRES
#
# Presentation-styled variant: per availability zone, compute the std/mean
# ratio of the spot price in each of the three periods, print it, and draw one
# boxplot per period.

Instances = ['r4.2xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want

# One accumulator per period, created up front so none of them depends on
# which loop happened to run first.
results_6 = []   # ratios from df6 (printed as 2017)
results_11 = []  # ratios from df11 (printed as 2018)
results_1 = []   # ratios from df1_LU (printed as 2021)

# (year used in the printout, frame, OS column, instance column, zone column,
#  price column, accumulator). df1_LU uses the short OS/IT/AZ/SP column names.
periods = [
    ('2017', df6, 'os', 'instance', 'zone', 'price', results_6),
    ('2018', df11, 'os', 'instance', 'zone', 'price', results_11),
    ('2021', df1_LU, 'OS', 'IT', 'AZ', 'SP', results_1),
]

for year, frame, os_col, it_col, az_col, sp_col, ratios in periods:
    for o in OS:
        by_os = frame[frame[os_col] == o]

        for i in Instances:
            by_it = by_os[by_os[it_col] == i]

            for a in by_it[az_col].unique():
                prices = by_it[by_it[az_col] == a][sp_col].values

                mean = prices.mean().round(4)
                sd = prices.std().round(4)  # ndarray.std default: population std (ddof=0)

                prop = (sd/mean).round(4)

                print('The standard deviation to mean ratio for having the ' + i + ' VM over the 3 month period in ' + year +
                      "\n" + 'within the ' + a + ' availability zone and the ' + o + ' operating system is:')
                print(prop)
                print(' ')

                ratios.append(prop)

r_df6 = pd.Series(results_6)
r_df11 = pd.Series(results_11)
r_df1LU = pd.Series(results_1)

boxes = [r_df6, r_df11, r_df1LU]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

# Drawn once, after every period has been processed.
plt.boxplot(boxes)

#plt.title('Boxplots of the Standard Deviation to Mean Ratio of r4 Instance Types, Across Certain' +
          #"\n" + 'Time Periods, Within All us-east-1 Availability Zones and Linux/UNIX Operating Systems')
plt.title('Boxplots of the Standard Deviation to Mean Ratio of the ' + Instances[-1] +  ' Instance Type, Across' + "\n" +
          'Certain Time Periods, Within All us-east-1 Availability Zones and Linux/UNIX Operating Systems')

plt.xticks([1,2,3], ['07/01/2017 -' + "\n" + '09/30/2017','01/01/2018 -' + "\n" + '03/31/2018',
                    '03/04/2021 -' + "\n" + '06/01/2021'])
# Optional y-tick presets; uncomment the one that fits the plotted range.
#plt.yticks([0,0.001,0.002,0.003,0.004,0.005,0.006,0.007,0.008,0.009,0.010,0.011,0.012,0.013,0.014,0.015])
#plt.yticks([0,0.002,0.004,0.006,0.008,0.010,0.0120,0.0140,0.0160,0.0180,0.020,0.0220,0.024])
#plt.yticks([0,0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05,0.055])
#plt.yticks([0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17])
#plt.yticks([0,0.025,0.05,0.075,0.1,0.125,0.15,0.175,0.2,0.225,0.25,0.275])
#plt.yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65])
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2])
#plt.yticks([0,0.2,0.4,0.6,0.8,1,1.2,1.4,1.6,1.8,2,2.2,2.4])
#plt.yticks([0,0.5,1,1.5,2,2.5,3,3.5,4,4.5])

plt.tight_layout()
#plt.savefig('sd_mean_PROP_r4_us1_BOXpres.png')
#plt.savefig(Instances[-1] + '_sd_mean_PROP_us1_BOXpres.png')
#plt.show()

## Part 5: Additional analysis, using all data

#### Cost Per Hour for 2017 Data

In [None]:
# Split df5 into calendar months using half-open [start, next month) windows.
df5_jul = df5.loc[(df5['time'] >= '2017-07-01') & (df5['time'] < '2017-08-01')] #first time period (all of July)
df5_aug = df5.loc[(df5['time'] >= '2017-08-01') & (df5['time'] < '2017-09-01')] #second time period (all of August)
# Upper bound added so this slice really is "all of September": the hourly-cost
# cells divide its integral by 720 h, which is only right for a 30-day window.
df5_sept = df5.loc[(df5['time'] >= '2017-09-01') & (df5['time'] < '2017-10-01')] # third time period (all of September)

In [None]:
# GRAPH = BAR PLOT, HOURLY COST
#
# For each instance type, integrate the spot price over each month, divide by
# the number of hours in that month, print the result, and draw a grouped bar
# chart with one bar per month.

Instances = ['c4.large', 'c4.xlarge', 'c4.2xlarge', 'c4.4xlarge', 'c4.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1f'] #choose what you want

# One accumulator per month, created up front so none of them depends on which
# loop happened to run first.
temp_vh_2 = []  # hourly cost, first period (df5_jul)
temp_vh_3 = []  # hourly cost, second period (df5_aug)
temp_vh_4 = []  # hourly cost, third period (df5_sept)

# (ordinal used in the printout, monthly frame, hours to divide by, accumulator)
periods = [
    ('first', df5_jul, 744, temp_vh_2),   # 31 days
    ('second', df5_aug, 744, temp_vh_3),  # 31 days
    ('third', df5_sept, 720, temp_vh_4),  # 30 days
]

for ordinal, frame, hours, hourly_costs in periods:
    for o in OS:
        by_os = frame[frame.os == o]

        for i in Instances:
            by_it = by_os[by_os.instance == i]

            for a in AZ:
                by_az = by_it[by_it.zone == a]

                # price indexed by time; integrate() is the Series method
                # patched in near the top of the notebook
                ts = pd.Series(by_az['price'].values, by_az['time'].values)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

# One x label per bar group, in the order the values were accumulated above.
temp_i = [i for o in OS for i in Instances for a in AZ]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

ITx_indexes = np.arange(len(temp_i))
width = 0.25

# Drawn once, after every period has been processed.
plt.bar(ITx_indexes-width, temp_vh_2, width=width, label = '07/01/2017 -  07/31/2017')
plt.bar(ITx_indexes, temp_vh_3, width=width, label = '08/01/2017 -  08/31/2017')
plt.bar(ITx_indexes+width, temp_vh_4, width=width, label = '09/01/2017 -  09/30/2017')

# The title and file name describe the last selected instance family, zone and OS.
family = Instances[-1].split('.')[0]

plt.title('Hourly Price for ' + family +
    ' Instance Types Right BEFORE the 2017 Change Over Specific Time' + "\n" + 'Periods Across the ' + AZ[-1] +
    ' Availability Zone and ' + OS[-1] + ' Operating Systems')
# NOTE(review): the bars are per-hour costs, so this label may deserve a rename.
plt.ylabel('Total Cost (USD)')
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2])
plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=15)

plt.legend()
plt.tight_layout()

#plt.savefig(family + '_' + AZ[-1] + '_2017_dif_periods_hour_BAR.png')
#plt.show()

In [None]:
# GRAPH = BAR PLOT, HOURLY COST --> PRES
#
# Presentation-styled variant: for each instance type, integrate the spot
# price over each month, divide by the hours covered, print the result, and
# draw a grouped bar chart with one bar per month.

Instances = ['c3.large', 'c3.xlarge', 'c3.2xlarge', 'c3.4xlarge', 'c3.8xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# One accumulator per month, created up front so none of them depends on which
# loop happened to run first.
temp_vh_2 = []  # hourly cost, first period (df5_jul)
temp_vh_3 = []  # hourly cost, second period (df5_aug)
temp_vh_4 = []  # hourly cost, third period (df5_sept)

# (ordinal used in the printout, monthly frame, hours to divide by, accumulator)
periods = [
    ('first', df5_jul, 744, temp_vh_2),   # 31 days
    ('second', df5_aug, 432, temp_vh_3),  # STARTS IN MID AUGUST (18 days instead of 744 h)
    ('third', df5_sept, 720, temp_vh_4),  # 30 days
]

for ordinal, frame, hours, hourly_costs in periods:
    for o in OS:
        by_os = frame[frame.os == o]

        for i in Instances:
            by_it = by_os[by_os.instance == i]

            for a in AZ:
                by_az = by_it[by_it.zone == a]

                # price indexed by time; integrate() is the Series method
                # patched in near the top of the notebook
                ts = pd.Series(by_az['price'].values, by_az['time'].values)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

# One x label per bar group, in the order the values were accumulated above.
temp_i = [i for o in OS for i in Instances for a in AZ]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

ITx_indexes = np.arange(len(temp_i))
width = 0.25

# Drawn once, after every period has been processed.
plt.bar(ITx_indexes-width, temp_vh_2, width=width, label = '07/01/2017 -  07/31/2017', color = '#2222b5')
plt.bar(ITx_indexes, temp_vh_3, width=width, label = '08/01/2017 -  08/31/2017', color = '#22a358')
plt.bar(ITx_indexes+width, temp_vh_4, width=width, label = '09/01/2017 -  09/30/2017', color = '#cf1717')

# The title and file name describe the last selected instance family, zone and OS.
family = Instances[-1].split('.')[0]

plt.title('Hourly Price for ' + family +
    ' Instance Types Right BEFORE the 2017 Change Over Specific Time' + "\n" + 'Periods Across the ' + AZ[-1] +
    ' Availability Zone and ' + OS[-1] + ' Operating Systems', fontsize=20)
# NOTE(review): the bars are per-hour costs, so this label may deserve a rename.
plt.ylabel('Total Cost (USD)', fontsize=25)
plt.yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65], fontsize=20)
plt.xticks(ticks=ITx_indexes, labels=temp_i, fontsize=20)#, rotation=15)

plt.legend(prop={"size":20})
plt.tight_layout()

plt.savefig(family + '_' + AZ[-1] + '_2017_dif_periods_hour_PRESmod.png')
plt.show()

#### Cost Per Hour for 2018 Data

In [None]:
# Split df11 into calendar months using half-open [start, next month) windows.
df11_jan = df11.loc[(df11['time'] >= '2018-01-01') & (df11['time'] < '2018-02-01')] #first time period (all of Jan)
df11_feb = df11.loc[(df11['time'] >= '2018-02-01') & (df11['time'] < '2018-03-01')] #second time period (all of Feb)
# Upper bound added so this slice really is "all of March": the hourly-cost
# cells divide its integral by 744 h, which is only right for a 31-day window.
df11_mar = df11.loc[(df11['time'] >= '2018-03-01') & (df11['time'] < '2018-04-01')] # third time period (all of March)

In [None]:
# GRAPH = BAR PLOT, HOURLY COST
#
# For each instance type, integrate the spot price over each month, divide by
# the number of hours in that month, print the result, and draw a grouped bar
# chart with one bar per month.

Instances = ['x1e.xlarge', 'x1e.2xlarge', 'x1e.4xlarge', 'x1e.8xlarge', 'x1e.16xlarge', 'x1e.32xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1f'] #choose what you want

# One accumulator per month, created up front so none of them depends on which
# loop happened to run first.
temp_vh_2 = []  # hourly cost, first period (df11_jan)
temp_vh_3 = []  # hourly cost, second period (df11_feb)
temp_vh_4 = []  # hourly cost, third period (df11_mar)

# (ordinal used in the printout, monthly frame, hours to divide by, accumulator)
periods = [
    ('first', df11_jan, 744, temp_vh_2),   # 31 days
    ('second', df11_feb, 672, temp_vh_3),  # 28 days
    ('third', df11_mar, 744, temp_vh_4),   # 31 days
]

for ordinal, frame, hours, hourly_costs in periods:
    for o in OS:
        by_os = frame[frame.os == o]

        for i in Instances:
            by_it = by_os[by_os.instance == i]

            for a in AZ:
                by_az = by_it[by_it.zone == a]

                # price indexed by time; integrate() is the Series method
                # patched in near the top of the notebook
                ts = pd.Series(by_az['price'].values, by_az['time'].values)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

# One x label per bar group, in the order the values were accumulated above.
temp_i = [i for o in OS for i in Instances for a in AZ]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

ITx_indexes = np.arange(len(temp_i))
width = 0.25

# Drawn once, after every period has been processed.
plt.bar(ITx_indexes-width, temp_vh_2, width=width, label = '01/01/2018 -  01/31/2018')
plt.bar(ITx_indexes, temp_vh_3, width=width, label = '02/01/2018 -  02/28/2018')
plt.bar(ITx_indexes+width, temp_vh_4, width=width, label = '03/01/2018 -  03/31/2018')

# The title and file name describe the last selected instance family, zone and OS.
family = Instances[-1].split('.')[0]

plt.title('Hourly Price for ' + family +
    ' Instance Types Right AFTER the 2017 Change Over Specific Time' + "\n" + 'Periods Across the ' + AZ[-1] +
    ' Availability Zone and ' + OS[-1] + ' Operating Systems')
# NOTE(review): the bars are per-hour costs, so this label may deserve a rename.
plt.ylabel('Total Cost (USD)')
#plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3])
plt.xticks(ticks=ITx_indexes, labels=temp_i)#, rotation=15)

plt.legend()
plt.tight_layout()

#plt.savefig(family + '_' + AZ[-1] + '_2018_dif_periods_hour_BAR.png')
#plt.show()

In [None]:
# GRAPH = BAR PLOT, HOURLY COST --> PRES
#
# Presentation-styled variant: for each instance type, integrate the spot
# price over each month, divide by the number of hours in that month, print
# the result, and draw a grouped bar chart with one bar per month.

Instances = ['c5.large', 'c5.xlarge', 'c5.2xlarge', 'c5.4xlarge', 'c5.9xlarge', 'c5.18xlarge'] #choose what you want
OS = ['Linux/UNIX'] #choose what you want
AZ = ['us-east-1c'] #choose what you want

# One accumulator per month, created up front so none of them depends on which
# loop happened to run first.
temp_vh_2 = []  # hourly cost, first period (df11_jan)
temp_vh_3 = []  # hourly cost, second period (df11_feb)
temp_vh_4 = []  # hourly cost, third period (df11_mar)

# (ordinal used in the printout, monthly frame, hours to divide by, accumulator)
periods = [
    ('first', df11_jan, 744, temp_vh_2),   # 31 days
    ('second', df11_feb, 672, temp_vh_3),  # 28 days
    ('third', df11_mar, 744, temp_vh_4),   # 31 days
]

for ordinal, frame, hours, hourly_costs in periods:
    for o in OS:
        by_os = frame[frame.os == o]

        for i in Instances:
            by_it = by_os[by_os.instance == i]

            for a in AZ:
                by_az = by_it[by_it.zone == a]

                # price indexed by time; integrate() is the Series method
                # patched in near the top of the notebook
                ts = pd.Series(by_az['price'].values, by_az['time'].values)

                v = ts.integrate().round(4)

                print('The total cost per hour you would have to pay for having the ' + i +
                      ' VM over the ' + ordinal + ' time period' + "\n" + 'within the ' + a + ' availability zone and the ' + o +
                      ' operating system is:')
                vh = (v/hours).round(4)
                print(vh)
                print(' ')

                hourly_costs.append(vh)

# One x label per bar group, in the order the values were accumulated above.
temp_i = [i for o in OS for i in Instances for a in AZ]

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

ITx_indexes = np.arange(len(temp_i))
width = 0.25

# Drawn once, after every period has been processed.
plt.bar(ITx_indexes-width, temp_vh_2, width=width, label = '01/01/2018 -  01/31/2018', color = '#2222b5')
plt.bar(ITx_indexes, temp_vh_3, width=width, label = '02/01/2018 -  02/28/2018', color = '#22a358')
plt.bar(ITx_indexes+width, temp_vh_4, width=width, label = '03/01/2018 -  03/31/2018', color = '#cf1717')

# The title and file name describe the last selected instance family, zone and OS.
family = Instances[-1].split('.')[0]

plt.title('Hourly Price for ' + family +
    ' Instance Types Right AFTER the 2017 Change Over Specific Time' + "\n" + 'Periods Across the ' + AZ[-1] +
    ' Availability Zone and ' + OS[-1] + ' Operating Systems', fontsize=22)
# NOTE(review): the bars are per-hour costs, so this label may deserve a rename.
plt.ylabel('Total Cost (USD)')
plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3], fontsize=20)
plt.xticks(ticks=ITx_indexes, labels=temp_i, fontsize=20)#, rotation=10)

plt.legend(prop={"size":20})
plt.tight_layout()

plt.savefig(family + '_' + AZ[-1] + '_2018_dif_periods_hour_PRESmod.png')
plt.show()

#### Pricing Events Frequency

In [None]:
def get_events(PEF):
    '''Count the pricing events in a price history.

    A pricing event is a row whose ``price`` differs from the price of the
    row directly before it, in the frame's current row order.

    @param PEF: DataFrame with a ``price`` column
    @return the number of price changes as a plain ``int`` (0 for frames
            with fewer than two rows)

    The comparison is vectorised, so no progress bar is needed and the
    ``instance`` column is no longer required just to get the row count.
    '''
    prices = PEF['price'].values

    # compare every row with its predecessor in one shot
    changed = prices[1:] != prices[:-1]

    return int(changed.sum())

In [None]:
def timestamp(Matt):
    '''Convert the ``time`` column of ``Matt`` to epoch seconds.

    @param Matt: object with a ``time`` attribute/column that
                 ``pd.to_datetime`` can parse
    @return numpy float array with one value per row (seconds since the
            epoch, sub-second part kept as a fraction)
    '''
    parsed = pd.to_datetime(Matt.time)

    # Force nanosecond resolution before taking the integer view: the division
    # below assumes nanoseconds, which is not guaranteed for every parsed input.
    nanos = np.asarray(parsed, dtype='datetime64[ns]').astype(np.int64)

    return(nanos/1000000000)

In [None]:
def midnights(guy):
    '''Return the day boundaries enclosed by the data in ``guy``.

    @param guy: frame accepted by ``timestamp`` (needs a ``time`` column)
    @return two-element list: the first multiple of 86400 at or after the
            first timestamp, and the last multiple of 86400 at or before the
            last timestamp

    Only the first and last rows are inspected.
    # assumes rows are in chronological order — TODO confirm
    '''
    stamps = timestamp(guy)
    day = 86400  # seconds per day

    first_midnight = day*math.ceil(stamps[0]/day)
    last_midnight = day*math.floor(stamps[-1]/day)

    return [first_midnight, last_midnight]

# returns the timestamps of the first and last midnight

##### 2017

In [None]:
# 2017, c4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df16_in_1a = df16['zone'].eq('us-east-1a')

c4l_2017_1a = df16[df16['instance'].eq('c4.large') & _df16_in_1a]
c4x_2017_1a = df16[df16['instance'].eq('c4.xlarge') & _df16_in_1a]
c42x_2017_1a = df16[df16['instance'].eq('c4.2xlarge') & _df16_in_1a]
c44x_2017_1a = df16[df16['instance'].eq('c4.4xlarge') & _df16_in_1a]
c48x_2017_1a = df16[df16['instance'].eq('c4.8xlarge') & _df16_in_1a]

In [None]:
# 2017, m4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df16_in_1a = df16['zone'].eq('us-east-1a')

m4l_2017_1a = df16[df16['instance'].eq('m4.large') & _df16_in_1a]
m4x_2017_1a = df16[df16['instance'].eq('m4.xlarge') & _df16_in_1a]
m42x_2017_1a = df16[df16['instance'].eq('m4.2xlarge') & _df16_in_1a]
m44x_2017_1a = df16[df16['instance'].eq('m4.4xlarge') & _df16_in_1a]
m410x_2017_1a = df16[df16['instance'].eq('m4.10xlarge') & _df16_in_1a]
m416x_2017_1a = df16[df16['instance'].eq('m4.16xlarge') & _df16_in_1a]

In [None]:
# List the distinct instance names in df16 that contain "r4".
df16.loc[df16['instance'].str.contains("r4"), 'instance'].unique()

In [None]:
# 2017, r4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df16_in_1a = df16['zone'].eq('us-east-1a')

# .copy(): the pricing-event cells overwrite the `time` column of these frames
# in place, so each one must be an independent frame rather than a slice of
# df16 (assigning into a slice triggers pandas' SettingWithCopyWarning).
r4l_2017_1a = df16[df16['instance'].eq('r4.large') & _df16_in_1a].copy()
r4x_2017_1a = df16[df16['instance'].eq('r4.xlarge') & _df16_in_1a].copy()
r42x_2017_1a = df16[df16['instance'].eq('r4.2xlarge') & _df16_in_1a].copy()
r44x_2017_1a = df16[df16['instance'].eq('r4.4xlarge') & _df16_in_1a].copy()
r48x_2017_1a = df16[df16['instance'].eq('r4.8xlarge') & _df16_in_1a].copy()
r416x_2017_1a = df16[df16['instance'].eq('r4.16xlarge') & _df16_in_1a].copy()

In [None]:
# df_0 is an alias of the selected frame, not a copy: the assignment at the
# bottom also changes r416x_2017_1a.
df_0 = r416x_2017_1a

# Both helpers read the still-unconverted `time` column, so they have to run
# before that column is overwritten.
temp = timestamp(df_0)
mid = midnights(df_0)

# Replace the `time` column with the numeric values from timestamp().
df_0['time'] = temp

# ONLY RUN ONCE BEFORE USING LOOP BELOW; change df_0 before running again

In [None]:
# Daily number of price records for the selected frame. Relies on `mid` and on
# the numeric `time` column produced by the preparation cell above.
df = r416x_2017_1a

# start of every whole day from the first midnight up to (excluding) the last one
day_starts = range(mid[0],mid[1],86400)

# rows whose timestamp falls inside each [day start, day start + 86400) window
results = [df[(df.time >= d) & (df.time < (d + 86400))].time.size for d in day_starts]
#print(results)

# seconds -> nanoseconds -> datetimes, then keep only the date part as a label
xaxis = pd.to_datetime(pd.Series([d*1000000000 for d in day_starts]))

xaxis2 = [str(k).split("T")[0] for k in xaxis.values]
#print(xaxis2)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

plt.plot(xaxis2, results, marker='.', color='k')
plt.title('Number of Pricing Events for the r4.16xlarge Instance Type in 2017 (Jul-Sept)' + "\n" +
         'Within the us-east-1a Availability Zone and Linux/UNIX Operating Systems')

plt.ylabel('Number of Pricing Events')
plt.yticks([0,100,200,300,400,500,600,700,800,900,1000])
# x values are date strings, so these tick positions are indexes into xaxis2
plt.xticks([0,10,20,30,40,50,60,70,80,89], rotation=25)

plt.grid()
plt.tight_layout()
#plt.savefig('r4.16xlarge_PE_1a_2017.png')
plt.show()

##### 2018

In [None]:
# 2018, c4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df18_in_1a = df18['zone'].eq('us-east-1a')

c4l_2018_1a = df18[df18['instance'].eq('c4.large') & _df18_in_1a]
c4x_2018_1a = df18[df18['instance'].eq('c4.xlarge') & _df18_in_1a]
c42x_2018_1a = df18[df18['instance'].eq('c4.2xlarge') & _df18_in_1a]
c44x_2018_1a = df18[df18['instance'].eq('c4.4xlarge') & _df18_in_1a]
c48x_2018_1a = df18[df18['instance'].eq('c4.8xlarge') & _df18_in_1a]

In [None]:
# 2018, m4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df18_in_1a = df18['zone'].eq('us-east-1a')

m4l_2018_1a = df18[df18['instance'].eq('m4.large') & _df18_in_1a]
m4x_2018_1a = df18[df18['instance'].eq('m4.xlarge') & _df18_in_1a]
m42x_2018_1a = df18[df18['instance'].eq('m4.2xlarge') & _df18_in_1a]
m44x_2018_1a = df18[df18['instance'].eq('m4.4xlarge') & _df18_in_1a]
m410x_2018_1a = df18[df18['instance'].eq('m4.10xlarge') & _df18_in_1a]
m416x_2018_1a = df18[df18['instance'].eq('m4.16xlarge') & _df18_in_1a]

In [None]:
# 2018, r4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df18_in_1a = df18['zone'].eq('us-east-1a')

# .copy(): the pricing-event cells overwrite the `time` column of these frames
# in place, so each one must be an independent frame rather than a slice of
# df18 (assigning into a slice triggers pandas' SettingWithCopyWarning).
r4l_2018_1a = df18[df18['instance'].eq('r4.large') & _df18_in_1a].copy()
r4x_2018_1a = df18[df18['instance'].eq('r4.xlarge') & _df18_in_1a].copy()
r42x_2018_1a = df18[df18['instance'].eq('r4.2xlarge') & _df18_in_1a].copy()
r44x_2018_1a = df18[df18['instance'].eq('r4.4xlarge') & _df18_in_1a].copy()
r48x_2018_1a = df18[df18['instance'].eq('r4.8xlarge') & _df18_in_1a].copy()
r416x_2018_1a = df18[df18['instance'].eq('r4.16xlarge') & _df18_in_1a].copy()

In [None]:
# df_0 is an alias of the selected frame, not a copy: the assignment at the
# bottom also changes r416x_2018_1a.
df_0 = r416x_2018_1a

# Both helpers read the still-unconverted `time` column, so they have to run
# before that column is overwritten.
temp = timestamp(df_0)
mid = midnights(df_0)

# Replace the `time` column with the numeric values from timestamp().
df_0['time'] = temp

# ONLY RUN ONCE BEFORE USING LOOP BELOW; change df_0 before running again

In [None]:
# Daily number of price records for the selected frame. Relies on `mid` and on
# the numeric `time` column produced by the preparation cell above.
df = r416x_2018_1a

# start of every whole day from the first midnight up to (excluding) the last one
day_starts = range(mid[0],mid[1],86400)

# rows whose timestamp falls inside each [day start, day start + 86400) window
results = [df[(df.time >= d) & (df.time < (d + 86400))].time.size for d in day_starts]
#print(results)

# seconds -> nanoseconds -> datetimes, then keep only the date part as a label
xaxis = pd.to_datetime(pd.Series([d*1000000000 for d in day_starts]))

xaxis2 = [str(k).split("T")[0] for k in xaxis.values]
#print(xaxis2)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

plt.plot(xaxis2, results, marker='.', color='k')
plt.title('Number of Pricing Events for the r4.16xlarge Instance Type in 2018 (Jan-Mar)' + "\n" +
         'Within the us-east-1a Availability Zone and Linux/UNIX Operating Systems')

plt.ylabel('Number of Pricing Events')
plt.yticks([0,1,2,3,4,5,6,7,8,9,10])
# x values are date strings, so these tick positions are indexes into xaxis2
plt.xticks([0,10,21,32,43,54,65,76,87], rotation=25)

plt.grid()
plt.tight_layout()
#plt.savefig('r4.16xlarge_PE_1a_2018.png')
plt.show()

##### 2021

In [None]:
# 2021, c4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df20_in_1a = df20['zone'].eq('us-east-1a')

c4l_2021_1a = df20[df20['instance'].eq('c4.large') & _df20_in_1a]
c4x_2021_1a = df20[df20['instance'].eq('c4.xlarge') & _df20_in_1a]
c42x_2021_1a = df20[df20['instance'].eq('c4.2xlarge') & _df20_in_1a]
c44x_2021_1a = df20[df20['instance'].eq('c4.4xlarge') & _df20_in_1a]
c48x_2021_1a = df20[df20['instance'].eq('c4.8xlarge') & _df20_in_1a]

In [None]:
# 2021, m4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df20_in_1a = df20['zone'].eq('us-east-1a')

m4l_2021_1a = df20[df20['instance'].eq('m4.large') & _df20_in_1a]
m4x_2021_1a = df20[df20['instance'].eq('m4.xlarge') & _df20_in_1a]
m42x_2021_1a = df20[df20['instance'].eq('m4.2xlarge') & _df20_in_1a]
m44x_2021_1a = df20[df20['instance'].eq('m4.4xlarge') & _df20_in_1a]
m410x_2021_1a = df20[df20['instance'].eq('m4.10xlarge') & _df20_in_1a]
m416x_2021_1a = df20[df20['instance'].eq('m4.16xlarge') & _df20_in_1a]

In [None]:
# List the distinct instance names in df20 that contain "r4".
df20.loc[df20['instance'].str.contains("r4"), 'instance'].unique()

In [None]:
# 2021, r4, us-east-1a

# Every slice below shares the same zone filter, so build that mask once.
_df20_in_1a = df20['zone'].eq('us-east-1a')

# .copy(): the pricing-event cells overwrite the `time` column of these frames
# in place, so each one must be an independent frame rather than a slice of
# df20 (assigning into a slice triggers pandas' SettingWithCopyWarning).
r4l_2021_1a = df20[df20['instance'].eq('r4.large') & _df20_in_1a].copy()
r4x_2021_1a = df20[df20['instance'].eq('r4.xlarge') & _df20_in_1a].copy()
r42x_2021_1a = df20[df20['instance'].eq('r4.2xlarge') & _df20_in_1a].copy()
r44x_2021_1a = df20[df20['instance'].eq('r4.4xlarge') & _df20_in_1a].copy()
r48x_2021_1a = df20[df20['instance'].eq('r4.8xlarge') & _df20_in_1a].copy()
r416x_2021_1a = df20[df20['instance'].eq('r4.16xlarge') & _df20_in_1a].copy()

In [None]:
# df_0 is an alias of the selected frame, not a copy: the assignment at the
# bottom also changes r416x_2021_1a.
df_0 = r416x_2021_1a

# Both helpers read the still-unconverted `time` column, so they have to run
# before that column is overwritten.
temp = timestamp(df_0)
mid = midnights(df_0)

# Replace the `time` column with the numeric values from timestamp().
df_0['time'] = temp

# ONLY RUN ONCE BEFORE USING LOOP BELOW; change df_0 before running again

In [None]:
# Daily number of price records for the selected frame. Relies on `mid` and on
# the numeric `time` column produced by the preparation cell above.
df = r416x_2021_1a

# start of every whole day from the first midnight up to (excluding) the last one
day_starts = range(mid[0],mid[1],86400)

# rows whose timestamp falls inside each [day start, day start + 86400) window
results = [df[(df.time >= d) & (df.time < (d + 86400))].time.size for d in day_starts]
#print(results)

# seconds -> nanoseconds -> datetimes, then keep only the date part as a label
xaxis = pd.to_datetime(pd.Series([d*1000000000 for d in day_starts]))

xaxis2 = [str(k).split("T")[0] for k in xaxis.values]
#print(xaxis2)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

plt.plot(xaxis2, results, marker='.', color='k')
plt.title('Number of Pricing Events for the r4.16xlarge Instance Type in 2021 (Mar-May)' + "\n" +
         'Within the us-east-1a Availability Zone and Linux/UNIX Operating Systems')

plt.ylabel('Number of Pricing Events')
plt.yticks([0,1,2,3,4,5,6,7,8,9,10])
# x values are date strings, so these tick positions are indexes into xaxis2
plt.xticks([0,10,21,32,43,54,65,76,87], rotation=25)

plt.grid()
plt.tight_layout()
#plt.savefig('r4.16xlarge_PE_1a_2021.png')
plt.show()

#### Heat Maps

In [None]:
import seaborn as sns

In [None]:
# Day of month for every record. A Series exposes datetime fields only through
# the .dt accessor (a bare `.day` raises AttributeError); to_datetime makes this
# work whether the column holds strings or datetimes.
pd.to_datetime(c4l_2017_1a['time']).dt.day

#### More Stuff

In [None]:
# Spot-price time series of four c5/m5 instance types in us-east-1c (Linux/UNIX).

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and 3.8
# removed the old names; pick whichever this installation provides.
plt.style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster')

# zone + OS filter shared by all four series
_df10_1c_linux = (df10['zone'] == 'us-east-1c') & (df10['os'] == 'Linux/UNIX')

filt1 = df10[_df10_1c_linux & (df10['instance'] == 'c5.xlarge')]
filt2 = df10[_df10_1c_linux & (df10['instance'] == 'c5.2xlarge')]
filt3 = df10[_df10_1c_linux & (df10['instance'] == 'm5.xlarge')]
filt4 = df10[_df10_1c_linux & (df10['instance'] == 'm5.2xlarge')]

series_to_plot = [
    (filt1, '#d41724', 'c5.xlarge'),
    (filt2, '#187d3d', 'c5.2xlarge'),
    (filt3, '#2954ab', 'm5.xlarge'),
    (filt4, '#913cc9', 'm5.2xlarge'),
]

# plt.plot handles datetime x values directly; plt.plot_date is deprecated.
# assumes df10['time'] already holds datetimes — TODO confirm
for frame, color, label in series_to_plot:
    plt.plot(frame['time'], frame['price'], linestyle='--', marker='.', color=color, label=label)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of Certain c5 & m5 Instance Types Within the ' + "\n" +
          'us-east-1c Availability Zone and Linux/UNIX Products')
plt.ylabel('Spot Price (USD)')
#plt.ylim(0,1)
plt.yticks([0,0.02,0.04,0.06,0.08,0.1,0.12,0.14,0.16,0.18])

plt.legend(prop={"size":30})
plt.tight_layout()
plt.savefig('c5m5_1c_2018.png')
plt.show()