# Test Notebook

# Data Frames Key:

- df1 = imported spot_0601 file
- df2 = spot_0601 file sorted by AZ, Instance Type, and SP (respectively)
- df3 = df2 with Date and Time Stamp converted to a datetime, and sorted by AZ, IT, PD, & DT (respectively)
- df4 = only obs from df3 where the AZ is us-east-1c
- df5 = df4 further broken down to only include Linux/UNIX PDs and the latest generation ITs
- df6 = df5 further broken down to only include "medium" and "large" ITs
- df7 = df5 further broken down to only include ITs that are 8x & 12x
- df8 = df5 further broken down to only include "metal" ITs
- df9 = df5 further broken down to only include ITs that are 16x & 24x
- df10 = df5 further broken down to only include "micro", "nano", and "small"
- df11 = df5 further broken down to only include ITs that are 2x
- df12 = df5 further broken down to only include ITs that are 3x, 4x, & 6x
- df13 = df5 further broken down to only include "xlarge" ITs
- df14 = df5 further broken down to contain the ITs with the highest spot prices from df6 to df13
- df15 = only obs from df3 where the AZ is us-east-1e
- df16 = df15 further broken down to only include Linux/UNIX PDs and the latest generation ITs
- df17 = only obs from df3 where the AZ is us-east-1a
- df18 = only obs from df3 where the AZ is us-east-1b
- df19 = only obs from df3 where the AZ is us-east-1d
- df20 = only obs from df3 where the AZ is us-east-1e
- df21 = only obs from df3 where the AZ is us-east-1f
- df22 = only obs from df3 where the PD is Linux/UNIX products
- df23 = only obs from df3 where the PD is Red Hat Enterprise Linux products
- df24 = only obs from df3 where the PD is SUSE Linux products
- df25 = only obs from df3 where the PD is Windows products
- df26 = only obs from df3 where the IT is p4d.24xlarge
- df27 = only obs from df26 where the PD is Linux/UNIX products
- df28 = only obs from df26 where the PD is Red Hat Enterprise Linux products
- df29 = only obs from df26 where the PD is SUSE Linux products
- df30 = df27 further broken down to only include the us-east-1a AZ and Dates/Times from 5/22/2021 onward
- df31 = df28 further broken down to only include the us-east-1a AZ and Dates/Times from 5/22/2021 onward
- df32 = df29 further broken down to only include the us-east-1a AZ and Dates/Times from 5/22/2021 onward
- df33 = df27 further broken down to only include the us-east-1d AZ and Dates/Times from 5/22/2021 onward
- df34 = df28 further broken down to only include the us-east-1d AZ and Dates/Times from 5/22/2021 onward
- df35 = df29 further broken down to only include the us-east-1d AZ and Dates/Times from 5/22/2021 onward
- df36 = only obs from df3 that contains the latest generation ITs
- df37 = df36 further broken down to only include Windows products
- df38 = df3 with renamed columns
- df39 = df37 with renamed columns
- df40 = df38 with only the latest generation ITs

In [None]:
import pandas as pd
pd.TimeSeries = pd.Series
import numpy as np
import sys
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates
from datetime import datetime, timedelta
from tqdm import tqdm
from scipy import integrate
from scipy.interpolate import interp1d
#importing required packages/libraries

In [None]:
print(plt.style.available) #available styles built into matplotlib

In [None]:
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault) #RUN THIS CELL TO GO BACK TO DEFAULTS

In [None]:
#Load the tab-separated spot price file. header=None tells pandas the file has no header row,
#so the column names are assigned explicitly on the next line.
df1 = pd.read_csv('Data/spot_0601.tsv', sep='\t', header=None) #importing spot_0601 file
df1.columns=['SPH','Availability Zone','Instance Type','Product Description','Spot Price','Date and Time Stamp']
    #setting column names; the list must have one name per column in the file
df1.head() #printing out the first 5 obs

In [None]:
df1.drop(columns=['SPH'],inplace=True) #drops the SPH column and saves that change

In [None]:
df1.head() #confirms that the SPH column was dropped

In [None]:
df1.shape #gives the number of rows and columns of the df

In [None]:
df1.info() #provides info on the df

In [None]:
df2 = df1.sort_values(by=['Availability Zone','Instance Type','Spot Price'])
df2

In [None]:
high_SP = (df2['Spot Price'] > 5.00) #creates a variable where the Spot Price is greater than 5
df2.loc[high_SP] #shows the obs that have the high Spot Price

In [None]:
df2['Spot Price'].mean().round(2) #the mean spot price for the entire dataset to 2 decimal places

In [None]:
df2['Spot Price'].median().round(2) #the median spot price for the entire dataset to 2 decimal places

## Creating a barplot of the mean and median spot prices for each availability zone

In [None]:
#one boolean mask per availability zone, each aligned row-for-row with df2
filt1a = df2['Availability Zone'].eq('us-east-1a')
filt1b = df2['Availability Zone'].eq('us-east-1b')
filt1c = df2['Availability Zone'].eq('us-east-1c')
filt1d = df2['Availability Zone'].eq('us-east-1d')
filt1e = df2['Availability Zone'].eq('us-east-1e')
filt1f = df2['Availability Zone'].eq('us-east-1f')

In [None]:
#mean and median spot price per availability zone, rounded to 4 decimal places
#each line aggregates the zone's prices once and unpacks the (mean, median) pair
mean_1a, median_1a = df2.loc[filt1a, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1a
mean_1b, median_1b = df2.loc[filt1b, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1b
mean_1c, median_1c = df2.loc[filt1c, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1c
mean_1d, median_1d = df2.loc[filt1d, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1d
mean_1e, median_1e = df2.loc[filt1e, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1e
mean_1f, median_1f = df2.loc[filt1f, 'Spot Price'].agg(['mean', 'median']).round(4) #us-east-1f

In [None]:
AZ_x = ['us-east-1a','us-east-1b','us-east-1c','us-east-1d','us-east-1e','us-east-1f']
SP_mean_AZ = [mean_1a, mean_1b, mean_1c, mean_1d, mean_1e, mean_1f]
SP_median_AZ = [median_1a, median_1b, median_1c, median_1d, median_1e, median_1f]

In [None]:
#side-by-side barplot of the means and medians of the spot prices for each AZ
# MUST RUN 3 ABOVE CELLS FOR THIS TO WORK (they define AZ_x, SP_mean_AZ, and SP_median_AZ)

#plt.style.use('ggplot') #sets the plot style for this plot

AZx_indexes = np.arange(len(AZ_x)) #integer positions 0..len(AZ_x)-1, one per availability zone
                                      #numeric x values are needed so the two bar series can be offset

width = 0.25 #width of each individual bar, in x-axis units

plt.bar(AZx_indexes-0.5*width, SP_mean_AZ, width=width, color='k', label='Mean') #mean bars, centred half a bar width
                                                                            #to the left of each integer position
                                                                            #so they sit beside the median bars

plt.bar(AZx_indexes+0.5*width, SP_median_AZ, width=width, color='b', label='Median') #median bars, centred half a bar
                                                                         #width to the right of each position

plt.xticks(ticks=AZx_indexes, labels=AZ_x) #ticks at the integer positions, labelled with the availability zone names

plt.title('Mean and Median Spot Prices (USD) For Each Availability Zone')
plt.xlabel('Availability Zone')
plt.ylabel('Spot Price')
plt.legend()
#plt.grid(True)

plt.tight_layout()
#plt.savefig('barplot1.png') #saves this plot in cd
#plt.show()

## Creating a barplot of the mean and median spot prices for each product description

In [None]:
#one boolean mask per product description, each aligned row-for-row with df2
filtLU = df2['Product Description'].eq('Linux/UNIX')
filtRHEL = df2['Product Description'].eq('Red Hat Enterprise Linux')
filtSL = df2['Product Description'].eq('SUSE Linux')
filtW = df2['Product Description'].eq('Windows')

In [None]:
#mean and median spot price per product description, rounded to 4 decimal places
#each line aggregates the product's prices once and unpacks the (mean, median) pair
mean_LU, median_LU = df2.loc[filtLU, 'Spot Price'].agg(['mean', 'median']).round(4) #Linux/UNIX
mean_RHEL, median_RHEL = df2.loc[filtRHEL, 'Spot Price'].agg(['mean', 'median']).round(4) #Red Hat Enterprise Linux
mean_SL, median_SL = df2.loc[filtSL, 'Spot Price'].agg(['mean', 'median']).round(4) #SUSE Linux
mean_W, median_W = df2.loc[filtW, 'Spot Price'].agg(['mean', 'median']).round(4) #Windows

In [None]:
PD_x = ['Linux/UNIX','Red Hat Enterprise Linux','SUSE Linux','Windows']
SP_mean_PD = [mean_LU, mean_RHEL, mean_SL, mean_W]
SP_median_PD = [median_LU, median_RHEL, median_SL, median_W]

In [None]:
#side-by-side barplot of the means and medians of the spot prices for each PD
# MUST RUN 3 ABOVE CELLS FOR THIS TO WORK (they define PD_x, SP_mean_PD, and SP_median_PD)

plt.style.use('classic') #sets the plot style; the style stays active for later plots until another is selected

PDx_indexes = np.arange(len(PD_x)) #integer positions 0..len(PD_x)-1, one per product description
                                      #numeric x values are needed so the two bar series can be offset

width = 0.25 #width of each individual bar, in x-axis units

plt.bar(PDx_indexes-0.5*width, SP_mean_PD, width=width, color='c', label='Mean') #mean bars, centred half a bar width
                                                                            #to the left of each integer position
                                                                            #so they sit beside the median bars

plt.bar(PDx_indexes+0.5*width, SP_median_PD, width=width, color='m', label='Median') #median bars, centred half a bar
                                                                         #width to the right of each position

plt.xticks(ticks=PDx_indexes, labels=PD_x) #ticks at the integer positions, labelled with the product descriptions

plt.title('Mean and Median Spot Prices (USD) For Each Product')
plt.ylabel('Spot Price')
plt.legend()
plt.grid(True)

plt.tight_layout()
#plt.savefig('barplot2.png') #saves this plot in cd
#plt.show()

## Creating barplots for the time between pricing events

In [None]:
df2 #reminder of what our data looks like

In [None]:
#parse the timestamp column into pandas datetimes; this overwrites the column on df2 itself,
#so any frame built from df2 afterwards (e.g. df3) carries the datetime version
df2['Date and Time Stamp'] = pd.to_datetime(df2['Date and Time Stamp'])
df2['Date and Time Stamp'] #displays the converted column so the new dtype can be checked

In [None]:
df3 = df2.sort_values(by=['Availability Zone', 'Instance Type', 'Product Description', 'Date and Time Stamp'])
df3

In [None]:
pd.set_option('display.max_columns', 10) #adjusts max display columns
pd.set_option('display.max_rows', 200) #adjusts max display rows

In [None]:
#Test Time Series Plot #1
#Spot price history for a single (availability zone, instance type, product) combination.

test1 = df3[(df3['Availability Zone'] == 'us-east-1a') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Linux/UNIX')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test1['Date and Time Stamp'], test1['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1a AZ,' + "\n" + 'a1.2xlarge IT, and Linux/UNIX PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #2
#Spot price history for a single (availability zone, instance type, product) combination.

test2 = df3[(df3['Availability Zone'] == 'us-east-1a') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test2['Date and Time Stamp'], test2['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1a AZ,' + "\n" + 'a1.2xlarge IT, and Red Hat Enterprise Linux PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #3
#Spot price history for a single (availability zone, instance type, product) combination.

test3 = df3[(df3['Availability Zone'] == 'us-east-1a') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'SUSE Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test3['Date and Time Stamp'], test3['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1a AZ,' + "\n" + 'a1.2xlarge IT, and Suse Linux PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1a') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

In [None]:
#Test Time Series Plot #5
#Spot price history for a single (availability zone, instance type, product) combination.

test5 = df3[(df3['Availability Zone'] == 'us-east-1b') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Linux/UNIX')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test5['Date and Time Stamp'], test5['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1b AZ,' + "\n" + 'a1.2xlarge IT, and Linux/UNIX PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #6
#Spot price history for a single (availability zone, instance type, product) combination.

test6 = df3[(df3['Availability Zone'] == 'us-east-1b') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test6['Date and Time Stamp'], test6['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1b AZ,' + "\n" + 'a1.2xlarge IT, and Red Hat Enterprise Linux')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #7
#Spot price history for a single (availability zone, instance type, product) combination.

test7 = df3[(df3['Availability Zone'] == 'us-east-1b') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'SUSE Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test7['Date and Time Stamp'], test7['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1b AZ,' + "\n" + 'a1.2xlarge IT, and Suse Linux')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1b') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

In [None]:
#Test Time Series Plot #9
#Spot price history for a single (availability zone, instance type, product) combination.

test9 = df3[(df3['Availability Zone'] == 'us-east-1c') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Linux/UNIX')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test9['Date and Time Stamp'], test9['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1c AZ,' + "\n" + 'a1.2xlarge IT, and Linux/UNIX PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #10
#Spot price history for a single (availability zone, instance type, product) combination.

test10 = df3[(df3['Availability Zone'] == 'us-east-1c') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test10['Date and Time Stamp'], test10['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1c AZ,' + "\n" + 'a1.2xlarge IT, and Red Hat Enterprise Linux PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
#Test Time Series Plot #11
#Spot price history for a single (availability zone, instance type, product) combination.

test11 = df3[(df3['Availability Zone'] == 'us-east-1c') & (df3['Instance Type'] == 'a1.2xlarge') &
    (df3['Product Description'] == 'SUSE Linux')]

#plt.plot_date is deprecated in current matplotlib; plt.plot accepts the datetime column directly,
#and marker='o' reproduces the marker plot_date drew by default
plt.plot(test11['Date and Time Stamp'], test11['Spot Price'], marker='o', linestyle='solid')

plt.gcf().autofmt_xdate() #rotates the date tick labels so they do not overlap

plt.title('Time Series Plot of the us-east-1c AZ,' + "\n" + 'a1.2xlarge IT, and SUSE Linux PD')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.show()

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1c') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1d') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Linux/UNIX')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1d') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1d') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'SUSE Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1d') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1e') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Linux/UNIX')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1e') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1e') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'SUSE Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1e') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1f') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Linux/UNIX')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1f') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Red Hat Enterprise Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1f') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'SUSE Linux')]

In [None]:
df3[(df3['Availability Zone'] == 'us-east-1f') & (df3['Instance Type'] == 'a1.2xlarge') & 
    (df3['Product Description'] == 'Windows')]

## Miscellaneous

In [None]:
pd.unique(df3['Availability Zone']) #lists unique availability zone values

In [None]:
df3['Availability Zone'].unique()

In [None]:
pd.unique(df3['Instance Type']) #lists unique instance type values

In [None]:
len(pd.unique(df3['Instance Type'])) #lists # of unique instance values

In [None]:
pd.unique(df3['Product Description']) #lists unique product description values

In [None]:
#rows whose instance type contains "metal" and whose product description contains "Linux"
#(both substring matches ignore case)
test25 = df3[df3['Instance Type'].str.contains("metal", case=False)
             & df3['Product Description'].str.contains("Linux", case=False)]
test25

### At this point, I can see that spot prices are highest in the us-east-1c availability zone, so I am going to do some further analyses on these observations

In [None]:
#df4: the df3 observations that belong to the us-east-1c availability zone
#(filt1c is rebuilt here against df3; the earlier filt1c was a mask over df2)
filt1c = df3['Availability Zone'].eq('us-east-1c')
df4 = df3[filt1c]
df4

In [None]:
#product description masks, rebuilt against df4 (us-east-1c only); these replace the df2-based masks
filtLU = df4['Product Description'].eq('Linux/UNIX')
filtRHEL = df4['Product Description'].eq('Red Hat Enterprise Linux')
filtSL = df4['Product Description'].eq('SUSE Linux')
filtW = df4['Product Description'].eq('Windows')


#mean and median spot price per product within us-east-1c, rounded to 4 decimal places
#each line aggregates the product's prices once and unpacks the (mean, median) pair
mean_LU, median_LU = df4.loc[filtLU, 'Spot Price'].agg(['mean', 'median']).round(4) #Linux/UNIX
mean_RHEL, median_RHEL = df4.loc[filtRHEL, 'Spot Price'].agg(['mean', 'median']).round(4) #Red Hat Enterprise Linux
mean_SL, median_SL = df4.loc[filtSL, 'Spot Price'].agg(['mean', 'median']).round(4) #SUSE Linux
mean_W, median_W = df4.loc[filtW, 'Spot Price'].agg(['mean', 'median']).round(4) #Windows


#inputs for the bar plot in the next cell: labels plus the matching mean and median series
PD_x = ['Linux/UNIX','Red Hat Enterprise Linux','SUSE Linux','Windows']
SP_mean_PD = [mean_LU, mean_RHEL, mean_SL, mean_W]
SP_median_PD = [median_LU, median_RHEL, median_SL, median_W]

In [None]:
#side-by-side barplot of the means and medians of the spot prices for each PD in the us-east-1c AZ
# MUST RUN ABOVE CELL FOR THIS TO WORK (it defines PD_x, SP_mean_PD, and SP_median_PD from df4)

plt.style.use('ggplot') #sets the plot style; the style stays active for later plots until another is selected

PDx_indexes = np.arange(len(PD_x)) #integer positions 0..len(PD_x)-1, one per product description
                                      #numeric x values are needed so the two bar series can be offset

width = 0.25 #width of each individual bar, in x-axis units

plt.bar(PDx_indexes-0.5*width, SP_mean_PD, width=width, color='#c42929', label='Mean') #mean bars, centred half a bar width
                                                                            #to the left of each integer position
                                                                            #so they sit beside the median bars

plt.bar(PDx_indexes+0.5*width, SP_median_PD, width=width, color='#f78f0f', label='Median') #median bars, centred half a
                                                                         #bar width to the right of each position

plt.xticks(ticks=PDx_indexes, labels=PD_x) #ticks at the integer positions, labelled with the product descriptions

plt.title('Mean and Median Spot Prices (USD) For Each Product' + "\n" + 'Within the us-east-1c Availability Zone')
plt.ylabel('Spot Price')
plt.legend(loc='upper left')
#plt.grid(True)
plt.yticks([0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25]) #fixed y ticks every 0.25 USD from 0 to 2.25

plt.tight_layout()
#plt.savefig('barplot3.png') #saves this plot in cd
#plt.show()

In [None]:
df4['Instance Type'].unique() #reminder of Instance Types in df4, still all 389

In [None]:
#df5: df4 narrowed to Linux/UNIX products on the latest-generation instance families.
#The family list is a regex of alternatives matched as substrings of the instance type name.
#NOTE(review): the list previously contained "df3", which matches no instance type name; it looks
#like a stray variable rename of the "d3" family, so "d3" is restored here - confirm the intent.
df5 = df4[(df4['Instance Type'].str.contains("a1|c6|d3|f1|g4|h1|i3|inf1|m6|p4|r6|t4|x2|z1"))
    & (df4['Product Description'] == 'Linux/UNIX')]
df5

In [None]:
df5['Instance Type'].unique()

In [None]:
#df6: df5 rows whose instance type contains ".medium" or ".large"
#the pattern is a raw string so "\." reaches the regex engine as an escaped literal dot;
#in a plain string "\." is an invalid escape sequence and triggers a warning
df6 = df5[(df5['Instance Type'].str.contains(r"\.medium|\.large"))]
df6

In [None]:
#df7: df5 rows whose instance type contains "8xlarge" or "12xlarge" (substring regex match)
df7 = df5.loc[df5['Instance Type'].str.contains("8xlarge|12xlarge")]
df7

In [None]:
# Horizontal bar chart of the df7 spot prices, one bar position per instance type.
# Every row of df7 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('seaborn')

plt.barh(y=df7['Instance Type'], width=df7['Spot Price'], color="#eb5ced")

plt.title('8x & 12x Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot2.png') #saves this plot in cd
#plt.show()

In [None]:
#df8: df5 rows whose instance type contains "metal"
df8 = df5.loc[df5['Instance Type'].str.contains("metal")]
df8

In [None]:
# Horizontal bar chart of the df8 spot prices, one bar position per instance type.
# Every row of df8 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df8['Instance Type'], width=df8['Spot Price'], color='#404040')

plt.title('Metal Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')
#fixed x ticks every 0.25 USD from 0 to 3.5
plt.xticks([0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5])

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot3.png') #saves this plot in cd
#plt.show()

In [None]:
#df9: df5 rows whose instance type contains "16xlarge" or "24xlarge" (substring regex match)
df9 = df5.loc[df5['Instance Type'].str.contains("16xlarge|24xlarge")]
df9

In [None]:
# Horizontal bar chart of the df9 spot prices, one bar position per instance type.
# Every row of df9 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df9['Instance Type'], width=df9['Spot Price'], color='#e30000')

plt.title('16x & 24x Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')
plt.xticks(list(range(15))) #whole-dollar ticks from 0 to 14

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot4.png') #saves this plot in cd
#plt.show()

In [None]:
#df10: df5 rows whose instance type contains "micro", "nano", or "small"
df10 = df5.loc[df5['Instance Type'].str.contains("micro|nano|small")]
df10

In [None]:
# Horizontal bar chart of the df10 spot prices, one bar position per instance type.
# Every row of df10 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df10['Instance Type'], width=df10['Spot Price'], color='#edbc09')

plt.title('Nano, Micro, and Small Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot5.png') #saves this plot in cd
#plt.show()

In [None]:
#df11: df5 rows whose instance type contains ".2x" (the literal dot keeps e.g. "12x" from matching)
#the pattern is a raw string so "\." reaches the regex engine as an escaped literal dot;
#in a plain string "\." is an invalid escape sequence and triggers a warning
df11 = df5[(df5['Instance Type'].str.contains(r"\.2x"))]
df11

In [None]:
# Horizontal bar chart of the df11 spot prices, one bar position per instance type.
# Every row of df11 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df11['Instance Type'], width=df11['Spot Price'], color='#f27100')

plt.title('2x Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')
#fixed x ticks every 0.05 USD from 0 to 0.5
plt.xticks([0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5])

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot6.png') #saves this plot in cd
#plt.show()

In [None]:
#df12: df5 rows whose instance type contains ".3x", ".4x", or ".6x"
#the pattern is a raw string so "\." reaches the regex engine as an escaped literal dot;
#in a plain string "\." is an invalid escape sequence and triggers a warning
df12 = df5[(df5['Instance Type'].str.contains(r"\.3x|\.4x|\.6x"))]
df12

In [None]:
# Horizontal bar chart of the df12 spot prices, one bar position per instance type.
# Every row of df12 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df12['Instance Type'], width=df12['Spot Price'], color='#7d0dde')

plt.title('3x, 4x, & 6x Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot7.png') #saves this plot in cd
#plt.show()

In [None]:
#df13: df5 rows whose instance type contains ".xlarge" (the literal dot excludes "2xlarge", "4xlarge", ...)
#the pattern is a raw string so "\." reaches the regex engine as an escaped literal dot;
#in a plain string "\." is an invalid escape sequence and triggers a warning
df13 = df5[(df5['Instance Type'].str.contains(r"\.xlarge"))]
df13

In [None]:
# Horizontal bar chart of the df13 spot prices, one bar position per instance type.
# Every row of df13 is passed in, so rows that share an instance type are drawn at the same position.

plt.style.use('ggplot')

plt.barh(y=df13['Instance Type'], width=df13['Spot Price'], color='#13ad54')

plt.title('XL Instance Types Within the us-east-1c\n'
          'Availability Zone for Linux/UNIX Products')
plt.xlabel('Highest Spot Price (USD)')

plt.tight_layout()
plt.grid(True)
#plt.savefig('horizbarplot8.png') #saves this plot in cd
#plt.show()

In [None]:
#df14: df5 restricted to a hand-picked list of instance types
#(per the data frame key, these are the highest-priced types seen in df6 through df13)
IT_filt = df5['Instance Type'].isin([
    'i3en.large', 'x2gd.medium', 'g4dn.12xlarge', 'i3.8xlarge', 'i3en.metal',
    'p4d.24xlarge', 'f1.16xlarge', 't4g.small', 't4g.nano', 't4g.micro',
    'f1.2xlarge', 'f1.4xlarge', 'i3en.6xlarge', 'i3en.3xlarge', 'g4dn.xlarge',
])

df14 = df5[IT_filt]
df14

In [None]:
df14['Instance Type'].unique()

In [None]:
#df15: the df3 observations that belong to the us-east-1e availability zone
#(filt1e is rebuilt here against df3; the earlier filt1e was a mask over df2)
filt1e = df3['Availability Zone'].eq('us-east-1e')
df15 = df3[filt1e]
df15

In [None]:
#df16: df15 narrowed to Linux/UNIX products on the latest-generation instance families.
#The family list is a regex of alternatives matched as substrings of the instance type name.
#NOTE(review): the list previously contained "df3", which matches no instance type name; it looks
#like a stray variable rename of the "d3" family, so "d3" is restored here - confirm the intent.
df16 = df15[(df15['Instance Type'].str.contains("a1|c6|d3|f1|g4|h1|i3|inf1|m6|p4|r6|t4|x2|z1")) &
            (df15['Product Description'] == 'Linux/UNIX')]
df16

In [None]:
df16['Instance Type'].unique()

In [None]:
#already in date time form

#epochconverter.com

#convert string to a datetime object

#for loop: iterate through all days, would be easier in timestamp --> iterate by number of seconds

#string concatenation: using range for datetime, timestamp would be easier

#

## Numerical and/or Graphical Analysis of Availability Zones

##### Number Crunching

In [None]:
df3 #reminder of this data frame

In [None]:
#one boolean mask per availability zone, aligned with df3 (the _3 suffix marks the frame they index)
filt1a_3 = df3['Availability Zone'].eq('us-east-1a')
filt1b_3 = df3['Availability Zone'].eq('us-east-1b')
filt1c_3 = df3['Availability Zone'].eq('us-east-1c')
filt1d_3 = df3['Availability Zone'].eq('us-east-1d')
filt1e_3 = df3['Availability Zone'].eq('us-east-1e')
filt1f_3 = df3['Availability Zone'].eq('us-east-1f')

In [None]:
#one data frame per availability zone, each selected from df3 with the matching mask
df17 = df3.loc[filt1a_3] #new data frame of observations limited to the us-east-1a availability zone
df18 = df3.loc[filt1b_3] #new data frame of observations limited to the us-east-1b availability zone
#df4 is already a data frame of observations limited to the us-east-1c availability zone, so filt1c_3 is not used here
df19 = df3.loc[filt1d_3] #new data frame of observations limited to the us-east-1d availability zone
df20 = df3.loc[filt1e_3] #new data frame of observations limited to the us-east-1e availability zone
df21 = df3.loc[filt1f_3] #new data frame of observations limited to the us-east-1f availability zone

In [None]:
print(df17['Spot Price'].describe().round(4)) #us-east-1a

In [None]:
print(df18['Spot Price'].describe().round(4)) #us-east-1b

In [None]:
print(df4['Spot Price'].describe().round(4)) #us-east-1c

In [None]:
print(df19['Spot Price'].describe().round(4)) #us-east-1d

In [None]:
print(df20['Spot Price'].describe().round(4)) #us-east-1e

In [None]:
print(df21['Spot Price'].describe().round(4)) #us-east-1f

##### Plotting

In [None]:
# Histogram of the us-east-1a spot prices (df17) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins = list(range(15))
    #bin edges at every whole dollar from 0 to 14

plt.hist(df17['Spot Price'], bins=bins, color='#1b702d', edgecolor='k', linewidth=1.25)

#hard-coded mean; presumably copied from the describe() output for df17 - re-check if the data changes
mean1a = 0.9013
color1 = '#03c4ff'
plt.axvline(mean1a, color=color1, label='Mean', linewidth=2)

#hard-coded median; presumably copied from the describe() output for df17 - re-check if the data changes
median1a = 0.5040
color2 = '#ffbdc3'
plt.axvline(median1a, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1a Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')
plt.yticks(list(range(0, 170000, 10000))) #y ticks every 10,000 observations from 0 to 160,000

plt.legend()
plt.tight_layout()
#plt.savefig('histogram1.png')
#plt.show()

In [None]:
# Histogram of the us-east-1b spot prices (df18) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    #bin edges at every whole dollar from 0 to 14

plt.hist(df18['Spot Price'], bins=bins, color='#552aa1', edgecolor='k', linewidth=1.25)

#hard-coded mean for this zone; the reference line must use this zone's value, not the us-east-1a one
mean1b = 0.9051
color1 = '#e6e60e'
plt.axvline(mean1b, color=color1, label='Mean', linewidth=2)

#hard-coded median for this zone; same rule as the mean above
median1b = 0.5334
color2 = '#ffa200'
plt.axvline(median1b, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1b Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')

plt.legend()
plt.tight_layout()
#plt.savefig('histogram2.png')
#plt.show()

In [None]:
# Histogram of the us-east-1c spot prices (df4) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    #bin edges at every whole dollar from 0 to 14

plt.hist(df4['Spot Price'], bins=bins, color='#201bb5', edgecolor='k', linewidth=1.25)

#hard-coded mean for this zone; the reference line must use this zone's value, not the us-east-1a one
mean1c = 1.0286
color1 = '#2fede4'
plt.axvline(mean1c, color=color1, label='Mean', linewidth=2)

#hard-coded median for this zone; same rule as the mean above
median1c = 0.5381
color2 = '#f23fe9'
plt.axvline(median1c, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1c Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')

plt.legend()
plt.tight_layout()
#plt.savefig('histogram3.png')
#plt.show()

In [None]:
# Histogram of the us-east-1d spot prices (df19) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    #bin edges at every whole dollar from 0 to 14

plt.hist(df19['Spot Price'], bins=bins, color='#db1a1a', edgecolor='k', linewidth=1.25)

#hard-coded mean for this zone; the reference line must use this zone's value, not the us-east-1a one
mean1d = 0.9813
color1 = '#407cff'
plt.axvline(mean1d, color=color1, label='Mean', linewidth=2)

#hard-coded median for this zone; same rule as the mean above
median1d = 0.5402
color2 = '#1dad4b'
plt.axvline(median1d, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1d Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')

plt.legend()
plt.tight_layout()
#plt.savefig('histogram4.png')
#plt.show()

In [None]:
# Histogram of the us-east-1e spot prices (df20) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins=[0,1,2,3,4,5,6,7,8,9,10]
    #bin edges at every whole dollar from 0 to 10

plt.hist(df20['Spot Price'], bins=bins, color='#ab13bf', edgecolor='k', linewidth=1.25)

#hard-coded mean for this zone; the reference line must use this zone's value, not the us-east-1a one
mean1e = 0.6738
color1 = '#33d6c9'
plt.axvline(mean1e, color=color1, label='Mean', linewidth=2)

#hard-coded median for this zone; same rule as the mean above
median1e = 0.3300
color2 = '#e35d5d'
plt.axvline(median1e, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1e Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')

plt.legend()
plt.tight_layout()
#plt.savefig('histogram5.png')
#plt.show()

In [None]:
# Histogram of the us-east-1f spot prices (df21) with vertical reference lines at the mean and median
plt.style.use('fivethirtyeight')

bins=[0,1,2,3,4,5,6,7,8,9]
    #bin edges at every whole dollar from 0 to 9

plt.hist(df21['Spot Price'], bins=bins, color='#ff9e17', edgecolor='k', linewidth=1.25)

#hard-coded mean for this zone; the reference line must use this zone's value, not the us-east-1a one
mean1f = 0.9126
color1 = '#8443b5'
plt.axvline(mean1f, color=color1, label='Mean', linewidth=2)

#hard-coded median for this zone; same rule as the mean above
median1f = 0.5040
color2 = '#b54ea2'
plt.axvline(median1f, color=color2, label='Median', linewidth=2)

plt.title('Spot Prices in the us-east-1f Availability Zone')
plt.xlabel('Spot Prices (USD)')
plt.ylabel('Total Count')

plt.legend()
plt.tight_layout()
#plt.savefig('histogram6.png')
#plt.show()

## Numerical and/or Graphical Analysis of Product Descriptions

##### Number Crunching

In [None]:
#one boolean mask per product description, aligned with df3 (the _3 suffix marks the frame they index)
filtLU_3 = df3['Product Description'].eq('Linux/UNIX')
filtRHEL_3 = df3['Product Description'].eq('Red Hat Enterprise Linux')
filtSL_3 = df3['Product Description'].eq('SUSE Linux')
filtW_3 = df3['Product Description'].eq('Windows')

In [None]:
# Split df3 into one data frame per product description, using the filt*_3 boolean masks
df22 = df3[filtLU_3]    # Linux/UNIX products only
df23 = df3[filtRHEL_3]  # Red Hat Enterprise Linux products only
df24 = df3[filtSL_3]    # SUSE Linux products only
df25 = df3[filtW_3]     # Windows products only

In [None]:
# Descriptive statistics of the Linux/UNIX (df22) spot prices, rounded to 4 decimal places
print(df22.loc[:, 'Spot Price'].describe().round(decimals=4))

In [None]:
# Descriptive statistics of the Red Hat Enterprise Linux (df23) spot prices, rounded to 4 decimal places
print(df23.loc[:, 'Spot Price'].describe().round(decimals=4))

In [None]:
# Descriptive statistics of the SUSE Linux (df24) spot prices, rounded to 4 decimal places
print(df24.loc[:, 'Spot Price'].describe().round(decimals=4))

In [None]:
# Descriptive statistics of the Windows (df25) spot prices, rounded to 4 decimal places
print(df25.loc[:, 'Spot Price'].describe().round(decimals=4))

##### Plotting

In [None]:
# Box plots: one box of spot prices per product description

# Spot-price columns in the order Linux/UNIX, RHEL, SUSE Linux, Windows
PDs = [pd_frame['Spot Price'] for pd_frame in (df22, df23, df24, df25)]

plt.boxplot(PDs)

plt.title('Boxplots of Spot Prices within All Product Descriptions')

# Boxes sit at x = 1..4; label each with its product description
box_labels = ['Linux/UNIX', 'Red Hat Enterprise Linux', 'SUSE Linux', 'Windows']
plt.xticks(list(range(1, len(box_labels) + 1)), box_labels)

plt.tight_layout()
#plt.savefig('boxplot1.png')
#plt.show()

In [None]:
## ALL WORK ABOVE WAS DONE ON OR BEFORE FRIDAY, JUNE 11, 2021

In [None]:
# Things to mess around with Monday:
    # Further analyze the p4d.24xlarge IT (numerical and/or graphical)
    # Further analyze the Windows PD (numerical and/or graphical)
    # Attempt to do more time series plotting

In [None]:
# df26: every df3 observation whose instance type is p4d.24xlarge
filtIT = df3['Instance Type'].eq('p4d.24xlarge')
df26 = df3[filtIT]
df26

In [None]:
# Descriptive statistics of the p4d.24xlarge (df26) spot prices, rounded to 4 decimal places
print(df26.loc[:, 'Spot Price'].describe().round(decimals=4))

In [None]:
# df27: the p4d.24xlarge observations (df26) limited to Linux/UNIX products
df27 = df26[df26['Product Description'].eq('Linux/UNIX')]
df27

### Time Series Plots

In [None]:
# Time series: p4d.24xlarge Linux/UNIX (df27) spot prices in the us-east-1a AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df27 rows recorded in us-east-1a
filt1a_27 = df27[df27['Availability Zone'].eq('us-east-1a')]

plt.plot_date(filt1a_27['Date and Time Stamp'], filt1a_27['Spot Price'],
              color='#681aa3', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1a Availability Zone and Linux/UNIX Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')
plt.yticks([9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75])  # fixed tick positions

plt.tight_layout()
#plt.savefig('timeseries1.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge Linux/UNIX (df27) spot prices in the us-east-1c AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df27 rows recorded in us-east-1c
filt1c_27 = df27[df27['Availability Zone'].eq('us-east-1c')]

plt.plot_date(filt1c_27['Date and Time Stamp'], filt1c_27['Spot Price'],
              color='#3478d1', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1c Availability Zone and Linux/UNIX Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')
plt.yticks([9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13, 13.5])  # fixed tick positions

plt.tight_layout()
#plt.savefig('timeseries2.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge Linux/UNIX (df27) spot prices in the us-east-1d AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df27 rows recorded in us-east-1d
filt1d_27 = df27[df27['Availability Zone'].eq('us-east-1d')]

plt.plot_date(filt1d_27['Date and Time Stamp'], filt1d_27['Spot Price'],
              color='#d12cb8', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1d Availability Zone and Linux/UNIX Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.tight_layout()
#plt.savefig('timeseries3.png')
#plt.show()

In [None]:
# df28: the p4d.24xlarge observations (df26) limited to Red Hat Enterprise Linux products
df28 = df26[df26['Product Description'].eq('Red Hat Enterprise Linux')]
df28

In [None]:
# Time series: p4d.24xlarge Red Hat Enterprise Linux (df28) spot prices in the us-east-1a AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df28 rows recorded in us-east-1a
filt1a_28 = df28[df28['Availability Zone'].eq('us-east-1a')]

plt.plot_date(filt1a_28['Date and Time Stamp'], filt1a_28['Spot Price'],
              color='#107030', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1a Availability Zone and Red Hat Enterprise Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')
plt.yticks([9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75])  # fixed tick positions

plt.tight_layout()
#plt.savefig('timeseries4.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge Red Hat Enterprise Linux (df28) spot prices in the us-east-1c AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df28 rows recorded in us-east-1c
filt1c_28 = df28[df28['Availability Zone'].eq('us-east-1c')]

plt.plot_date(filt1c_28['Date and Time Stamp'], filt1c_28['Spot Price'],
              color='#e88d15', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1c Availability Zone and Red Hat Enterprise Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.tight_layout()
#plt.savefig('timeseries5.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge Red Hat Enterprise Linux (df28) spot prices in the us-east-1d AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df28 rows recorded in us-east-1d
filt1d_28 = df28[df28['Availability Zone'].eq('us-east-1d')]

plt.plot_date(filt1d_28['Date and Time Stamp'], filt1d_28['Spot Price'],
              color='#bf173e', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1d Availability Zone and Red Hat Enterprise Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.tight_layout()
#plt.savefig('timeseries6.png')
#plt.show()

In [None]:
# df29: the p4d.24xlarge observations (df26) limited to SUSE Linux products
df29 = df26[df26['Product Description'].eq('SUSE Linux')]
df29

In [None]:
# Time series: p4d.24xlarge SUSE Linux (df29) spot prices in the us-east-1a AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df29 rows recorded in us-east-1a
filt1a_29 = df29[df29['Availability Zone'].eq('us-east-1a')]

plt.plot_date(filt1a_29['Date and Time Stamp'], filt1a_29['Spot Price'],
              color='#00ccc8', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1a Availability Zone and SUSE Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')
plt.yticks([9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5, 11.75])  # fixed tick positions

plt.tight_layout()
#plt.savefig('timeseries7.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge SUSE Linux (df29) spot prices in the us-east-1c AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df29 rows recorded in us-east-1c
filt1c_29 = df29[df29['Availability Zone'].eq('us-east-1c')]

plt.plot_date(filt1c_29['Date and Time Stamp'], filt1c_29['Spot Price'],
              color='#512aad', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1c Availability Zone and SUSE Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.tight_layout()
#plt.savefig('timeseries8.png')
#plt.show()

In [None]:
# Time series: p4d.24xlarge SUSE Linux (df29) spot prices in the us-east-1d AZ
plt.style.use('seaborn-poster')

# Despite the "filt" prefix this is a DataFrame: the df29 rows recorded in us-east-1d
filt1d_29 = df29[df29['Availability Zone'].eq('us-east-1d')]

plt.plot_date(filt1d_29['Date and Time Stamp'], filt1d_29['Spot Price'],
              color='#62bd13', linestyle='solid')

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the \n'
          'us-east-1d Availability Zone and SUSE Linux Products')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.tight_layout()
#plt.savefig('timeseries9.png')
#plt.show()

In [None]:
# Narrow each p4d.24xlarge product frame to the us-east-1a AZ and to observations
# stamped 5/22/2021 (00:00 UTC) or later:
#   df30 <- df27 (Linux/UNIX), df31 <- df28 (Red Hat Enterprise Linux), df32 <- df29 (SUSE Linux)
df30, df31, df32 = (
    os_frame.loc[(os_frame['Availability Zone'] == 'us-east-1a') &
                 (os_frame['Date and Time Stamp'] >= '2021-05-22 00:00:00+00:00')]
    for os_frame in (df27, df28, df29)
)

In [None]:
# Time series of p4d.24xlarge across the us-east-1a AZ, one line per product description

plt.style.use('seaborn-poster')

# (data, line colour, legend label) for each product description
os_series = (
    (df30, 'k', 'Linux/UNIX'),
    (df31, 'b', 'Red Hat Enterprise Linux'),
    (df32, 'c', 'SUSE Linux'),
)
for os_frame, os_colour, os_label in os_series:
    plt.plot_date(os_frame['Date and Time Stamp'], os_frame['Spot Price'], linestyle='solid',
                  color=os_colour, label=os_label)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the us-east-1a\n'
          'Availability Zone and all Product Descriptions From 5/22/2021 Onward')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend()
plt.tight_layout()
#plt.savefig('timeseries10.png')
#plt.show()

In [None]:
# Display df3's column labels (used as a reference before renaming them in a copy)
df3.columns

In [None]:
# df38: an independent copy of df3 with abbreviated column labels
# NOTE(review): assumes df3 has exactly five columns, in an order matching these labels - confirm
short_labels = ['AZ', 'IT', 'OS', 'SP', 'DT']
df38 = df3.copy()
df38.columns = short_labels
df38.head(n=5)

In [None]:
# Descriptive statistics of the df30 spot prices (Linux/UNIX, us-east-1a, 5/22/2021 onward)
print(df30.loc[:, 'Spot Price'].describe())

In [None]:
# Descriptive statistics of the df31 spot prices (Red Hat Enterprise Linux, us-east-1a, 5/22/2021 onward)
print(df31.loc[:, 'Spot Price'].describe())

In [None]:
# Descriptive statistics of the df32 spot prices (SUSE Linux, us-east-1a, 5/22/2021 onward)
print(df32.loc[:, 'Spot Price'].describe())

In [None]:
# Narrow each p4d.24xlarge product frame to the us-east-1d AZ and to observations
# stamped 5/22/2021 (00:00 UTC) or later:
#   df33 <- df27 (Linux/UNIX), df34 <- df28 (Red Hat Enterprise Linux), df35 <- df29 (SUSE Linux)
df33, df34, df35 = (
    os_frame.loc[(os_frame['Availability Zone'] == 'us-east-1d') &
                 (os_frame['Date and Time Stamp'] >= '2021-05-22 00:00:00+00:00')]
    for os_frame in (df27, df28, df29)
)

In [None]:
# Time series of p4d.24xlarge across the us-east-1d AZ, one line per product description
plt.style.use('seaborn-poster')

# (data, line colour, legend label) for each product description.
# Each line takes both its dates and its prices from the SAME frame: the earlier
# version plotted df35's dates against df34's prices for SUSE Linux, and drew it
# in 'b', the same colour as Red Hat Enterprise Linux. SUSE Linux now uses df35
# throughout and 'c', matching the us-east-1a version of this plot.
os_series = (
    (df33, 'k', 'Linux/UNIX'),
    (df34, 'b', 'Red Hat Enterprise Linux'),
    (df35, 'c', 'SUSE Linux'),
)
for os_frame, os_colour, os_label in os_series:
    plt.plot_date(os_frame['Date and Time Stamp'], os_frame['Spot Price'], linestyle='solid',
                  color=os_colour, label=os_label)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the p4d.24xlarge Instance Type Across the us-east-1d' + "\n" +
          'Availability Zone and all Product Descriptions From 5/22/2021 Onward')
plt.xlabel('Date')
plt.ylabel('Spot Price')

plt.legend()
plt.tight_layout()
#plt.savefig('timeseries11.png')
#plt.show()

In [None]:
# Descriptive statistics of the df33 spot prices (Linux/UNIX, us-east-1d, 5/22/2021 onward)
print(df33.loc[:, 'Spot Price'].describe())

In [None]:
# Descriptive statistics of the df34 spot prices (Red Hat Enterprise Linux, us-east-1d, 5/22/2021 onward)
print(df34.loc[:, 'Spot Price'].describe())

In [None]:
# Descriptive statistics of the df35 spot prices (SUSE Linux, us-east-1d, 5/22/2021 onward)
print(df35.loc[:, 'Spot Price'].describe())

In [None]:
# df36: df3 rows whose Instance Type contains any of the '|'-separated alternatives below
# (str.contains treats the pattern as a regular expression by default and matches anywhere in the value).
# NOTE(review): the alternative 'df3' looks like it may be a typo for the 'd3' family - confirm.
# NOTE(review): 'inf1' is already matched by 'f1', so that alternative is redundant.
family_pattern = "a1|c6|df3|f1|g4|h1|i3|inf1|m6|p4|r6|t4|x2|z1"
df36 = df3[df3['Instance Type'].str.contains(family_pattern)]
df36

In [None]:
# Time series of a1 ITs within the us-east-1a AZ for Linux/UNIX products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = (df36['Availability Zone'] == 'us-east-1a') & (df36['Product Description'] == 'Linux/UNIX')

# One DataFrame per instance type, in a1_types order
filt36_1, filt36_2, filt36_3, filt36_4, filt36_5, filt36_6 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_1, filt36_2, filt36_3, filt36_4, filt36_5, filt36_6)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Linux/UNIX Products\n'
          'Within the us-east-1a Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.6))
plt.tight_layout()
#plt.savefig('timeseries12.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1b AZ for Linux/UNIX products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = (df36['Availability Zone'] == 'us-east-1b') & (df36['Product Description'] == 'Linux/UNIX')

# One DataFrame per instance type, in a1_types order
filt36_7, filt36_8, filt36_9, filt36_10, filt36_11, filt36_12 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_7, filt36_8, filt36_9, filt36_10, filt36_11, filt36_12)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Linux/UNIX Products\n'
          'Within the us-east-1b Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.6))
plt.tight_layout()
#plt.savefig('timeseries13.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1c AZ for Linux/UNIX products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = (df36['Availability Zone'] == 'us-east-1c') & (df36['Product Description'] == 'Linux/UNIX')

# One DataFrame per instance type, in a1_types order
filt36_13, filt36_14, filt36_15, filt36_16, filt36_17, filt36_18 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_13, filt36_14, filt36_15, filt36_16, filt36_17, filt36_18)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Linux/UNIX Products\n'
          'Within the us-east-1c Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.6))
plt.tight_layout()
#plt.savefig('timeseries14.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1a AZ for Red Hat Enterprise Linux products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = ((df36['Availability Zone'] == 'us-east-1a') &
               (df36['Product Description'] == 'Red Hat Enterprise Linux'))

# One DataFrame per instance type, in a1_types order
filt36_19, filt36_20, filt36_21, filt36_22, filt36_23, filt36_24 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_19, filt36_20, filt36_21, filt36_22, filt36_23, filt36_24)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Red Hat Enterprise\n'
          'Linux Products Within the us-east-1a Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.3))
plt.tight_layout()
#plt.savefig('timeseries15.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1b AZ for Red Hat Enterprise Linux products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = ((df36['Availability Zone'] == 'us-east-1b') &
               (df36['Product Description'] == 'Red Hat Enterprise Linux'))

# One DataFrame per instance type, in a1_types order
filt36_25, filt36_26, filt36_27, filt36_28, filt36_29, filt36_30 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_25, filt36_26, filt36_27, filt36_28, filt36_29, filt36_30)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Red Hat Enterprise\n'
          'Linux Products Within the us-east-1b Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.3))
plt.tight_layout()
#plt.savefig('timeseries16.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1c AZ for Red Hat Enterprise Linux products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = ((df36['Availability Zone'] == 'us-east-1c') &
               (df36['Product Description'] == 'Red Hat Enterprise Linux'))

# One DataFrame per instance type, in a1_types order
filt36_31, filt36_32, filt36_33, filt36_34, filt36_35, filt36_36 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_31, filt36_32, filt36_33, filt36_34, filt36_35, filt36_36)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for Red Hat Enterprise\n'
          'Linux Products Within the us-east-1c Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.3))
plt.tight_layout()
#plt.savefig('timeseries17.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1a AZ for SUSE Linux products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = (df36['Availability Zone'] == 'us-east-1a') & (df36['Product Description'] == 'SUSE Linux')

# One DataFrame per instance type, in a1_types order
filt36_37, filt36_38, filt36_39, filt36_40, filt36_41, filt36_42 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_37, filt36_38, filt36_39, filt36_40, filt36_41, filt36_42)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for SUSE Linux\n'
          'Products Within the us-east-1a Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.65))
plt.tight_layout()
#plt.savefig('timeseries18.png')
#plt.show()

In [None]:
# Time series of a1 ITs within the us-east-1b AZ for SUSE Linux products
plt.style.use('seaborn-poster')

# Instance types to draw, in plotting/legend order
a1_types = ['a1.medium', 'a1.metal', 'a1.large', 'a1.xlarge', 'a1.2xlarge', 'a1.4xlarge']

# Condition shared by every line in this figure: zone and product description
zone_and_os = (df36['Availability Zone'] == 'us-east-1b') & (df36['Product Description'] == 'SUSE Linux')

# One DataFrame per instance type, in a1_types order
filt36_43, filt36_44, filt36_45, filt36_46, filt36_47, filt36_48 = (
    df36.loc[zone_and_os & (df36['Instance Type'] == a1_type)] for a1_type in a1_types
)

# Colours pair up with a1_types; only a1.4xlarge is drawn semi-transparent.
# NOTE(review): 'w' (white) at 50% alpha may be hard to see on a light background - confirm intended
a1_frames = (filt36_43, filt36_44, filt36_45, filt36_46, filt36_47, filt36_48)
for a1_frame, a1_name, a1_colour in zip(a1_frames, a1_types, 'kbcryw'):
    extra_opts = {'alpha': 0.5} if a1_name == 'a1.4xlarge' else {}
    plt.plot_date(a1_frame['Date and Time Stamp'], a1_frame['Spot Price'], linestyle='solid',
                  color=a1_colour, label=a1_name, **extra_opts)

# Auto-format the date tick labels, then render each one like "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for SUSE Linux\n'
          'Products Within the us-east-1b Availability Zone')
plt.ylabel('Spot Price')
plt.xlabel('Date')

plt.legend(loc=(0.77, 0.65))
plt.tight_layout()
#plt.savefig('timeseries19.png')
#plt.show()

In [None]:
# Spot-price time series of every a1 instance type (SUSE Linux, us-east-1c)
plt.style.use('seaborn-poster')

# Rows shared by all curves: one availability zone and one product description
zone_os_rows = ((df36['Availability Zone'] == 'us-east-1c') &
                (df36['Product Description'] == 'SUSE Linux'))

# (instance type, line colour, extra plot_date keyword arguments), in drawing order
a1_curves = [
    ('a1.medium', 'k', {}),
    ('a1.metal', 'b', {}),
    ('a1.large', 'c', {}),
    ('a1.xlarge', 'r', {}),
    ('a1.2xlarge', 'y', {}),
    ('a1.4xlarge', 'w', {'alpha': 0.5}),
]
a1_frames = [df36.loc[zone_os_rows & (df36['Instance Type'] == it_name)]
             for it_name, _, _ in a1_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt36_49, filt36_50, filt36_51, filt36_52, filt36_53, filt36_54 = a1_frames

for (it_name, line_colour, plot_extras), it_frame in zip(a1_curves, a1_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=it_name, **plot_extras)

# Format the date axis: auto-formatted tick labels, shown as e.g. "Jun 01, 2021"
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of all a1 Instance Types for SUSE Linux\n'
          'Products Within the us-east-1c Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')

plt.legend(loc=(0.77, 0.65))
plt.tight_layout()
#plt.savefig('timeseries20.png')
#plt.show()

In [None]:
# Distinct instance types in df36 whose name contains "f1"
f1_rows = df36['Instance Type'].str.contains("f1")
df36.loc[f1_rows, 'Instance Type'].unique()

In [None]:
# df37: the rows of df36 whose product description is exactly 'Windows'
is_windows = df36['Product Description'].eq('Windows')
df37 = df36.loc[is_windows]
df37

In [None]:
# Distinct instance types present in the Windows-only frame
df37.loc[:, 'Instance Type'].unique()

In [None]:
# Distinct instance types in df37 whose name contains "z1"
z1_rows = df37['Instance Type'].str.contains("z1")
df37.loc[z1_rows, 'Instance Type'].unique()

In [None]:
# Time series of highest spot price ITs within df37 in us-east-1c
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1c zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1c') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, line colour, extra plot_date keyword arguments), in drawing order
win_curves = [
    ('g4dn.metal', 'k', {}),
    ('h1.16xlarge', 'm', {}),
    ('i3.16xlarge', 'b', {}),
    ('i3.metal', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'g', {}),
    ('z1d.metal', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == it_name)]
              for it_name, _, _ in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (it_name, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=it_name, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

# The title names us-east-1c, the zone actually filtered above (it previously said us-east-1a)
plt.title('Time Series Plot of the Latest Generation Instance Types Across' + "\n" +
          'Windows Products Within the us-east-1c Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')
plt.yticks([3,3.5,4,4.5,5,5.5,6,6.5,7,7.5,8])

plt.legend(loc = (0.77,0.40))
plt.tight_layout()
#plt.savefig('timeseries21.png')
#plt.show()

In [None]:
# Spot-price time series of the priciest Windows instance types in us-east-1f
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1f zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1f') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, line colour, extra plot_date keyword arguments), in drawing order
win_curves = [
    ('g4dn.metal', 'k', {}),
    ('h1.16xlarge', 'm', {}),
    ('i3.16xlarge', 'b', {}),
    ('i3.metal', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'g', {}),
    ('z1d.metal', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == it_name)]
              for it_name, _, _ in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (it_name, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=it_name, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the Latest Generation Instance Types Across\n'
          'Windows Products Within the us-east-1f Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')
# Ticks every 0.5 from 3 up to 9.5
plt.yticks([3 + 0.5 * step for step in range(14)])

plt.legend(loc=(0.77, 0.25))
plt.tight_layout()
#plt.savefig('timeseries22.png')
#plt.show()

In [None]:
# Spot-price time series of the priciest Windows instance types in us-east-1a
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1a zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1a') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, line colour, extra plot_date keyword arguments), in drawing order
win_curves = [
    ('g4dn.metal', 'k', {}),
    ('h1.16xlarge', 'm', {}),
    ('i3.16xlarge', 'b', {}),
    ('i3.metal', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'g', {}),
    ('z1d.metal', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == it_name)]
              for it_name, _, _ in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (it_name, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=it_name, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the Latest Generation Instance Types Across\n'
          'Windows Products Within the us-east-1a Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')
# Ticks every 0.5 from 3 up to 8
plt.yticks([3 + 0.5 * step for step in range(11)])

plt.legend(loc=(0.77, 0.31))
plt.tight_layout()
#plt.savefig('timeseries23.png')
#plt.show()

In [None]:
# Spot-price time series of the priciest Windows instance types in us-east-1b
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1b zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1b') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, line colour, extra plot_date keyword arguments), in drawing order
win_curves = [
    ('g4dn.metal', 'k', {}),
    ('h1.16xlarge', 'm', {}),
    ('i3.16xlarge', 'b', {}),
    ('i3.metal', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'g', {}),
    ('z1d.metal', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == it_name)]
              for it_name, _, _ in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (it_name, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=it_name, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the Latest Generation Instance Types Across\n'
          'Windows Products Within the us-east-1b Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')
# Ticks every 0.5 from 3 up to 8
plt.yticks([3 + 0.5 * step for step in range(11)])

plt.legend(loc=(0.77, 0.31))
plt.tight_layout()
#plt.savefig('timeseries24.png')
#plt.show()

In [None]:
# Spot-price time series of the priciest Windows instance types in us-east-1d
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1d zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1d') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, legend label, line colour, extra plot_date keyword arguments)
win_curves = [
    ('g4dn.metal', 'g4dn.metal', 'k', {}),
    ('h1.16xlarge', 'h1.16xlarge (N/A)', 'm', {}),
    ('i3.16xlarge', 'i3.16xlarge', 'b', {}),
    ('i3.metal', 'i3.metal', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'i3en.24xlarge', 'g', {}),
    ('z1d.metal', 'z1d.metal', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == curve[0])]
              for curve in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (_, legend_text, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=legend_text, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the Latest Generation Instance Types Across\n'
          'Windows Products Within the us-east-1d Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')
# Ticks every 0.5 from 3 up to 8
plt.yticks([3 + 0.5 * step for step in range(11)])

plt.legend(loc=(0.77, 0.31))
plt.tight_layout()
#plt.savefig('timeseries25.png')
#plt.show()

In [None]:
# Spot-price time series of the priciest Windows instance types in us-east-1e
plt.style.use('seaborn-poster')

# Rows shared by all curves: the us-east-1e zone and the Windows product description
win_zone_rows = ((df37['Availability Zone'] == 'us-east-1e') &
                 (df37['Product Description'] == 'Windows'))

# (instance type, legend label, line colour, extra plot_date keyword arguments)
win_curves = [
    ('g4dn.metal', 'g4dn.metal (N/A)', 'k', {}),
    ('h1.16xlarge', 'h1.16xlarge (N/A)', 'm', {}),
    ('i3.16xlarge', 'i3.16xlarge', 'b', {}),
    ('i3.metal', 'i3.metal (N/A)', 'w', {'alpha': 0.5}),
    ('i3en.24xlarge', 'i3en.24xlarge (N/A)', 'g', {}),
    ('z1d.metal', 'z1d.metal (N/A)', 'c', {}),
]
win_frames = [df37.loc[win_zone_rows & (df37['Instance Type'] == curve[0])]
              for curve in win_curves]
# Keep the per-instance-type frames reachable under their numbered names
filt37_1, filt37_2, filt37_3, filt37_4, filt37_5, filt37_6 = win_frames

for (_, legend_text, line_colour, plot_extras), it_frame in zip(win_curves, win_frames):
    plt.plot_date(it_frame['Date and Time Stamp'], it_frame['Spot Price'], linestyle='solid',
                  color=line_colour, label=legend_text, **plot_extras)

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the Latest Generation Instance Types Across\n'
          'Windows Products Within the us-east-1e Availability Zone')
plt.xlabel('Date')
plt.ylabel('Spot Price')

plt.legend(loc=(0.75, 0.71))
plt.tight_layout()
#plt.savefig('timeseries26.png')
#plt.show()

In [None]:
# g4dn.metal spot-price time series (df37, us-east-1f) for the window
# 4/20/2021 through 5/01/2021 (upper bound is exclusive midnight of 5/02)
plt.style.use('seaborn-poster')

stamps = df37['Date and Time Stamp']
in_window = (stamps >= '2021-04-20 00:00:00+00:00') & (stamps < '2021-05-02 00:00:00+00:00')
is_g4dn_metal = df37['Instance Type'] == 'g4dn.metal'
in_zone_1f = df37['Availability Zone'] == 'us-east-1f'
filt37_specg4 = df37.loc[in_window & is_g4dn_metal & in_zone_1f]

plt.plot_date(filt37_specg4['Date and Time Stamp'], filt37_specg4['Spot Price'],
              color='#14c4db', linestyle='solid')

plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%b %d, %Y')
plt.gca().xaxis.set_major_formatter(date_format)

plt.title('Time Series Plot of the g4dn.metal Instance Type Across Windows Products\n'
          'and the us-east-1f Availability Zone, Between 4/20/2021 and 5/01/2021')
plt.xlabel('Date')
plt.ylabel('Spot Price')

#plt.tight_layout()
#plt.savefig('timeseries27.png')
#plt.show()

In [None]:
# Time series of specified OS across ALL ITs and AZs: FOR LOOP
# One figure per instance type, one curve per availability zone. Only instance
# types whose maximum spot price exceeds three times their minimum are drawn.

OS = ['Linux/UNIX']

# Apply the style once, before any figure exists, so figure-level settings
# also reach the first figure created below
plt.style.use('seaborn-poster')

for o in OS:
    temp = df38[df38.OS == o]
    # Every instance type recorded for this OS is considered
    Instances = temp.IT.unique()

    for i in tqdm(Instances):
        temp1 = temp[temp.IT == i]

        # Written as "not (max > 3*min)" so a NaN price still skips the plot
        if not (temp1.SP.max() > 3*temp1.SP.min()):
            continue

        plt.figure()
        for a in temp1.AZ.unique():
            temp2 = temp1[temp1.AZ == a]
            plt.plot_date(temp2['DT'], temp2['SP'], linestyle='solid', label=a)

        # Axis formatting is needed once per figure, not once per curve;
        # each figure gets its own formatter instance
        plt.gcf().autofmt_xdate()
        date_format = mpl_dates.DateFormatter('%b %d, %Y')
        plt.gca().xaxis.set_major_formatter(date_format)

        plt.title('Time Series Plot of the ' + i + ' Instance Type Across us-east-1' + "\n" +
                  'Availability Zones and ' + o)
        plt.xlabel('Date')
        plt.ylabel('Spot Price')

        plt.legend()
        plt.tight_layout()

        #plt.savefig(i + '_tsfig.png')

        #plt.show()

In [None]:
## Everything above was done on Monday, 6/14/2021

In [None]:
# df40: rows of df38 whose instance type matches any of the listed family tokens
# NOTE(review): the 'df3' token looks like a typo (perhaps 'd3' was meant) - confirm
latest_gen_pattern = "a1|c6|df3|f1|g4|h1|i3|inf1|m6|p4|r6|t4|x2|z1"
is_latest_gen = df38['IT'].str.contains(latest_gen_pattern)
df40 = df38.loc[is_latest_gen]
df40

In [None]:
# Instance types available as choices for the for loop below
df40.IT.unique()

In [None]:
# Operating systems available as choices for the for loop below
df40.OS.unique()

In [None]:
# Time series of specified OS and IT across ALL AZs: FOR LOOP
# One figure per (OS, instance type) pair, one curve per availability zone

Instances = ['p4d.24xlarge'] #choose what you want
OS = ['SUSE Linux'] #choose what you want

# Apply the style once, before any figure is created
plt.style.use('seaborn-poster')

for o in OS:
    temp = df40[df40.OS == o] #latest gen ITs
    #temp = df38[df38.OS == o] #all ITs

    for i in tqdm(Instances):
        temp1 = temp[temp.IT == i]
        if temp1.empty:
            # No rows for this OS / instance type pair, so there is nothing to draw
            continue

        # Open the figure BEFORE plotting so every pair gets its own canvas and
        # no empty figure is left behind after the last one
        plt.figure()
        for a in temp1.AZ.unique():
            temp2 = temp1[temp1.AZ == a]
            plt.plot_date(temp2['DT'], temp2['SP'], linestyle='solid', label=a)

        # Axis formatting, title and legend are set once per figure
        plt.gcf().autofmt_xdate()
        date_format = mpl_dates.DateFormatter('%b %d, %Y')
        plt.gca().xaxis.set_major_formatter(date_format)

        plt.title('Time Series Plot of the ' + i + ' Instance Type Across us-east-1' + "\n" +
                  'Availability Zones and ' + o + ' Operating Systems')
        plt.xlabel('Date')
        plt.ylabel('Spot Price')

        plt.legend()
        plt.tight_layout()

        #plt.savefig(i + '_AZ_ts.png')
        #plt.show()

In [None]:
# df39: an independent copy of df37 with its five columns renamed to short labels
short_names = ['AZ', 'IT', 'OS', 'SP', 'DT']
df39 = df37.copy()
df39.columns = short_names
df39.IT.unique()

In [None]:
# describe() summary (count, mean, std, min, quartiles, max; 4 decimals) of the
# spot price of every df39 row whose instance type contains "g4", across all AZs
g_filt = df39.IT.str.contains("g4")
g_prices = df39.loc[g_filt, 'SP']
print(g_prices.describe().round(4))

In [None]:
# describe() summary (count, mean, std, min, quartiles, max; 4 decimals) of the
# spot price of every df39 row whose instance type contains "h1", across all AZs
h_filt = df39.IT.str.contains("h1")
h_prices = df39.loc[h_filt, 'SP']
print(h_prices.describe().round(4))

In [None]:
# describe() summary (count, mean, std, min, quartiles, max; 4 decimals) of the
# spot price of every df39 row whose instance type contains "i3" (this is a
# substring match, so i3en.* names qualify as well), across all AZs
i_filt = df39.IT.str.contains("i3")
i_prices = df39.loc[i_filt, 'SP']
print(i_prices.describe().round(4))

In [None]:
# describe() summary (count, mean, std, min, quartiles, max; 4 decimals) of the
# spot price of every df39 row whose instance type contains "z1", across all AZs
z_filt = df39.IT.str.contains("z1")
z_prices = df39.loc[z_filt, 'SP']
print(z_prices.describe().round(4))

## Integration

In [None]:
## Integration Function
# adapted from https://nbviewer.jupyter.org/gist/metakermit/5720498

def integrate_method(self, how='trapz', unit='s'):
    '''Numerically integrate the time series over its index.

    The index is turned into seconds (its int64 nanosecond values divided by
    10**9) and used as the x coordinates, so for a datetime-indexed series
    the result is expressed in "value x seconds".

    @param how: name of the integration rule (trapz by default)
    @param unit: not used by the computation; kept so existing calls that
        pass it keep working
    @return whatever the chosen SciPy rule returns: a scalar for trapz,
        simps and romb, an array of running totals for cumtrapz
    @raise AttributeError: if ``how`` is not one of the rules below, or the
        installed SciPy offers no implementation of it
    @raise ValueError: if ``how='romb'`` and the samples are not equally spaced

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberg's rule (equally spaced samples only)

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    '''
    # Current SciPy name first, legacy alias second: recent SciPy releases no
    # longer ship trapz / cumtrapz / simps, older ones lack the new names.
    scipy_names = {
        'trapz': ('trapezoid', 'trapz'),
        'cumtrapz': ('cumulative_trapezoid', 'cumtrapz'),
        'simps': ('simpson', 'simps'),
        'romb': ('romb',),
    }
    if how not in scipy_names:
        raise AttributeError(
            'Unsupported integration rule: %s. Expecting one of these sample-based '
            'integration rules: %s' % (how, sorted(scipy_names)))

    rule = None
    for candidate in scipy_names[how]:
        rule = getattr(integrate, candidate, None)
        if rule is not None:
            break
    if rule is None:
        raise AttributeError('scipy.integrate provides none of: %s'
                             % (', '.join(scipy_names[how])))

    # Normalise datetimes to nanosecond resolution first so that dividing by
    # 10**9 really yields seconds whatever unit the index is stored in.
    stamps = self.index.values
    if np.issubdtype(stamps.dtype, np.datetime64):
        stamps = stamps.astype('datetime64[ns]')
    seconds = stamps.astype(np.int64) / 10**9

    if how == 'romb':
        # romb takes a constant sample spacing (dx), not an x array
        spacing = np.diff(seconds)
        if spacing.size == 0 or not np.allclose(spacing, spacing[0]):
            raise ValueError('romb requires equally spaced samples')
        return rule(self.values, dx=spacing[0])

    # x is passed by keyword: newer SciPy makes it keyword-only for simpson
    return rule(self.values, x=seconds)

pd.Series.integrate = integrate_method

In [None]:
# Display df38 (the frame queried below through its AZ, IT, OS, SP and DT columns)
df38

In [None]:
# ts1: spot prices of p4d.24xlarge on Linux/UNIX in us-east-1c, indexed by timestamp
x1_rows = (df38.AZ == 'us-east-1c') & (df38.IT == 'p4d.24xlarge') & (df38.OS == 'Linux/UNIX')
df_x1 = df38.loc[x1_rows]
x1 = df_x1.SP.values  # spot prices (the series values)
y1 = df_x1.DT.values  # timestamps (the series index)
#ts1 = pd.Series(x1, pd.date_range(start='2021-03-04 09:09:55+00:00', end='2021-06-01 23:13:12+00:00', periods=len(x1)))
ts1 = pd.Series(data=x1, index=y1)

In [None]:
# Trapezoidal integral of ts1 (time measured in seconds), then the same
# rounded figure divided by 3600
ts1_area = ts1.integrate().round(4)
print(ts1_area)
print((ts1_area / 3600).round(4))

In [None]:
# Mean spot price of the df_x1 selection, and that mean scaled by 2160
# (presumably the number of hours in the observation window - confirm)
x1_mean = df_x1['SP'].mean()
print(x1_mean.round(4))
# Round AFTER scaling, as the later cells do: rounding the mean first would
# multiply its rounding error by 2160
print((2160 * x1_mean).round(4))

In [None]:
# Running (cumulative trapezoidal) integral of ts1, then the same rounded
# values divided by 3600
ts1_running = ts1.integrate('cumtrapz').round(4)
print(ts1_running)
print((ts1_running / 3600).round(4))

In [None]:
# Simpson's-rule integral of ts1, then the same rounded figure divided by 3600
ts1_simpson = ts1.integrate('simps').round(4)
print(ts1_simpson)
print((ts1_simpson / 3600).round(4))

In [None]:
# a1.2xlarge / Linux/UNIX / us-east-1a: compare the integral with mean price x 2160
df_x2 = df38.loc[(df38['AZ'] == 'us-east-1a') & (df38['IT'] == 'a1.2xlarge')
                & (df38['OS'] == 'Linux/UNIX')]
x2 = df_x2['SP'].values
y2 = df_x2['DT'].values
ts2 = pd.Series(x2,y2)

print(ts2.integrate().round(4)) #result of integration function
print((ts2.integrate().round(4)/3600).round(4)) #result of integration function divided by 60 twice

print(' ')

print(df_x2['SP'].mean().round(4)) #mean spot price of specified TS
# Round AFTER scaling, as the later cells do: rounding the mean first would
# multiply its rounding error by 2160
print((2160*df_x2['SP'].mean()).round(4)) #mean spot price of specified TS multiplied by 2160

In [None]:
# z1d.xlarge / Windows / us-east-1f: compare the integral with mean price x 2160
x3_rows = (df38.AZ == 'us-east-1f') & (df38.IT == 'z1d.xlarge') & (df38.OS == 'Windows')
df_x3 = df38.loc[x3_rows]
x3 = df_x3.SP.values  # spot prices
y3 = df_x3.DT.values  # timestamps
ts3 = pd.Series(data=x3, index=y3)

ts3_area = ts3.integrate().round(4)
print(ts3_area)  # integral of price over time, time measured in seconds
print((ts3_area / 3600).round(4))  # the same integral divided by 3600

print(' ')

x3_mean = df_x3.SP.mean()
print(x3_mean.round(4))  # mean spot price of the selection
print((2160 * x3_mean).round(4))  # mean spot price multiplied by 2160

In [None]:
# c6gd.metal / SUSE Linux / us-east-1b: compare the integral with mean price x 2160
x4_rows = (df38.AZ == 'us-east-1b') & (df38.IT == 'c6gd.metal') & (df38.OS == 'SUSE Linux')
df_x4 = df38.loc[x4_rows]
x4 = df_x4.SP.values  # spot prices
y4 = df_x4.DT.values  # timestamps
ts4 = pd.Series(data=x4, index=y4)

ts4_area = ts4.integrate().round(4)
print(ts4_area)  # integral of price over time, time measured in seconds
print((ts4_area / 3600).round(4))  # the same integral divided by 3600

print(' ')

x4_mean = df_x4.SP.mean()
print(x4_mean.round(4))  # mean spot price of the selection
print((2160 * x4_mean).round(4))  # mean spot price multiplied by 2160

In [None]:
# m6gd.xlarge / Red Hat Enterprise Linux / us-east-1d: compare the integral
# with mean price x 2160
x5_rows = ((df38.AZ == 'us-east-1d') & (df38.IT == 'm6gd.xlarge') &
           (df38.OS == 'Red Hat Enterprise Linux'))
df_x5 = df38.loc[x5_rows]
x5 = df_x5.SP.values  # spot prices
y5 = df_x5.DT.values  # timestamps
ts5 = pd.Series(data=x5, index=y5)

ts5_area = ts5.integrate().round(4)
print(ts5_area)  # integral of price over time, time measured in seconds
print((ts5_area / 3600).round(4))  # the same integral divided by 3600

print(' ')

x5_mean = df_x5.SP.mean()
print(x5_mean.round(4))  # mean spot price of the selection
print((2160 * x5_mean).round(4))  # mean spot price multiplied by 2160

In [None]:
# f1.16xlarge / Linux/UNIX / us-east-1e: compare the integral with mean price x 2160
x6_rows = (df38.AZ == 'us-east-1e') & (df38.IT == 'f1.16xlarge') & (df38.OS == 'Linux/UNIX')
df_x6 = df38.loc[x6_rows]
x6 = df_x6.SP.values  # spot prices
y6 = df_x6.DT.values  # timestamps
ts6 = pd.Series(data=x6, index=y6)

ts6_area = ts6.integrate().round(4)
print(ts6_area)  # integral of price over time, time measured in seconds
print((ts6_area / 3600).round(4))  # the same integral divided by 3600

print(' ')

x6_mean = df_x6.SP.mean()
print(x6_mean.round(4))  # mean spot price of the selection
print((2160 * x6_mean).round(4))  # mean spot price multiplied by 2160

In [None]:
## Integration Function 2: Divides Original Integration Function by 3600 (in the function)
# adapted from https://nbviewer.jupyter.org/gist/metakermit/5720498

def integrate_method2(self, how='trapz', unit='s'):
    '''Numerically integrate the time series over its index, divided by 3600.

    The index is turned into seconds (its int64 nanosecond values divided by
    10**9) and used as the x coordinates; the integral is then divided by
    3600, so for a datetime-indexed series the result is "value x hours".

    @param how: name of the integration rule (trapz by default)
    @param unit: not used by the computation; kept so existing calls that
        pass it keep working
    @return the chosen SciPy rule's result divided by 3600: a scalar for
        trapz, simps and romb, an array of running totals for cumtrapz
    @raise AttributeError: if ``how`` is not one of the rules below, or the
        installed SciPy offers no implementation of it
    @raise ValueError: if ``how='romb'`` and the samples are not equally spaced

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberg's rule (equally spaced samples only)

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    '''
    # Current SciPy name first, legacy alias second: recent SciPy releases no
    # longer ship trapz / cumtrapz / simps, older ones lack the new names.
    scipy_names = {
        'trapz': ('trapezoid', 'trapz'),
        'cumtrapz': ('cumulative_trapezoid', 'cumtrapz'),
        'simps': ('simpson', 'simps'),
        'romb': ('romb',),
    }
    if how not in scipy_names:
        raise AttributeError(
            'Unsupported integration rule: %s. Expecting one of these sample-based '
            'integration rules: %s' % (how, sorted(scipy_names)))

    rule = None
    for candidate in scipy_names[how]:
        rule = getattr(integrate, candidate, None)
        if rule is not None:
            break
    if rule is None:
        raise AttributeError('scipy.integrate provides none of: %s'
                             % (', '.join(scipy_names[how])))

    # Normalise datetimes to nanosecond resolution first so that dividing by
    # 10**9 really yields seconds whatever unit the index is stored in.
    stamps = self.index.values
    if np.issubdtype(stamps.dtype, np.datetime64):
        stamps = stamps.astype('datetime64[ns]')
    seconds = stamps.astype(np.int64) / 10**9

    if how == 'romb':
        # romb takes a constant sample spacing (dx), not an x array
        spacing = np.diff(seconds)
        if spacing.size == 0 or not np.allclose(spacing, spacing[0]):
            raise ValueError('romb requires equally spaced samples')
        return rule(self.values, dx=spacing[0]) / 3600

    # x is passed by keyword: newer SciPy makes it keyword-only for simpson
    return rule(self.values, x=seconds) / 3600

pd.Series.integrate2 = integrate_method2

In [None]:
# h1.16xlarge / SUSE Linux / us-east-1f: compare integrate2() with mean price x 2160
df_x7 = df38.loc[(df38['AZ'] == 'us-east-1f') & (df38['IT'] == 'h1.16xlarge')
                & (df38['OS'] == 'SUSE Linux')]
x7 = df_x7['SP'].values
y7 = df_x7['DT'].values
ts7 = pd.Series(x7,y7)

print(ts7.integrate2().round(4))
print(' ')
print(df_x7['SP'].mean().round(4))
# Round AFTER scaling, as the neighbouring cells do: rounding the mean first
# would multiply its rounding error by 2160
print((2160*df_x7['SP'].mean()).round(4))

In [None]:
# t4g.large / Red Hat Enterprise Linux / us-east-1d: compare integrate2()
# with mean price x 2160
x8_rows = ((df38.AZ == 'us-east-1d') & (df38.IT == 't4g.large') &
           (df38.OS == 'Red Hat Enterprise Linux'))
df_x8 = df38.loc[x8_rows]
x8 = df_x8.SP.values  # spot prices
y8 = df_x8.DT.values  # timestamps
ts8 = pd.Series(data=x8, index=y8)

print(ts8.integrate2().round(4))
print(' ')
x8_mean = df_x8.SP.mean()
print(x8_mean.round(4))
print((2160 * x8_mean).round(4))

In [None]:
# m3.medium / Linux/UNIX / us-east-1e: compare integrate2() with mean price x 2160
x9_rows = (df38.AZ == 'us-east-1e') & (df38.IT == 'm3.medium') & (df38.OS == 'Linux/UNIX')
df_x9 = df38.loc[x9_rows]
x9 = df_x9.SP.values  # spot prices
y9 = df_x9.DT.values  # timestamps
ts9 = pd.Series(data=x9, index=y9)

print(ts9.integrate2().round(4))
print(' ')
x9_mean = df_x9.SP.mean()
print(x9_mean.round(4))
print((2160 * x9_mean).round(4))

In [None]:
# c6gd.large / Red Hat Enterprise Linux / us-east-1b: compare integrate2()
# with mean price x 2160
x11_rows = ((df38.AZ == 'us-east-1b') & (df38.IT == 'c6gd.large') &
            (df38.OS == 'Red Hat Enterprise Linux'))
df_x11 = df38.loc[x11_rows]
x11 = df_x11.SP.values  # spot prices
y11 = df_x11.DT.values  # timestamps
ts11 = pd.Series(data=x11, index=y11)

print(ts11.integrate2().round(4))
print(' ')
x11_mean = df_x11.SP.mean()
print(x11_mean.round(4))
print((2160 * x11_mean).round(4))

In [None]:
# x1e.xlarge / Windows / us-east-1a: compare integrate2() with mean price x 2160
x12_rows = (df38.AZ == 'us-east-1a') & (df38.IT == 'x1e.xlarge') & (df38.OS == 'Windows')
df_x12 = df38.loc[x12_rows]
x12 = df_x12.SP.values  # spot prices
y12 = df_x12.DT.values  # timestamps
ts12 = pd.Series(data=x12, index=y12)

print(ts12.integrate2().round(4))
print(' ')
x12_mean = df_x12.SP.mean()
print(x12_mean.round(4))
print((2160 * x12_mean).round(4))

In [None]:
# NOTE(review): 924.7107 is presumably an integrate2() result copied from an
# earlier run - confirm which selection it belongs to
924.7107/2160 #integral value / 2160 is cost per hour of that instance type

## Interpolation

In [None]:
# Linear vs. cubic interpolation of the p4d.24xlarge Linux/UNIX spot price in
# us-east-1c, fitted on the observations from 5/22/2021 onward
p4d_rows = df38.loc[(df38['AZ'] == 'us-east-1c') & (df38['IT'] == 'p4d.24xlarge') &
                    (df38['OS'] == 'Linux/UNIX')]
p4d_recent = p4d_rows.loc[p4d_rows['DT'] >= '2021-05-22']

a1 = p4d_recent['DT'].values  # timestamps used as interpolation nodes
b1 = p4d_recent['SP'].values  # spot prices at those timestamps

# interp1d does arithmetic on x, so timestamps are expressed as seconds since
# the epoch (the same conversion the integration helpers apply to the index)
a1_sec = a1.astype('datetime64[ns]').astype(np.int64) / 10**9

# NOTE(review): kind='cubic' needs at least four nodes with distinct
# timestamps - confirm the selection satisfies that
f1_1 = interp1d(a1_sec, b1)
f1_2 = interp1d(a1_sec, b1, kind='cubic')

# Evaluation points: every timestamp of the selection that lies inside the
# fitted range (interp1d raises a ValueError outside it by default)
a1_new = p4d_rows['DT'].values
a1_new_sec = a1_new.astype('datetime64[ns]').astype(np.int64) / 10**9
inside_fit = (a1_new_sec >= a1_sec.min()) & (a1_new_sec <= a1_sec.max())
a1_new = a1_new[inside_fit]
a1_new_sec = a1_new_sec[inside_fit]

# Each curve gets its own x array; values are drawn against the real timestamps
plt.plot(a1, b1, 'o', a1_new, f1_1(a1_new_sec), '-', a1_new, f1_2(a1_new_sec), '--')
plt.legend(['data','linear','cubic'],loc='best')
plt.show()