In [None]:

#######################################################
#######################################################
############    COPYRIGHT - DATA SOCIETY   ############
#######################################################
#######################################################

## 10 TIMESERIES DAY3 ##

## NOTE: To run individual pieces of code, select the line of code and
##       press ctrl + enter for PCs or command + enter for Macs



In [None]:
#=================================================-
#### Slide 3: Import packages  ####

import os
import pandas as pd
from pandas.plotting import lag_plot
import numpy as np
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf



In [None]:
#=================================================-
#### Slide 4: Directory settings  ####

# Set `main_dir` to the location of your `af-werx` folder.
# `os.path.expanduser("~")` resolves to the current user's home directory on
# Linux (/home/<user>), Mac (/Users/<user>) and Windows (C:\Users\<user>),
# so a single assignment covers all three platforms and there is no
# `[username]` placeholder to edit by hand.
# If your `af-werx` folder lives somewhere other than the Desktop,
# replace this line with the full path to it.
main_dir = os.path.join(os.path.expanduser("~"), "Desktop", "af-werx")
# Make `data_dir` from the `main_dir` and
# remainder of the path to data directory.
# `os.path.join` inserts the correct separator for the current platform.
data_dir = os.path.join(main_dir, "data")




In [None]:
#=================================================-
#### Slide 5: Working directory  ####

# Make the data directory the working directory for the rest of the notebook.
os.chdir(data_dir)
# Confirm the change by printing the directory we are now in.
current_dir = os.getcwd()
print(current_dir)



In [None]:
#=================================================-
#### Slide 6: Load passenger miles dataset  ####

# Read pickle file into `passenger_miles` variable.
# The `with` block closes the file handle as soon as loading finishes
# (the handle was previously left open).
# NOTE: unpickling can execute arbitrary code - only load files you trust.
passenger_miles_path = os.path.join(data_dir, "passenger_miles.sav")
with open(passenger_miles_path, "rb") as pickle_file:
    passenger_miles = pickle.load(pickle_file)
# Preview the first rows to confirm the data loaded as expected.
print(passenger_miles.head())




In [None]:
#=================================================-
#### Slide 16: Time based indexing  ####

# Time based indexing: pass a full date string to `.loc`
# to look up the entry for that date.
single_date = '2001-09-01'
print(passenger_miles.loc[single_date])



In [None]:
#=================================================-
#### Slide 17: Time based indexing for date ranges  ####

# Time based indexing over a range: slice `.loc` with
# a start date and an end date given as strings.
range_start, range_end = '2001-09-01', '2002-04-01'
print(passenger_miles.loc[range_start:range_end])



In [None]:
#=================================================-
#### Slide 18: Time based indexing for partial dates  ####

# Partial indexing: the slice bounds are year-month strings
# with no day component.
first_month, last_month = '2001-09', '2002-04'
print(passenger_miles.loc[first_month:last_month])



In [None]:
#=================================================-
#### Slide 19: Visualize a subset of data  ####

# Let's plot just the miles for 1999 and 2000.
# First pull out that slice of the revenue column ...
miles_1999_2000 = passenger_miles.loc['1999':'2000', 'revenue_passenger_miles']

# ... then draw it on an explicitly created set of axes.
fig, ax = plt.subplots(figsize = (16, 6))
miles_1999_2000.plot(ax = ax, marker = 'o') #<- set marker
ax.set_title('Revenue passenger miles', fontsize = 20)
ax.set_xlabel('Date', fontsize = 18)
ax.set_ylabel('Revenue (billions of miles)', fontsize = 18)
ax.tick_params(labelsize = 14)
fig.tight_layout() #<- allows labels to fit within plotting area
plt.show()



In [None]:
#=================================================-
#### Slide 22: Augmenting time series data using DatetimeIndex  ####

# Add columns with year, month, and quarter.
# Each value is read from the attribute of the same name on the index.
for date_part in ('year', 'month', 'quarter'):
    passenger_miles[date_part] = getattr(passenger_miles.index, date_part)
print(passenger_miles.head())




In [None]:
#=================================================-
#### Slide 24: Exercise 1  ####





In [None]:
#=================================================-
#### Slide 27: Visualize data for different periods: by month  ####

# Box plots of revenue passenger miles, one box per value of `month`.
month_boxplot_options = dict(column = 'revenue_passenger_miles',
                             by = 'month',
                             figsize = (16, 6),  #<- you can adjust figure
                             fontsize = 14)      #<- and tick fontsize
passenger_miles.boxplot(**month_boxplot_options)
plt.xlabel('Month', fontsize = 18)
plt.ylabel('Revenue (billions of miles)', fontsize = 18)
plt.suptitle('Revenue passenger miles', fontsize = 20)    #<- change default subtitle
plt.title('grouped by month', fontsize = 18)              #<- change default title
plt.show()



In [None]:
#=================================================-
#### Slide 29: Visualize data for different periods: by quarter  ####

# Box plots of revenue passenger miles, one box per value of `quarter`.
quarter_boxplot_options = dict(column = 'revenue_passenger_miles',
                               by = 'quarter',
                               figsize = (16, 6),
                               fontsize = 14)
passenger_miles.boxplot(**quarter_boxplot_options)
plt.xlabel('Quarter', fontsize = 18)
plt.ylabel('Revenue (billions of miles)', fontsize = 18)
plt.suptitle('Revenue passenger miles', fontsize = 20)
plt.title('grouped by quarter', fontsize = 18)
plt.show()



In [None]:
#=================================================-
#### Slide 32: Adding seasons to data  ####

# Turn month numbers into season codes with integer arithmetic:
# months 12, 1, 2 -> 1; 3, 4, 5 -> 2; 6, 7, 8 -> 3; 9, 10, 11 -> 4.
season_codes = (passenger_miles['month'] % 12 + 3) // 3

# Build the code -> season name lookup (1: winter ... 4: fall).
season_dict = dict(zip(range(1, 5), ['winter', 'spring', 'summer', 'fall']))

# Store the season names in a new `season` column.
passenger_miles['season'] = season_codes.map(season_dict)
print(passenger_miles.head())




In [None]:
#=================================================-
#### Slide 33: Seasonal distributions  ####

# Box plots of revenue passenger miles, one box per value of `season`.
season_boxplot_options = dict(column = 'revenue_passenger_miles',
                              by = 'season',
                              figsize = (16, 6),
                              fontsize = 14)
passenger_miles.boxplot(**season_boxplot_options)
plt.xlabel('Season', fontsize = 18)
plt.ylabel('Revenue (billions of miles)', fontsize = 18)
plt.suptitle('Revenue passenger miles', fontsize = 20)
plt.title('grouped by season', fontsize = 18)
plt.show()



In [None]:
#=================================================-
#### Slide 37: Resample: compute quarterly and seasonal means  ####

# Both aggregates are computed from the same column, so select it once.
revenue_miles = passenger_miles['revenue_passenger_miles']

# Quarterly mean: resample with the 'Q' alias and average each bin.
# NOTE(review): newer pandas releases prefer the 'QE' spelling of this
# alias - confirm against the pandas version used for the course.
revenue_miles_quarterly_mean = revenue_miles.resample('Q').mean()

# Seasonal mean: 'QS-DEC' anchors quarter starts to December, so the bins
# begin in Dec / Mar / Jun / Sep, matching the winter-to-fall seasons.
revenue_miles_seasonal_mean = revenue_miles.resample('QS-DEC').mean()



In [None]:
#=================================================-
#### Slide 38: Compare: quarterly vs seasonal  ####

# Print the first 10 rows of the quarterly means, then of the seasonal means.
for resampled_mean in (revenue_miles_quarterly_mean, revenue_miles_seasonal_mean):
    print(resampled_mean.head(10))



In [None]:
#=================================================-
#### Slide 39: Visualize data for different periods: line graph  ####

# Name the start and end of the date range to extract.
start, end = '1979-01', '2002-04'

# Pair each series with the styling used to draw its line.
# The list order is the drawing order.
line_specs = [
    (passenger_miles['revenue_passenger_miles'],
     dict(marker = '.', linestyle = '-', linewidth = 0.5,
          label = 'Monthly value')),
    (revenue_miles_quarterly_mean,
     dict(marker = 'o', markersize = 5, linestyle = '-',
          label = 'Quarterly mean')),
    (revenue_miles_seasonal_mean,
     dict(marker = 'o', markersize = 5, linestyle = '--',
          label = 'Seasonal mean')),
]

# Plot the monthly values together with the quarterly and seasonal means,
# each restricted to the chosen date range.
fig, ax = plt.subplots(figsize = (16, 10))
for series, line_style in line_specs:
    ax.plot(series.loc[start:end], **line_style)
ax.set_title('Revenue passenger miles', fontsize = 20)
ax.set_xlabel('Date', fontsize = 18)
ax.set_ylabel("Passenger miles (billions)", fontsize = 18)
ax.legend()
plt.show()



In [None]:
#=================================================-
#### Slide 41: Visualize data for different periods: line graph  ####

# Label the start and end of the (narrower) date range to extract.
start, end = '1998-12', '2001-01'

# Pair each series with the styling used to draw its line.
# The list order is the drawing order.
line_specs = [
    (passenger_miles['revenue_passenger_miles'],
     dict(marker = '.', linestyle = '-', linewidth = 0.5,
          label = 'Monthly value')),
    (revenue_miles_quarterly_mean,
     dict(marker = 'o', markersize = 5, linestyle = '-',
          label = 'Quarterly mean')),
    (revenue_miles_seasonal_mean,
     dict(marker = 'o', markersize = 5, linestyle = '--',
          label = 'Seasonal mean')),
]

# Plot the monthly values together with the quarterly and seasonal means,
# each restricted to the chosen date range.
fig, ax = plt.subplots(figsize = (16, 10))
for series, line_style in line_specs:
    ax.plot(series.loc[start:end], **line_style)
ax.set_title('Revenue passenger miles', fontsize = 20)
ax.set_xlabel('Date', fontsize = 18)
ax.set_ylabel("Passenger miles (billions)", fontsize = 18)
ax.legend()
plt.show()



In [None]:
#=================================================-
#### Slide 45: Exercise 2  ####





In [None]:
#=================================================-
#### Slide 69: Seasonal decomposition: passenger miles data  ####

# Run seasonal decomposition on the revenue passenger miles column.
# NOTE(review): no `model` or `period` is passed, so statsmodels falls back
# on its defaults and needs to infer the period from the index - confirm
# that the index carries a frequency.
revenue_series = passenger_miles['revenue_passenger_miles']
res = seasonal_decompose(revenue_series)
# Printing the result shows the object itself; the components are
# read from its attributes (`res.trend`, `res.seasonal`, `res.resid`).
print(res)



In [None]:
#=================================================-
#### Slide 70: Seasonal decomposition: visualize components  ####

# Build the decomposition figure from the result object, then render it.
decomposition_fig = res.plot()
plt.show()



In [None]:
#=================================================-
#### Slide 71: Seasonal decomposition: get trend component  ####

# Pull the trend component out of the decomposition result.
trend = res.trend
print(trend.head(5))
# Draw the trend on its own, on explicitly created axes.
trend_fig, trend_ax = plt.subplots(figsize = (10, 7))
trend.plot(ax = trend_ax)
plt.show()



In [None]:
#=================================================-
#### Slide 72: Seasonal decomposition: get detrended series  ####

# Remove the trend: subtract the trend component
# from the original series, element by element.
detrended = passenger_miles['revenue_passenger_miles'].sub(trend)
print(detrended.head(5))
# Draw the detrended series on explicitly created axes.
detrended_fig, detrended_ax = plt.subplots(figsize = (10, 7))
detrended.plot(ax = detrended_ax)
plt.show()



In [None]:
#=================================================-
#### Slide 73: Seasonal decomposition: get seasonal component  ####

# Pull the seasonal component out of the decomposition result.
seasonal = res.seasonal
print(seasonal.head(5))
# Two stacked panels: the full component on top, a zoomed view below.
fig, axes = plt.subplots(nrows = 2, ncols = 1,
                         figsize = (10, 7))
full_ax, zoom_ax = axes
# Top panel: the seasonal component over the whole series.
seasonal.plot(ax = full_ax)
# Bottom panel: the same component restricted to 1979-1980.
seasonal_zoom = seasonal.loc['1979':'1980']
seasonal_zoom.plot(ax = zoom_ax)
plt.show()



In [None]:
#=================================================-
#### Slide 74: Seasonal decomposition: deseasonalized series  ####

# Remove seasonality: subtract the seasonal component
# from the original series, element by element.
deseasonalized = passenger_miles['revenue_passenger_miles'].sub(seasonal)
print(deseasonalized.head(20))
# Draw the deseasonalized series on explicitly created axes.
deseasonalized_fig, deseasonalized_ax = plt.subplots(figsize = (10, 7))
deseasonalized.plot(ax = deseasonalized_ax)
plt.show()



In [None]:
#=================================================-
#### Slide 75: Seasonal decomposition: get residual component  ####

# Pull the residual component out of the decomposition result.
residuals = res.resid
print(residuals.head(20))
# Draw the residuals on explicitly created axes.
residuals_fig, residuals_ax = plt.subplots(figsize = (10, 7))
residuals.plot(ax = residuals_ax)
plt.show()

