In [None]:
# Import useful libraries
# from dateutil.parser import parse 
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pandas.plotting import register_matplotlib_converters # Register converters
# Register the pandas converters with matplotlib (imported on the line above).
register_matplotlib_converters()

# Notebook-wide figure defaults: 10x7 inch canvas rendered at 120 dpi.
plt.rcParams['figure.figsize'] = (10, 7)
plt.rcParams['figure.dpi'] = 120

In [None]:
# Import as Dataframe: the 'Date' column is parsed as datetimes and used as the index.
df = pd.read_csv('../data/price_daily.csv', parse_dates=['Date'], index_col='Date')

# Check if Dataframe is empty
if df.empty:
    print('DataFrame is empty')
else:
    print('DataFrame is not empty')

# Preview the first rows. This has to be the cell's last expression for the
# notebook to render it; in the middle of the cell its result was discarded.
df.head()

In [None]:
# Time series data source: fpp package in R.
# NOTE(review): the plot titles below credit EIA natural gas prices - confirm which source applies.
# Visualize Month and yearly data

# Draw Plot
def plot_df(df, x, y, title="", xlabel='Date', ylabel='Price', dpi=100, color='tab:red'):
    """Draw one line chart of ``y`` against ``x`` on a new 16x5 inch figure and show it.

    Parameters
    ----------
    df : object
        Not read by the function; accepted only so existing calls keep working.
    x, y : array-like
        Values for the horizontal and vertical axes.
    title, xlabel, ylabel : str
        Text applied to the axes.
    dpi : int
        Resolution of the figure.
    color : str
        Line colour. Defaults to the red this function has always used.
    """
    fig = plt.figure(figsize=(16,5), dpi=dpi)
    ax = fig.gca()
    ax.plot(x, y, color=color)
    ax.set(title=title, xlabel=xlabel, ylabel=ylabel)
    plt.show()


def plot_df_m(df_mon, x, y, title="", xlabel='Month', ylabel='Price', dpi=100):
    """Monthly variant of ``plot_df``: same chart with a blue line and a 'Month' x label.

    ``df_mon`` is not read; it is forwarded only to keep the call signature intact.
    """
    plot_df(df_mon, x, y, title=title, xlabel=xlabel, ylabel=ylabel, dpi=dpi, color='tab:blue')


# NOTE(review): df is loaded from price_daily.csv, yet the title says 'Yearly' - confirm the wording.
plot_df(df, x=df.index, y=df.Price, title='Yearly EIA Natural Gas Price')

df_mon = pd.read_csv('../data/price_monthly.csv', parse_dates=['Month'], index_col='Month')

print(df_mon.empty) # Check empty dataframe 

plot_df_m(df_mon, x=df_mon.index, y=df_mon.Price, title='Monthly EIA Natural Gas Price')

In [None]:
# Seasonal Plot of Natural Gas analysis - Year, Month

# Expose the 'Date' index as a regular column. Guarded so that re-running the
# cell does not stack extra 'index' / 'level_0' columns onto df.
if 'Date' not in df.columns:
    df.reset_index(inplace=True)

# Prepare data (vectorised through the .dt accessor instead of Python-level loops)
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.strftime('%b')
years = df['year'].unique()

# Prep Colors: one reproducible random colour per year
np.random.seed(100)
mycolors = np.random.choice(list(mpl.colors.XKCD_COLORS.keys()), len(years), replace=False)

# Draw Plot
plt.figure(figsize=(16,12), dpi= 80)
for i, y in enumerate(years):
    if i == 0:
        # The first year found in the data is not drawn (unchanged behaviour).
        continue
    year_df = df.loc[df.year == y]
    # Numeric month position (Jan -> 0 ... Dec -> 11). Positions no longer depend
    # on the order in which month names happen to be seen first.
    month_pos = year_df['Date'].dt.month - 1
    plt.plot(month_pos, year_df['Price'], color=mycolors[i], label=y)
    # Put the year label just right of the last plotted point. The label used to
    # be positioned from the row count, which is only right with one row per month.
    plt.text(month_pos.iloc[-1] + 0.1, year_df['Price'].iloc[-1], y, fontsize=12, color=mycolors[i])

# Decoration
# NOTE(review): ylim=(2, 30) is hard-coded - confirm it covers the price range of this dataset.
plt.gca().set(xlim=(-0.3, 11), ylim=(2, 30), ylabel='$Price$', xlabel='$Month$')
plt.xticks(range(12), pd.date_range('2000-01-01', periods=12, freq='MS').strftime('%b'))
plt.yticks(fontsize=12, alpha=.7)
plt.title("Seasonal Plot of Natural Gas Price Time Series", fontsize=20)
plt.show()

In [None]:
import seaborn as sns

# Find Trend in Yearly, Monthly data
# 'Date' is turned into a column only while it is still the index. An
# unconditional reset_index adds a junk 'index' column and eventually raises
# once the cell has been re-run a few times.
if 'Date' not in df.columns:
    df.reset_index(inplace=True)

# Prepare data
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.strftime('%b')
years = df['year'].unique()

# Draw Plot
fig, axes = plt.subplots(1, 2, figsize=(20,7), dpi= 80)
sns.boxplot(x='year', y='Price', data=df, ax=axes[0])
# The target axes is passed explicitly instead of relying on whichever axes is current.
# NOTE(review): years 1991 and 2008 are excluded from the month-wise plot - confirm
# this filter is meant for this dataset.
sns.boxplot(x='month', y='Price', data=df.loc[~df.year.isin([1991, 2008]), :], ax=axes[1])

# Set Title
axes[0].set_title('Year-wise Box Plot\n(The Trend)', fontsize=18)
axes[1].set_title('Month-wise Box Plot\n(The Seasonality)', fontsize=18)
plt.show()

In [None]:
# Decompose data
# Trend, seasonal and residual components for Monthly data

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse

# Decompose the monthly price twice: multiplicative first, then additive.
result_mul, result_add = (
    seasonal_decompose(df_mon['Price'], model=kind, extrapolate_trend='freq')
    for kind in ('multiplicative', 'additive')
)

# Plot both decompositions on 12x12 inch figures
plt.rcParams['figure.figsize'] = (12,12)
for decomposition, heading in ((result_mul, 'Multiplicative Decompose'),
                               (result_add, 'Additive Decompose')):
    decomposition.plot().suptitle(heading, fontsize=10)
plt.show()

# Side-by-side table of the multiplicative components next to the observed values
df_reconstructed = pd.concat(
    [result_mul.seasonal, result_mul.trend, result_mul.resid, result_mul.observed],
    axis=1,
    keys=['seas', 'trend', 'resid', 'actual_values'],
)
df_reconstructed.head()

In [None]:
# Test stationary series with Augmented Dickey Fuller test (ADF Test)

from pandas import read_csv
from statsmodels.tsa.stattools import adfuller
# read_csv's `squeeze=True` was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single-column frame after the read is the documented replacement.
series = read_csv('../data/price_monthly.csv', header=0, index_col=0).squeeze('columns')
X = series.values
result = adfuller(X)
# The code reads result[0] as the test statistic, result[1] as the p-value and
# result[4] as the dict of critical values.
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

In [None]:
# Using scipy: Subtract the line of best fit

from scipy import signal
detrended = signal.detrend(df_mon.Price.values)
plt.plot(detrended)
# The plotted series is the monthly price, so the title says "Price" rather than "Sales".
plt.title('Gas Price detrended by subtracting the least squares fit', fontsize=16)
# Finish with show() so the cell renders the figure without a trailing Text(...) repr.
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
result_mul = seasonal_decompose(df_mon['Price'], model='multiplicative', extrapolate_trend='freq')
# NOTE(review): the decomposition is multiplicative, yet the trend is removed by
# subtraction - confirm this is intended.
detrended = df_mon.Price.values - result_mul.trend
plt.plot(detrended)
# The plotted series is the monthly price, so the title says "Price" rather than "Sales".
plt.title('Gas Price detrended by subtracting the trend component', fontsize=16)
# Finish with show() so the cell renders the figure without a trailing Text(...) repr.
plt.show()

In [None]:
# Deseasonalize a time series - Dividing by the Seasonal Component

# Time Series Decomposition
result_mul = seasonal_decompose(df_mon['Price'], model='multiplicative', extrapolate_trend='freq')

# Deseasonalize: divide the observed prices by the seasonal component of the
# multiplicative decomposition
deseasonalized = df_mon.Price.values / result_mul.seasonal

# Plot
plt.plot(deseasonalized)
# The plotted series is the monthly price, so the title says "Price" rather than "Sales".
plt.title('Gas Price Deseasonalized', fontsize=16)
# show() replaces the former empty plt.plot() call, which drew nothing.
plt.show()

In [None]:
# Test seasonality of a time series
from pandas.plotting import autocorrelation_plot

# Draw Plot on a 9x5 inch, 120 dpi figure (updates the notebook-wide defaults)
plt.rcParams['figure.figsize'] = (9,5)
plt.rcParams['figure.dpi'] = 120
autocorrelation_plot(df_mon['Price'].tolist())

In [None]:
# Check autocorrelation and partial autocorrelation
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Draw Plot (ACF on the left, PACF on the right, 50 lags each)
# For Yearly autocorrelation, detrend not available
fig, axes = plt.subplots(1,2,figsize=(16,3), dpi= 100)
plot_acf(df.Price.tolist(), lags=50, ax=axes[0])
plot_pacf(df.Price.tolist(), lags=50, ax=axes[1])
# plot_pacf returns the Figure. Left as the cell's last expression, Jupyter
# renders the figure a second time, so end with an explicit show().
plt.show()

In [None]:
# For monthly (ACF on the left, PACF on the right, 50 lags each)
fig, axes = plt.subplots(1,2,figsize=(16,3), dpi= 100)
plot_acf(df_mon.Price.tolist(), lags=50, ax=axes[0])
plot_pacf(df_mon.Price.tolist(), lags=50, ax=axes[1])
# plot_pacf returns the Figure. Left as the cell's last expression, Jupyter
# renders the figure a second time, so end with an explicit show().
plt.show()

In [None]:
# Scatter plot check autocorrelation
# Validate between deseasonalize (monthly) and seasonalize (yearly) time data

from pandas.plotting import lag_plot
plt.rcParams.update({'ytick.left' : False, 'axes.titlepad':10})

# Plot: one figure per series, each a row of four panels for lags 1 to 4
for prices, heading, heading_y in (
    (df.Price,
     'Lag Plots of Yearly price \n(Points get wide and scattered with increasing lag -> lesser correlation)\n',
     1.15),
    (df_mon.Price, 'Lag Plots of Monthly price', 1.05),
):
    fig, axes = plt.subplots(1, 4, figsize=(10,3), sharex=True, sharey=True, dpi=100)
    for lag, ax in enumerate(axes.flatten()[:4], start=1):
        lag_plot(prices, lag=lag, ax=ax, c='firebrick')
        ax.set_title('Lag ' + str(lag))
    fig.suptitle(heading, y=heading_y)

plt.show()