In [None]:
import pandas as pd
from pandas.tools.plotting import autocorrelation_plot
from matplotlib import pyplot as plt
from statsmodels.tsa.arima_model import ARIMA

# Default size (width, height in inches) for figures that do not pass their own figsize.
plt.rcParams['figure.figsize'] = (10, 6)
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

TODO: write a short introduction to time-series analysis.

TODO: write a short introduction to the data set.

What is the data set?
------------------------
Prices and weather forecasts for the New York State power grid


Definitions:
------------

- **NYISO**: New York Independent System Operator
- **LBMP**: Locational Based Marginal Pricing (the `LBMP ($/MWHr)` column used below)


In [None]:
# Read the three gzip-compressed CSV extracts that every later cell builds on:
# the weather forecasts plus the real-time and day-ahead market price tables.
weather_forecast = pd.read_csv('datasets/timeseries/weather_forecast.csv.gz')
real_time_market = pd.read_csv('datasets/timeseries/real_time_market_lbmp.csv.gz')
day_ahead_market = pd.read_csv('datasets/timeseries/day_ahead_market_lbmp.csv.gz')

In [None]:
# Convert the 'Time Stamp' text columns to datetimes. The two feeds are parsed
# with different layouts: the real-time format carries seconds, the day-ahead
# format does not.
dam_stamps = pd.to_datetime(day_ahead_market['Time Stamp'], format='%m/%d/%Y %H:%M')
rtm_stamps = pd.to_datetime(real_time_market['Time Stamp'], format='%m/%d/%Y %H:%M:%S')
day_ahead_market['Time Stamp'] = dam_stamps
real_time_market['Time Stamp'] = rtm_stamps

In [None]:
# Both date columns use the same month/day/year text layout.
for date_column in ('Forecast Date', 'Vintage Date'):
    weather_forecast[date_column] = pd.to_datetime(weather_forecast[date_column], format='%m/%d/%Y')

# 'Vintage' is stored as a categorical; a later cell filters it on the value 'Actual'.
vintage_as_category = weather_forecast['Vintage'].astype('category')
weather_forecast['Vintage'] = vintage_as_category

In [None]:
# TODO: insert some graphs here
# Key both market tables by ('Name', 'Time Stamp') so that the rows for one
# 'Name' value can be selected by label in the next cell.
market_index_columns = ['Name', 'Time Stamp']
rtm_time_name = real_time_market.set_index(market_index_columns)
dam_time_name = day_ahead_market.set_index(market_index_columns)

In [None]:
# Take the LBMP price column and keep only the rows whose 'Name' index level
# is 'N.Y.C.'; what remains is a price series indexed by 'Time Stamp'.
price_column = 'LBMP ($/MWHr)'
nyc_name = 'N.Y.C.'
rtm_nyc_lbmp = rtm_time_name[price_column].loc[nyc_name]
dam_nyc_lbmp = dam_time_name[price_column].loc[nyc_name]

In [None]:
# Day-ahead NYC price series on its own 10x8 inch figure.
dam_fig, dam_ax = plt.subplots(figsize=(10, 8))
dam_nyc_lbmp.plot(ax=dam_ax, title='NYC Day Ahead LBMP 2015')

In [None]:
# Real-time NYC price series on its own 10x8 inch figure.
rtm_fig, rtm_ax = plt.subplots(figsize=(10, 8))
rtm_nyc_lbmp.plot(ax=rtm_ax, title='NYC Realtime LBMP 2015')

In [None]:
# Restrict both NYC price series to the timestamps they have in common.
# Series.align returns (caller, other) in that order, so the real-time series
# comes back first and the day-ahead series second. Unpacking them the other
# way round would silently swap the two markets in every later cell.
aligned_rtm, aligned_dam = rtm_nyc_lbmp.align(dam_nyc_lbmp, join='inner')

# Keep only the first row for any timestamp that occurs more than once,
# masking each series with its own index.
no_dup_al_dam = aligned_dam[~aligned_dam.index.duplicated(keep='first')]
no_dup_al_rtm = aligned_rtm[~aligned_rtm.index.duplicated(keep='first')]

no_dup_al_dam.name = 'dam_lbmp'
no_dup_al_rtm.name = 'rtm_lbmp'

# One row per timestamp, one column per market.
dam_rtm_df = pd.DataFrame({'dam_lbmp': no_dup_al_dam, 'rtm_lbmp': no_dup_al_rtm})

# Keep the weather rows for the 'LGA' and 'JFK' stations and index them so the
# station is the innermost level.
is_nyc_station = (weather_forecast['Station ID'] == 'LGA') | (weather_forecast['Station ID'] == 'JFK')
lga_and_jfk_indexed = weather_forecast[is_nyc_station].set_index(['Forecast Date', 'Vintage Date', 'Vintage', 'Station ID'])

# Average the two stations for each (Forecast Date, Vintage Date, Vintage).
# groupby(level=...).mean() replaces DataFrame.mean(level=...), which was removed
# in pandas 2.0. sort=False keeps groups in first-appearance order, which the
# .first() below depends on; observed=True skips combinations of the categorical
# 'Vintage' level that never occur in the data.
mean_nyc_indexed = lga_and_jfk_indexed.groupby(level=[0, 1, 2], sort=False, observed=True).mean()

mean_nyc = mean_nyc_indexed.reset_index()
# One row of 'Actual'-vintage values per vintage date, with every column
# prefixed by 'Actual ' so it cannot collide with the price columns.
actual_temp_df = mean_nyc[mean_nyc['Vintage'] == 'Actual'] \
    .groupby(['Vintage Date']).first() \
    .rename(columns=lambda x: 'Actual ' + x)

# Attach the weather values to the price rows by index label, carry the last
# known values forward over rows without a match, and drop any leading rows
# that still have gaps. .ffill() replaces the deprecated fillna(method='ffill').
dam_rtm_act_df = dam_rtm_df.join(actual_temp_df, how='left').ffill().dropna()

In [None]:
# Down-sample to calendar-day means. resample(..., how='mean') is no longer
# accepted by pandas (removed in 1.0); the aggregation is a method on the
# resampler instead. Only numeric columns are averaged: the joined frame also
# carries non-numeric 'Actual ...' columns (a date and a categorical) for which
# a mean is not meaningful.
daily_df = dam_rtm_act_df.select_dtypes(include='number').resample('D').mean()

In [None]:
# Daily 'dam_lbmp' price together with the daily min/max temperature on one
# shared time axis. plt.plot accepts datetime x-values directly, so the
# deprecated plt.plot_date wrapper is not needed.
plt.figure(figsize=(14,10))
plt.plot(daily_df.index, daily_df['dam_lbmp'], '-', label='LBMP')
plt.plot(daily_df.index, daily_df['Actual Min Temp'], '-', label='Min Temp')
plt.plot(daily_df.index, daily_df['Actual Max Temp'], '-', label='Max Temp')
plt.legend()

In [None]:
# Autocorrelation plot of the daily 'dam_lbmp' series, drawn on a fresh
# 10x8 inch figure (the helper draws on the current axes).
dam_daily_prices = daily_df['dam_lbmp']
plt.figure(figsize=(10, 8))
autocorrelation_plot(dam_daily_prices)

In [None]:
# ARIMA model of order (10, 0, 0) on the daily 'dam_lbmp' values, with the daily
# 'Actual Min Temp' values supplied as the exogenous regressor and the daily
# index supplied as the observation dates.
ar_order = [10,0,0]
dam_prices = daily_df['dam_lbmp'].values
min_temps = daily_df['Actual Min Temp'].values
m = ARIMA(dam_prices, ar_order, exog=min_temps, dates=daily_df.index.values)

In [None]:
# Fit the model with trend='c' and with the fitting routine's output enabled
# (disp=True).
fit_options = {'trend': 'c', 'disp': True}
results = m.fit(**fit_options)

In [None]:
# Show the fit summary as this cell's output.
fit_summary = results.summary()
fit_summary

In [None]:
# Non-dynamic predictions for positions 10 through 364, passing the daily
# 'Actual Min Temp' values as the exogenous input.
# NOTE(review): the end position 364 presumably assumes daily_df has 365 rows — confirm.
first_step, last_step = 10, 364
min_temps = daily_df['Actual Min Temp'].values
predicted_prices = results.predict(first_step, last_step, exog=min_temps, dynamic=False)

In [None]:
# Overlay the model output on the observed daily 'dam_lbmp' values. The observed
# values are taken from position 10 onward, matching the start position passed
# to results.predict.
observed_prices = daily_df['dam_lbmp'].values[10:]
plt.figure(figsize=(10, 8))
plt.plot(predicted_prices, label='prediction')
plt.plot(observed_prices, label='original')
plt.legend()

TODO (remaining sections):

- AR and EWMA analysis of the data set
- Panel analysis of the data set (forecasting prices from the weather data)
- Discussion of other interesting models