In [1]:
import sys
sys.path.insert(0, '/home/johnr_000/jupyter_blog/jupyter_blog/scripts')
import eia_model as em
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from datetime import datetime

In [19]:
# Request parameters shared by every EIA series: API key, time window, sampling frequency
api_key = em.eia_api
start = '2015-12-01 01:00:00'
end = '2016-01-31 23:00:00'
freq = 'H'
# Series ids to fetch; each id doubles as its key in series_dict
keys = [
    'EBA.BPAT-ALL.D.H',
    'EBA.PACE-ALL.D.H',
    'EBA.PACW-ALL.D.H',
    'EBA.PGE-ALL.D.H',
    'EBA.PSEI-ALL.D.H',
    'EBA.SCL-ALL.D.H',
]
# One em.GetSeries object per series id, all built with the same window and frequency
series_dict = dict(
    (series_id, em.GetSeries(api_key=api_key, series_id=series_id,
                             start=start, end=end, freq=freq))
    for series_id in keys
)

In [20]:
# Show the descriptive name stored with each fetched series next to its series id
for series_id, series in series_dict.items():
    full_name = series.data.series[0]['name']
    print('{}: {}'.format(full_name, series_id))

Demand for PacifiCorp West (PACW), Hourly: EBA.PACW-ALL.D.H
Demand for Seattle City Light (SCL), Hourly: EBA.SCL-ALL.D.H
Demand for Portland General Electric Company (PGE), Hourly: EBA.PGE-ALL.D.H
Demand for Puget Sound Energy, Inc. (PSEI), Hourly: EBA.PSEI-ALL.D.H
Demand for Bonneville Power Administration (BPAT), Hourly: EBA.BPAT-ALL.D.H
Demand for PacifiCorp East (PACE), Hourly: EBA.PACE-ALL.D.H


In [21]:
# Every series was requested for the same date range, so the per-series frames
# can be joined column-wise (axis=1) in a single concat call.
concat_df = pd.concat([series.data.df for series in series_dict.values()], axis=1)
# Cache the combined frame as a pickle so the EIA API is not called again during development.
concat_df.to_pickle('data/nw_load_df.pkl')
# To reload the cached frame instead of re-querying the API (same file as written above):
# concat_df = pd.read_pickle('data/nw_load_df.pkl')

In [2]:
# Load the cached demand frame from disk rather than re-querying the EIA API.
# NOTE(review): absolute, machine-specific path; presumably the same file the
# to_pickle cell writes as 'data/nw_load_df.pkl' — confirm and make relative.
cached_frame_path = '/home/johnr_000/jupyter_blog/jupyter_blog/data/nw_load_df.pkl'
concat_df = pd.read_pickle(cached_frame_path)

In [57]:
# Per-column summary statistics of the combined demand frame.
# NOTE(review): the saved output lists `hour` and `HOUR` columns; `HOUR` is only added
# in a cell further down and `hour` by no visible cell, so this output reflects
# out-of-order execution — re-run from a fresh kernel to refresh it.
concat_df.describe()



Unnamed: 0,EBA.PACW-ALL.D.H,EBA.SCL-ALL.D.H,EBA.PGE-ALL.D.H,EBA.PSEI-ALL.D.H,EBA.BPAT-ALL.D.H,EBA.PACE-ALL.D.H,hour,HOUR
count,1487.0,1487.0,1487.0,1482.0,1487.0,1487.0,1487.0,1487.0
mean,4451.198,1282.258911,2579.921991,3946.066127,7059.477471,-24706.17,11.507734,11.507734
std,65585.28,177.394473,385.764476,554.268601,833.190306,895045.5,6.920412,6.920412
min,-6785.0,868.0,1730.0,2292.0,5006.0,-31990170.0,0.0,0.0
25%,2393.5,1128.5,2272.0,,6451.5,5169.0,6.0,6.0
50%,2709.0,1324.0,2646.0,,7109.0,5508.0,12.0,12.0
75%,2941.5,1416.0,2872.5,,7637.5,5822.5,17.5,17.5
max,2530003.0,1654.0,3472.0,5088.0,9365.0,59668.0,23.0,23.0


In [9]:
# Hour-of-day feature derived from the timestamp index.
# pd.to_datetime() replaces Index.to_datetime(), which was deprecated and later
# removed from pandas; it also parses the index when it holds timestamp strings.
concat_df['HOUR'] = pd.to_datetime(concat_df.index).hour
# Train/test split is currently disabled; the lines are kept for reference.
# train, test = train_test_split(concat_df, test_size = 0.2)
# print('Length of training dataframe: {} and testing dataframe {}'.format(len(train), len(test)))

Length of training dataframe: 1189 and testing dataframe 298


In [59]:
# Feature matrix: hour of day alongside the EBA.PACW-ALL.D.H demand column
feature_cols = ['HOUR', 'EBA.PACW-ALL.D.H']
x_train = concat_df[feature_cols].values
# Seeded RandomState so repeated runs build the same forest
rng = np.random.RandomState(42)
# fit() returns the estimator itself, so construction and fitting are chained
clf = IsolationForest(max_samples=100, random_state=rng).fit(x_train)
# Predicted labels for the same rows the model was fitted on
y_pred_train = clf.predict(x_train)

In [60]:
# Anomaly score for each training row from the fitted isolation forest.
# NOTE(review): in the printed rows the negative scores sit on the higher-demand hours;
# presumably lower = more anomalous per scikit-learn's convention — confirm.
# NOTE(review): `test` is also the name used by the commented-out train_test_split line.
test = clf.decision_function(x_train)

In [61]:
# Line up timestamp, PACW demand and anomaly score as the three columns of one array
timestamps = concat_df.index.values
pacw_demand = concat_df['EBA.PACW-ALL.D.H'].values
test2 = np.column_stack((timestamps, pacw_demand, test))
# NOTE(review): prints every row of the array; consider previewing a slice instead
for row in test2:
    print(row)

['2015-12-01 01:00:00' 3353 -0.08483705118914087]
['2015-12-01 02:00:00' 3468 -0.07716903753667825]
['2015-12-01 03:00:00' 3438 -0.07095122112248398]
['2015-12-01 04:00:00' 3357 -0.052216083054078655]
['2015-12-01 05:00:00' 3249 -0.0215427352310974]
['2015-12-01 06:00:00' 3077 0.02111862350666094]
['2015-12-01 07:00:00' 2829 0.04049894325177472]
['2015-12-01 08:00:00' 2648 0.05240513840540828]
['2015-12-01 09:00:00' 2538 0.05795174488755972]
['2015-12-01 10:00:00' 2494 0.05664477487348818]
['2015-12-01 11:00:00' 2505 0.05790208055768109]
['2015-12-01 12:00:00' 2545 0.05639834003252725]
['2015-12-01 13:00:00' 2644 0.05126887843674277]
['2015-12-01 14:00:00' 2937 0.033498391469789]
['2015-12-01 15:00:00' 3330 -0.03948369988206102]
['2015-12-01 16:00:00' 3533 -0.07207060364255036]
['2015-12-01 17:00:00' 3439 -0.054708045233725855]
['2015-12-01 18:00:00' 3363 -0.0383144664370364]
['2015-12-01 19:00:00' 3219 -0.002699216078814737]
['2015-12-01 20:00:00' 3093 0.022338274760597743]
['2015-12-

In [55]:
# Check the dimensions of the stacked array.
# NOTE(review): the saved output (1487, 2) comes from execution count 55, before the
# cell that stacks three columns ran as count 61 — re-run to refresh it.
test2.shape

(1487, 2)