### Import libraries and read in file

In [242]:
# import libs
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

# apply matplotlib's built-in 'fivethirtyeight' style sheet to the figures drawn below
plt.style.use('fivethirtyeight')

In [243]:
# Load the combined electricity + weather CSV into `df`.
# The first row supplies the column names; the first column becomes the index
# and is parsed as dates.
# NOTE(review): this is an absolute, machine-specific path -- consider making it configurable.
elec_w_weather_path = '/users/ianmyjer/desktop/disagg/electric_data_with_weather.csv'
df = pd.read_csv(
    elec_w_weather_path,
    delimiter=',',
    header=0,
    index_col=0,
    parse_dates=True,
)

In [244]:
# Column groupings reused when selecting subsets of `df`.

# power-related column names
power_cols = [
    'north_br',
    'south_br',
    'basement',
    'dryer',
    'washer',
    'dining_room',
    'dishwasher',
    'workbench',
    'security_system',
    'refrigerator',
    'furnace_fan',
    'garage',
    'heat_pump',
    'dhw_heater',
    'main_house_total',
    'office',
    'outside_plug',
    'rental_suite',
    'entertainment',
    'utility_room',
    'unmetered',
    'whole_house_total',
    'oven',
]

# weather-related column names
weather_cols = [
    'Temp (C)',
    'Dew Point Temp (C)',
    'Rel Hum (%)',
    'Wind Spd (km/h)',
    'Stn Press (kPa)',
]

### X variables (features)

In [245]:
# Calendar features derived from the datetime index of `df`.
# Every flag is stored as a 0/1 integer. Each boolean condition is fully
# parenthesised before the integer cast: `*` binds tighter than `&`, so a bare
# trailing `*1` would convert only the last comparison and leave the final
# dtype to implicit bool/int promotion.
stamp = df.index
is_weekday = stamp.weekday < 5  # weekday numbers 0-4 (Monday-Friday)

# time-based features
df['dow'] = stamp.weekday
df['weekday'] = is_weekday.astype(int)
# weekday rows whose hour is between 9 and 17 inclusive
# NOTE(review): `<= 17` keeps the whole 17:xx hour -- confirm that is intended
df['business_hours'] = (is_weekday & (stamp.hour >= 9) & (stamp.hour <= 17)).astype(int)
df['hour'] = stamp.hour

# season dummies, one per three-month block of the calendar year
df['winter'] = (stamp.month <= 3).astype(int)
df['spring'] = ((stamp.month > 3) & (stamp.month <= 6)).astype(int)
df['summer'] = ((stamp.month > 6) & (stamp.month <= 9)).astype(int)
df['fall'] = (stamp.month > 9).astype(int)

In [6]:
# Percentage change of the main-house total relative to the reading one minute
# before ('1Min') and one minute after ('-1Min'); missing results become 0.
for pct_col, pct_freq in (('pos_mht_pct', '1Min'), ('neg_mht_pct', '-1Min')):
    df[pct_col] = df['main_house_total'].pct_change(freq=pct_freq).fillna(0)

In [248]:
# Absolute change of the main-house total versus the previous row (periods=1)
# and versus the next row (periods=-1); the undefined edge value becomes 0.
# NOTE(review): these are row offsets, so "1 minute" assumes evenly spaced rows -- confirm.
main_total = df['main_house_total']
df['pos_mht_diff'] = main_total.diff(periods=1).fillna(0)
df['neg_mht_diff'] = main_total.diff(periods=-1).fillna(0)

### ON/OFF Appliance Startups
Appliances that are either OFF (drawing no power) or ON (drawing high power).

In [249]:
# ON/OFF appliances for which startup and countdown columns are built below
of_apps = [
    'dishwasher',
    'oven',
    'dryer',
    'washer',
]

In [250]:
# Flag the rows where an appliance starts up.
# A row is a startup when its power reading is
#   (1) above a per-appliance threshold, and
#   (2) greater than the summed power of the 20 rows before it.
# The result is written to a 0/1 column named '<appliance>_start'.
for lbl in of_apps:
    power = df[lbl]
    # dishwashers seem to obey different rules than the other appliances:
    # use the median of their non-zero readings instead of the lower quartile
    quant = 0.50 if lbl == 'dishwasher' else 0.25
    lower_thresh = power[power > 0].quantile(quant)
    # sum over a 20-row window, moved down one row so it excludes the current reading
    prior_sum = power.rolling(20).sum().shift(1)
    is_start = (power > lower_thresh) & (power > prior_sum)
    df[lbl + '_start'] = is_start * 1  # bool -> 0/1

In [None]:
# Spot check: overlay one appliance's power trace with its computed startup
# flags (drawn on a secondary y-axis) for a single week.
lbl = 'dishwasher'
# week 6 of 2013; to narrow further, add e.g. &(df.index.day==26)&(df.index.hour > 14)
mask = (df.index.week == 6) & (df.index.year == 2013)
df.loc[mask, lbl].plot(linewidth=1, figsize=(20, 10));
df.loc[mask, lbl + '_start'].plot(secondary_y=True, linewidth=2, figsize=(20, 10));

In [251]:
# Loop through the calculated "start" columns and, for each appliance, add a
# '<appliance>_countdown' column that counts down the rows leading up to a
# startup (60 rows before -> 60, 1 row before -> 1, at or after the startup -> 0).
COUNTDOWN_ROWS = 60


def _countdown_to_start(start_flags, horizon=COUNTDOWN_ROWS):
    """Return, for every row, the number of rows remaining until a startup.

    Vectorised equivalent of: walk forward through the flags, write `horizon`
    at each startup and decrement by one per row until reaching 0, then move
    the whole sequence `horizon` rows earlier (padding the tail with zeros).

    Parameters
    ----------
    start_flags : array-like of 0/1
        1 marks a row where the appliance starts up.
    horizon : int
        Length of the countdown in rows.

    Returns
    -------
    numpy.ndarray of int, same length as `start_flags`
        Element t is the distance in rows from t to the latest startup found in
        rows t+1 .. t+horizon, or 0 when there is none. When several startups
        fall inside that window, the latest one determines the value.
    """
    is_start = np.asarray(start_flags) == 1
    n_rows = len(is_start)
    position = np.arange(n_rows)

    # position of the most recent startup at or before each row (-1 = none yet)
    last_start = np.maximum.accumulate(np.where(is_start, position, -1))

    # forward countdown: `horizon` at the startup, minus one per row, floored at 0
    remaining = np.clip(horizon - (position - last_start), 0, None)
    forward = np.where(last_start >= 0, remaining, 0)

    # move the countdown `horizon` rows earlier so it leads up to the startup;
    # frames shorter than `horizon` simply stay all-zero instead of producing a
    # result whose length differs from the input
    shifted = np.zeros(n_rows, dtype=int)
    if n_rows > horizon:
        shifted[:n_rows - horizon] = forward[horizon:]
    return shifted


for lbl in of_apps:
    df[lbl + '_countdown'] = _countdown_to_start(df[lbl + '_start'].values)


### Attempt a basic model

In [None]:
# Feature matrix and target for a first model: predict the washer countdown
# from calendar features and minute-to-minute changes in main-house power.
# Alternative feature set that also includes the percentage-change columns:
# feature_cols = ['dow','hour','winter','spring','summer','pos_mht_pct','neg_mht_pct','pos_mht_diff','neg_mht_diff']
feature_cols = [
    'dow',
    'hour',
    'winter',
    'spring',
    'summer',
    'pos_mht_diff',
    'neg_mht_diff',
]
X, y = df[feature_cols], df['washer_countdown']

# TODO: figure out how to train/test/split
# probably this: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html

In [None]:
from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the full data set and predict on the same rows
# (no hold-out split yet), then show the fitted intercept and coefficients.
linreg = LinearRegression().fit(X, y)
y_pred = linreg.predict(X)
for fitted_param in (linreg.intercept_, linreg.coef_):
    print(fitted_param)

In [None]:
from sklearn import metrics

# root-mean-squared error between the target and the model's predictions
mse = metrics.mean_squared_error(y, y_pred)
print('RMSE:', np.sqrt(mse))