# Autoregression Model

In [1]:
import pickle
import datetime
import numpy as np
import matplotlib.pyplot as plt

# Loading Data File

In [2]:
path = "../Data/"
building = "avenal-animal-shelter"
interval = 5 # min


with open(path + building + "_training_data.pkl", 'r') as f:
    training_data = pickle.load(f)

with open(path + building + "_test_data.pkl", "r") as f:
    test_data = pickle.load(f)
    

# Features in Dataset

In [3]:
training_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 53187 entries, 2018-04-18 23:16:42+00:00 to 2018-10-25 23:13:42+00:00
Data columns (total 9 columns):
action                                        53187 non-null float64
action_duration                               53187 non-null object
dt                                            53187 non-null float64
previous_action                               42307 non-null float64
t_next                                        53187 non-null float64
t_in                                          53187 non-null float64
t_out                                         53186 non-null float64
occ                                           53186 non-null float64
zone_temperatureHVAC_Zone_Shelter_Corridor    53186 non-null float64
dtypes: float64(8), object(1)
memory usage: 4.1+ MB


# Include previous indoor temperature as t_last

In [4]:
def add_last_temperature_feature(data):
    """Adding a feature which specifies what the previous temperature was "dt" seconds before the current
    datasample. Since data does not need be continious, we need a loop.
    :param: pd.df with cols: "t_in", "dt" and needs to be sorted by time.
    returns pd.df with cols "t_last" added. """

    last_temps = []

    last_temp = None
    curr_time = data.index[0]
    for index, row in data.iterrows():

        if last_temp is None:
            last_temps.append(row["t_in"])  # so the feature will be zero instead
        else:
            last_temps.append(last_temp)

        if curr_time == index:
            last_temp = row["t_in"]
            curr_time += datetime.timedelta(minutes=row["dt"])
        else:
            last_temp = None
            curr_time = index + datetime.timedelta(minutes=row["dt"])

    data["t_last"] = np.array(last_temps)
    return data

training_data = add_last_temperature_feature(training_data)
test_data = add_last_temperature_feature(test_data)

training_data.head()

Unnamed: 0_level_0,action,action_duration,dt,previous_action,t_next,t_in,t_out,occ,zone_temperatureHVAC_Zone_Shelter_Corridor,t_last
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-04-18 23:16:42+00:00,0.0,300.0s,5.0,,69.5,69.483333,66.4997,0.0,69.499167,69.483333
2018-04-18 23:21:42+00:00,0.0,600.0s,5.0,,69.5,69.5,66.4997,0.0,69.485833,69.483333
2018-04-18 23:26:42+00:00,0.0,900.0s,5.0,,69.5,69.5,66.520546,0.0,69.460833,69.5
2018-04-18 23:31:42+00:00,0.0,1200.0s,5.0,,69.4,69.5,66.583083,0.0,69.45,69.5
2018-04-18 23:36:42+00:00,0.0,1500.0s,5.0,,69.5,69.4,66.666466,0.0,69.450489,69.5


# Preprocessing Data

Action features has values 0,1,2,3,4,5 which is transformed to values:
0 - no action
1 - heating
2 - cooling

Select the rows in datasets such that interval time is constant (5 minutes)

In [5]:
def fix_data(data, interval):
    """Fixes up the data. Makes sure we count two stage as single stage actions, don't count float actions,
    converts action duration and dt to floats, fill's nan's in action_duration and drops all datapoints which
    don't have dt equal to interval.
    :param data:
    :param interval: float:minutes"""
    def f(x):
        if x == 0:
            return 0
        elif x == 2 or x == 5:
            return 2
        elif x ==1 or x == 3:
            return 1
        
    def h(x):
        if x == 1:
            return 1
        else:
            return 0
        
    def c(x):
        if x == 2:
            return 1
        else:
            return 0

    data["action"] = data["action"].map(f)
    data['action_heating'] = data["action"].map(h)
    data['action_cooling'] = data['action'].map(c)
    
    print data.head()

    return data, data[data["dt"] == interval]

# get preproccessed test and training data
org_training_data, training_data = fix_data(training_data, interval)
org_test_data, test_data = fix_data(test_data, interval)


#print(test_data.head())

                           action action_duration   dt  previous_action  \
time                                                                      
2018-04-18 23:16:42+00:00       0          300.0s  5.0              NaN   
2018-04-18 23:21:42+00:00       0          600.0s  5.0              NaN   
2018-04-18 23:26:42+00:00       0          900.0s  5.0              NaN   
2018-04-18 23:31:42+00:00       0         1200.0s  5.0              NaN   
2018-04-18 23:36:42+00:00       0         1500.0s  5.0              NaN   

                           t_next       t_in      t_out  occ  \
time                                                           
2018-04-18 23:16:42+00:00    69.5  69.483333  66.499700  0.0   
2018-04-18 23:21:42+00:00    69.5  69.500000  66.499700  0.0   
2018-04-18 23:26:42+00:00    69.5  69.500000  66.520546  0.0   
2018-04-18 23:31:42+00:00    69.4  69.500000  66.583083  0.0   
2018-04-18 23:36:42+00:00    69.5  69.400000  66.666466  0.0   

                         

# Modifying action feature

Action is represnted as two separate features: action_heating and action_cooling, which holds value either 0(off) or 1(on). 

# Dropping Features

In [6]:
def filter_data(data):
    data = data.drop(['dt', 'action', 'previous_action', 'action_duration', 'zone_temperatureHVAC_Zone_Shelter_Corridor'], axis=1)
        
    return data

training_data = filter_data(training_data)
test_data = filter_data(test_data)

print(training_data.head())
print(test_data.head())

                           t_next       t_in      t_out  occ     t_last  \
time                                                                      
2018-04-18 23:16:42+00:00    69.5  69.483333  66.499700  0.0  69.483333   
2018-04-18 23:21:42+00:00    69.5  69.500000  66.499700  0.0  69.483333   
2018-04-18 23:26:42+00:00    69.5  69.500000  66.520546  0.0  69.500000   
2018-04-18 23:31:42+00:00    69.4  69.500000  66.583083  0.0  69.500000   
2018-04-18 23:36:42+00:00    69.5  69.400000  66.666466  0.0  69.500000   

                           action_heating  action_cooling  
time                                                       
2018-04-18 23:16:42+00:00               0               0  
2018-04-18 23:21:42+00:00               0               0  
2018-04-18 23:26:42+00:00               0               0  
2018-04-18 23:31:42+00:00               0               0  
2018-04-18 23:36:42+00:00               0               0  
                           t_next  t_in      t_out  oc

# Separating Target features 

In [7]:
def getTarget(data, target):
    y = data[target]
    return y

train_y = getTarget(training_data, 't_next')
train_y.head()

time
2018-04-18 23:16:42+00:00    69.5
2018-04-18 23:21:42+00:00    69.5
2018-04-18 23:26:42+00:00    69.5
2018-04-18 23:31:42+00:00    69.4
2018-04-18 23:36:42+00:00    69.5
Name: t_next, dtype: float64

In [8]:
def getFeatures(data, target):
    X = data.drop([target], axis=1)
    return X

train_X = getFeatures(training_data, 't_next')
train_X.head()

Unnamed: 0_level_0,t_in,t_out,occ,t_last,action_heating,action_cooling
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-18 23:16:42+00:00,69.483333,66.4997,0.0,69.483333,0,0
2018-04-18 23:21:42+00:00,69.5,66.4997,0.0,69.483333,0,0
2018-04-18 23:26:42+00:00,69.5,66.520546,0.0,69.5,0,0
2018-04-18 23:31:42+00:00,69.5,66.583083,0.0,69.5,0,0
2018-04-18 23:36:42+00:00,69.4,66.666466,0.0,69.5,0,0


In [9]:
test_y = getTarget(test_data, 't_next')
test_y.head()

time
2018-03-20 00:00:00+00:00    72.8
2018-03-20 00:05:00+00:00    73.0
2018-03-20 00:10:00+00:00    73.0
2018-03-20 00:15:00+00:00    73.0
2018-03-20 00:20:00+00:00    73.0
Name: t_next, dtype: float64

In [10]:
test_X = getFeatures(test_data, 't_next')
test_X.head()

Unnamed: 0_level_0,t_in,t_out,occ,t_last,action_heating,action_cooling
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-03-20 00:00:00+00:00,72.8,67.778613,0.0,72.8,0,0
2018-03-20 00:05:00+00:00,72.8,67.807436,0.0,72.8,0,0
2018-03-20 00:10:00+00:00,73.0,67.835998,0.0,72.8,0,0
2018-03-20 00:15:00+00:00,73.0,67.864038,0.0,73.0,0,0
2018-03-20 00:20:00+00:00,73.0,67.891817,0.0,73.0,0,0


# Linear Regression Model

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import time 

linearModel = LinearRegression().fit(train_X, train_y)
test_pred1 = linearModel.predict(test_X)
print test_pred1


[72.78808106 72.78814943 73.04479567 ... 72.33896613 72.339024
 72.46731324]


In [12]:
linearModel.score(test_X, test_y)

0.992678955494629

# AutoRegression that uses previous predicted value as current t_in

In [18]:
def autoRegression2(linearModel, train_X):
    t_pred = []
    test_pred2 = []
    for index, row in test_X.iterrows():
        if len(t_pred) >= 1 :
            row['t_in'] = t_pred[0]
        t_pred = linearModel.predict([row])
        #test_pred2.append(t_pred[0])    
        print t_pred[0]
    
autoRegression2(linearModel, train_X)
        
        

72.78808106196402
72.77285871550411
72.75339783180553
72.67053216240221
72.56429031330562
72.4279302387419
72.2526735176477
72.02738748174652
71.73806150569725
71.3378807736312
70.8246105832334
70.16613034023656
69.32110433442095
68.23663173940228
66.81614064274758
65.02283730178827
62.6934386908921
59.705153702828134
55.87135478083797
50.95273564185667
44.642146770798014
36.5452862882997
26.127586413593704
12.76166293049183
-4.386152910996895
-26.38554912349776
-54.60903328502855
-90.84640019162903
-137.3067828496928
-196.94034825272564
-273.4160986874411
-371.55671399733615
-497.4327805518046
-658.9478657026601
-866.1257380545519
-1131.9422397752353
-1472.9282681722461
-1910.3779155214238
-2471.5797282565336
-3191.541695222208
-4115.175580793976
-5300.098693834264
-6820.256892296518
-8770.427742401575
-11272.316433227512
-14481.97058226355
-18599.58742518054
-23882.071133335987
-30658.929762695232
-39352.91075675983
-50506.35347835125
-64815.02095568948
-83171.5023395502
-106720.8933

-4.52380110977213e+47
-5.8035501764967225e+47
-7.445330560257921e+47
-9.551558178303248e+47
-1.2253621635081732e+48
-1.5720078375988723e+48
-2.0167169470920254e+48
-2.587230895044678e+48
-3.319138917301041e+48
-4.258098174941623e+48
-5.4626818940693015e+48
-7.008033222766565e+48
-8.990552736508451e+48
-1.1533912003349534e+49
-1.47967683411503e+49
-1.898266202118455e+49
-2.435271331567724e+49
-3.124191144391222e+49
-4.008001153780714e+49
-5.141834320075756e+49
-6.596420300470656e+49
-8.462497636411647e+49
-1.0856474115386955e+50
-1.392768840619073e+50
-1.786772595579718e+50
-2.2922370282893607e+50
-2.9406935201824415e+50
-3.772593441654917e+50
-4.8398315493736695e+50
-6.208983233570321e+50
-7.965457558072732e+50
-1.0218825163902645e+51
-1.3109653396442895e+51
-1.6818275038304988e+51
-2.157603765030286e+51
-2.7679735265775744e+51
-3.551012270191672e+51
-4.555566742953246e+51
-5.844302066684062e+51
-7.497610851488766e+51
-9.618628167906604e+51
-1.2339665216697062e+52
-1.5830463035073615e+

-1.0388220455176231e+96
-1.3326969332469545e+96
-1.709706800649243e+96
-2.193369903736852e+96
-2.813857635000182e+96
-3.6098766453206294e+96
-4.631083403915876e+96
-5.9411818190037185e+96
-7.621897151887597e+96
-9.77807412123502e+96
-1.254421722243885e+97
-1.609288125378379e+97
-2.064543545890818e+97
-2.64858727636312e+97
-3.3978525541275237e+97
-4.359079303380323e+97
-5.592229818823839e+97
-7.174229274124768e+97
-9.203764391881501e+97
-1.1807439620975886e+98
-1.5147674849865515e+98
-1.9432837323142205e+98
-2.4930239800536876e+98
-3.198281579664746e+98
-4.103051211967299e+98
-5.263773319730974e+98
-6.7528549194556e+98
-8.663186424134678e+98
-1.1113936240964262e+99
-1.4257984616850326e+99
-1.8291460462499717e+99
-2.346597607180618e+99
-3.0104322950674216e+99
-3.862060787692325e+99
-4.954608529901057e+99
-6.356229752467575e+99
-8.154359001791979e+99
-1.046116539514516e+100
-1.342054984340685e+100
-1.7217121735113184e+100
-2.2087715056424044e+100
-2.8336162334195993e+100
-3.63522480156397

-5.645290415529466e+138
-7.242300311711005e+138
-9.291092210371254e+138
-1.1919471817819043e+139
-1.5291400106565664e+139
-1.9617221366261908e+139
-2.516678469276898e+139
-3.2286277447093933e+139
-4.141982077234657e+139
-5.313717431886013e+139
-6.816927842618871e+139
-8.745385091916426e+139
-1.1219388289216795e+140
-1.4393268250767369e+140
-1.8465014811695215e+140
-2.3688627631736507e+140
-3.0389960950351e+140
-3.898705070303633e+140
-5.001619202487227e+140
-6.416539388228382e+140
-8.231729776671539e+140
-1.0560423776164165e+141
-1.354788766854626e+141
-1.738048246641637e+141
-2.2297289301175705e+141
-2.86050235452891e+141
-3.6697168026760934e+141
-4.7078518885054625e+141
-6.03966752637199e+141
-7.748243719853386e+141
-9.940163176218816e+141
-1.275216004327316e+142
-1.6359649523491157e+142
-2.0987670451379346e+142
-2.6924923443085017e+142
-3.454178033219233e+142
-4.4313388338485885e+142
-5.684931023104635e+142
-7.293154946896524e+142
-9.356333236633229e+142
-1.2003169036217882e+143
-1.

-6.881608707094605e+180
-8.828363683002853e+180
-1.132584089516908e+181
-1.4529835492578324e+181
-1.864021589173464e+181
-2.391339177019273e+181
-3.0678309161016133e+181
-3.935697043829685e+181
-5.0490759251142075e+181
-6.477421258207773e+181
-8.309834666495518e+181
-1.0660623947684534e+182
-1.3676433709585922e+182
-1.75453932087459e+182
-2.250885204333193e+182
-2.88764357846399e+182
-3.7045360732710175e+182
-4.752521267000053e+182
-6.096973533677722e+182
-7.821761162540041e+182
-1.0034478146556055e+183
-1.287315600429465e+183
-1.6514874325356293e+183
-2.1186807173883588e+183
-2.718039443594984e+183
-3.486952213377766e+183
-4.473384581313638e+183
-5.738871194036246e+183
-7.362354383594611e+183
-9.44510866979609e+183
-1.211705836695967e+184
-1.5544882393764674e+184
-1.9942411872412763e+184
-2.558396912983233e+184
-3.282148020128236e+184
-4.21064283316007e+184
-5.40180179556605e+184
-6.929930605555908e+184
-8.890355480506496e+184
-1.1405369702606359e+185
-1.46318623972188e+185
-1.8771105

-1.7711868669027763e+223
-2.2722422150296654e+223
-2.9150423257097984e+223
-3.739685718570567e+223
-4.797614480700661e+223
-6.154823275958754e+223
-7.895976158707786e+223
-1.01296880029703e+224
-1.2995300007885682e+224
-1.6671571942337614e+224
-2.1387833359743993e+224
-2.7438289407041835e+224
-3.5200373638669295e+224
-4.515829270260292e+224
-5.793323163972677e+224
-7.432210403359168e+224
-9.534726428401391e+224
-1.223202831063105e+225
-1.5692376463617684e+225
-2.0131630897377983e+225
-2.5826716783649667e+225
-3.3132899327581376e+225
-4.2505945569768615e+225
-5.4530555594212806e+225
-6.995683670964905e+225
-8.974709590048419e+225
-1.151358695076582e+226
-1.4770693485150396e+226
-1.8949210785936062e+226
-2.4309798979365897e+226
-3.118685696692915e+226
-4.000938256631637e+226
-5.132773382823771e+226
-6.584796092705554e+226
-8.447585028321724e+226
-1.0837342843429541e+227
-1.3903145042313568e+227
-1.7836239460192092e+227
-2.2881976496188123e+227
-2.9355114318837853e+227
-3.765945379830245e

-4.2909253536096637e+266
-5.504795632920742e+266
-7.062060619332751e+266
-9.059864074312412e+266
-1.1622830993593484e+267
-1.4910841840183988e+267
-1.9129006049002322e+267
-2.454045696042688e+267
-3.148276634362681e+267
-4.0389002464205126e+267
-5.181474531966575e+267
-6.647274428035719e+267
-8.527738011451185e+267
-1.0940170498337462e+268
-1.4035061861888262e+268
-1.8005474548770988e+268
-2.309908690939127e+268
-2.9633643623351834e+268
-3.801677693323864e+268
-4.877143515530177e+268
-6.256850472319008e+268
-8.026866076075051e+268
-1.0297605686485944e+269
-1.3210720332112365e+269
-1.6947933044505903e+269
-2.1742374924314764e+269
-2.789312809462149e+269
-3.5783882745618823e+269
-4.590687211589966e+269
-5.889357849864933e+269
-7.555412574439561e+269
-9.692781560439328e+269
-1.2434796042274581e+270
-1.595250565060286e+270
-2.0465348660915073e+270
-2.625484077462091e+270
-3.3682136352612476e+270
-4.321055758877896e+270
-5.543449701605289e+270
-7.111649631248401e+270
-9.123481442068411e+270

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

# AutoRegression to predict at successive time intervals

At time t1, Temperature is predicted for 5 minute interval (t1+5) using the linear model . This predicted temperature is set as indoor temperature (t_in) to predict the temperature in the next 5 minute interval (t1+5+5). This process is repeated until the predicted temperature is for next one hour(t1+60).

Model is evaluated using Root mean squared error. To get true predictions at successive times, the test dataset(test_y) is shifted to n number of 5 minute intervals. 

In [None]:
import pandas as pd

def autoRegression(linearModel, test_X, time):
    next_pred5 = []
    n = time/5
    
    for index, row in test_X.iterrows():
        for i in range(1,n+1):
            t_next5 = singlePrediction(linearModel, row)
            row['t_in'] = t_next5            
        next_pred5.append(t_next5)
        
    test_pred = pd.DataFrame(next_pred5, index=test_X.index)
    next_test_y = test_y.shift(1-n,axis=0)
    nan_index = next_test_y.index[next_test_y.apply(np.isnan)]
    next_test_y = next_test_y.dropna()
    test_pred = test_pred.drop(nan_index) 
    rmse = mean_squared_error(next_test_y, test_pred)
    
    plt.figure(figsize=(15,4))
    plt.plot(next_test_y, 'g', linewidth=1)
    plt.plot(test_pred, 'r', linewidth=1)
    
    #print "time=", time, "rmse=", rmse
    return rmse
    


In [None]:
autoRegression(linearModel, test_X, 10)

In [None]:
time = []
rmse = []
for i in range(5,65,5):
    rmse_i = autoRegression(linearModel, test_X, i)
    rmse.append(rmse_i)
    time.append(i)
    
result = pd.DataFrame(rmse, index=time)
result
    

# Plot time vs rmse

In [None]:
result.plot()