In [1]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # For plotting graphs 
%matplotlib inline
import warnings                  
warnings.filterwarnings("ignore")  # To ignore the warnings 
plt.style.use('fivethirtyeight')
from pylab import rcParams
from plotly import tools
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.figure_factory as ff
from pmdarima import auto_arima

In [2]:
#User Input Variables
index_reader = 1 #Input for Building
meter_reading = 'main_meter' #Use 'sub_meter_1' and 'sub_meter_2' for respective readings

# Fail fast on a mistyped input: the notebook builds files for buildings 1-5
# and only knows the three meter columns listed below.
if index_reader not in range(1, 6):
    raise ValueError('index_reader must be a building number from 1 to 5, got ' + repr(index_reader))
if meter_reading not in ('main_meter', 'sub_meter_1', 'sub_meter_2'):
    raise ValueError("meter_reading must be 'main_meter', 'sub_meter_1' or 'sub_meter_2', got " + repr(meter_reading))

In [3]:
###Making Files for building
# Load the raw train/test readings, then parse the 'timestamp' column of each
# frame into datetimes so it can later drive time-based resampling.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
for raw_frame in (train, test):
    raw_frame['timestamp'] = pd.to_datetime(raw_frame['timestamp'],infer_datetime_format=True)


##Creating training files

# Resample each building (1-5) to hourly means and stack the results once,
# instead of special-casing the first building and re-concatenating per step.
hourly_frames = []
for i in range(1,6):
    # set_index moves 'timestamp' out of the columns, so it drives the
    # resampling and is not itself averaged as a data column.
    df = train[train['building_number']==i].set_index('timestamp')
    df = df.resample('1H').mean()
    # Hours with no readings come back as all-NaN rows without a building id;
    # they could never match a building_number_<i> dummy, so drop them here.
    df = df.dropna(subset=['building_number'])
    # mean() returns building_number as a float; restore the integer id so the
    # one-hot columns are named 'building_number_1' rather than '..._1.0'.
    df = df.assign(building_number=i)
    hourly_frames.append(df)
new_df = pd.concat(hourly_frames)
    
#Adding new features
# Weekday number taken from the timestamp index.
new_df['day of week'] = new_df.index.dayofweek
# 'Hour' is only a scratch column used to derive the 'corporate' flag.
new_df['Hour'] = new_df.index.hour
# corporate = 1 for hours 8..19 inclusive, 0 for hours 0..7 and 20..23.
office_hours = new_df['Hour'].between(8, 19)
new_df['corporate'] = office_hours.astype('int64')
new_df = new_df.drop(columns=['Hour'])
 
# Column means, each taken over all stacked buildings before any value is replaced.
anomaly_mean_main_meter = new_df['main_meter'].mean()
anomaly_mean_sub_meter_1 = new_df['sub_meter_1'].mean()
anomaly_mean_sub_meter_2 = new_df['sub_meter_2'].mean()

# Readings above the per-meter ceiling are treated as anomalies and overwritten
# with that meter's mean computed above.
for meter, ceiling, fill_value in (
    ('main_meter', 15000, anomaly_mean_main_meter),
    ('sub_meter_1', 5000, anomaly_mean_sub_meter_1),
    ('sub_meter_2', 3000, anomaly_mean_sub_meter_2),
):
    new_df[meter] = new_df[meter].mask(new_df[meter] > ceiling, fill_value)

# One-hot encode the weekday and the building id.
one_hot = ['day of week','building_number']
new_df = pd.get_dummies(new_df,columns = one_hot)

#Saving training file
# One CSV per building, selected through that building's dummy column.
for i in range(1,6):
    building_rows = new_df[new_df['building_number_{}'.format(i)]==1]
    building_rows.to_csv('./csv_files/intermediate_files/building_{}_train.csv'.format(i))

##Creating testing files

# Resample each building (1-5) of the test set to hourly means and stack the
# results once, instead of special-casing the first building.
hourly_test_frames = []
for i in range(1,6):
    # set_index moves 'timestamp' out of the columns, so it drives the
    # resampling and is not itself averaged as a data column.
    df_test = test[test['building_number']==i].set_index('timestamp')
    df_test = df_test.resample('1H').mean()
    # Hours with no rows come back as all-NaN rows without a building id;
    # they could never match a building_number_<i> dummy, so drop them here.
    df_test = df_test.dropna(subset=['building_number'])
    # mean() returns building_number as a float; restore the integer id so the
    # one-hot columns are named 'building_number_1' rather than '..._1.0'.
    df_test = df_test.assign(building_number=i)
    hourly_test_frames.append(df_test)
new_df_test = pd.concat(hourly_test_frames)

#Adding new features
# The meter readings are what has to be predicted, so they are unknown here.
# Use a real missing value (np.nan) rather than the string 'NaN', which would
# turn these numeric columns into object columns holding text.
for meter in ('main_meter', 'sub_meter_1', 'sub_meter_2'):
    new_df_test[meter] = np.nan

# Weekday number taken from the timestamp index.
new_df_test['day of week']=new_df_test.index.dayofweek
# 'Hour' is only a scratch column used to derive the 'corporate' flag.
new_df_test['Hour']=new_df_test.index.hour
# corporate = 1 for hours 8..19 inclusive, 0 for hours 0..7 and 20..23.
new_df_test['corporate'] = new_df_test['Hour'].between(8, 19).astype('int64')
new_df_test = new_df_test.drop(['Hour'],axis=1)

# One-hot encode the weekday and the building id.
one_hot = ['day of week','building_number']
new_df_test = pd.get_dummies(new_df_test,columns = one_hot)

#Saving test file
# One CSV per building, selected through that building's dummy column.
for i in range(1,6):
    building_rows_test = new_df_test[new_df_test['building_number_{}'.format(i)]==1]
    building_rows_test.to_csv('./csv_files/intermediate_files/building_{}_test.csv'.format(i))

In [4]:
#Reading Files
# Hourly train/test files written earlier for the building chosen in index_reader.
intermediate_stem = 'csv_files/intermediate_files/building_'+str(index_reader)
dataframe = pd.read_csv(intermediate_stem+'_train.csv')
test_building = pd.read_csv(intermediate_stem+'_test.csv')
# Row count of the training frame as loaded, before any preprocessing.
shape_old_dataframe = len(dataframe)
# Raw test file; its original timestamps are used later to index the forecast.
test_for_index = pd.read_csv('./test.csv')

In [5]:
#Preprocessing Dataframe
# Parse the timestamps and move them from a column into the index.
parsed_timestamps = pd.to_datetime(dataframe['timestamp'],infer_datetime_format=True)
dataframe = dataframe.drop(columns=['timestamp'])
dataframe.index = parsed_timestamps

# Keep only the selected meter: every other column is dropped, leaving a
# single-column frame for the model.
dropping_columns = dataframe.columns.tolist()
dropping_columns.remove(meter_reading)
dataframe = dataframe.drop(columns=dropping_columns)

In [6]:
#Building the model
# Options for the stepwise auto_arima order search (seasonal search disabled).
search_options = dict(
    start_p=1, start_q=1,
    max_p=5, max_q=5,
    start_P=0,
    seasonal=False,
    d=1, D=1,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
stepwise_model = auto_arima(dataframe, **search_options)
print(stepwise_model.aic())

#Training and fitting 
# The selected model is fitted on the whole single-meter series.
train = dataframe.loc[:]
stepwise_model.fit(train)

Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 0, 0, 0); AIC=107002.894, BIC=107030.073, Fit time=1.162 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 0, 0, 0); AIC=107199.624, BIC=107213.214, Fit time=0.137 seconds
Fit ARIMA: order=(1, 1, 0) seasonal_order=(0, 0, 0, 0); AIC=107001.662, BIC=107022.046, Fit time=0.412 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 0, 0, 0); AIC=107005.539, BIC=107025.923, Fit time=0.376 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 0, 0, 0); AIC=107197.624, BIC=107204.419, Fit time=0.062 seconds
Fit ARIMA: order=(2, 1, 0) seasonal_order=(0, 0, 0, 0); AIC=107002.463, BIC=107029.642, Fit time=0.427 seconds
Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 0, 0, 0); AIC=106421.323, BIC=106455.297, Fit time=5.999 seconds
Near non-invertible roots for order (2, 1, 1)(0, 0, 0, 0); setting score to inf (at least one inverse root too close to the border of the unit circle: 0.996)
Total fit time: 8.579 seconds
106421.32323711767


ARIMA(maxiter=50, method='lbfgs', order=(2, 1, 1), out_of_sample_size=0,
      scoring='mse', scoring_args=None, seasonal_order=(0, 0, 0, 0),
      with_intercept=True)

In [7]:
#Predicting the values
# Rows of the raw test file that belong to the selected building; their
# original timestamps become the index of the prediction frame.
test_for_index = test_for_index[test_for_index['building_number']==index_reader]
if test_for_index.empty:
    raise ValueError('test.csv has no rows for building ' + str(index_reader))

# Place every test row on the hourly forecast grid by its own timestamp rather
# than by position. This replaces a fixed "4 readings per hour" repeat, so it
# still lines up when an hour has missing/extra rows or the file does not start
# on the hour.
row_times = pd.to_datetime(test_for_index['timestamp'])
first_hour = row_times.min().replace(minute=0, second=0, microsecond=0, nanosecond=0)
hour_offsets = ((row_times - first_hour) // pd.Timedelta(hours=1)).to_numpy()

# NOTE(review): as before, forecast step 0 is taken to be the first hour of the
# test file, i.e. the test period is assumed to start right after the training
# data ends - confirm against the data.
hourly_forecast = np.asarray(stepwise_model.predict(n_periods=int(hour_offsets.max()) + 1))

# Each test row receives the forecast of the hour it falls in.
arima_forecast = pd.DataFrame({meter_reading: hourly_forecast[hour_offsets]})
arima_forecast.index = test_for_index['timestamp']

In [8]:
#Saving Dataframe into csv
# One prediction file per (building, meter) pair.
prediction_file = 'csv_files/prediction_files/Arima/building_{}_{}_arima_predictions.csv'.format(index_reader, meter_reading)
arima_forecast.to_csv(prediction_file)

# Compiling Meter predictions for a Building

In [88]:
####Combine the three per-meter ARIMA predictions of one building into one file (run this only after running all meters for that building)
#Input variable
index_reader = 1

# Shared prefix of every prediction file belonging to this building.
prediction_stem = 'csv_files/prediction_files/Arima/building_'+str(index_reader)+'_'

# The main-meter file supplies the timestamps used as the final index.
main_meter = pd.read_csv(prediction_stem+'main_meter_arima_predictions.csv')
final_index = main_meter.pop('timestamp')
# The sub-meter files only contribute their prediction column.
sub_meter_1 = pd.read_csv(prediction_stem+'sub_meter_1_arima_predictions.csv').drop(columns=['timestamp'])
sub_meter_2 = pd.read_csv(prediction_stem+'sub_meter_2_arima_predictions.csv').drop(columns=['timestamp'])

# Place the three prediction columns side by side (matched by row position).
all_meter = pd.concat([main_meter,sub_meter_1,sub_meter_2],axis=1)
all_meter.index = final_index

#Saving Dataframe into csv
all_meter.to_csv(prediction_stem+'3meter_arima_predictions.csv')

In [89]:
# Display the combined forecast table: the three meter prediction columns, indexed by timestamp.
all_meter

Unnamed: 0_level_0,main_meter,sub_meter_1,sub_meter_2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01 00:00:00,2493.628653,646.243529,37.562042
2018-01-01 00:15:00,2493.628653,646.243529,37.562042
2018-01-01 00:30:00,2493.628653,646.243529,37.562042
2018-01-01 00:45:00,2493.628653,646.243529,37.562042
2018-01-01 01:00:00,2929.247002,670.184964,86.874990
...,...,...,...
2018-04-18 12:00:00,5154.265814,394.494997,349.339281
2018-04-18 12:15:00,5154.265814,394.494997,349.339281
2018-04-18 12:30:00,5154.265814,394.494997,349.339281
2018-04-18 12:45:00,5154.265814,394.494997,349.339281
