In [54]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from xgboost import XGBRegressor
from datetime import datetime

In [55]:
#User Input Variables
# Building number whose intermediate files are read, modelled and written below.
index_reader = 1
# Target column to predict: 'main_meter', 'sub_meter_1' or 'sub_meter_2'.
meter_reading = 'main_meter'

In [56]:
###Making Files for building
# Load the raw tables, then turn the text 'timestamp' column of each one into
# datetimes so the rows can later be resampled on a time index.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
for raw_table in (train, test):
    raw_table['timestamp'] = pd.to_datetime(raw_table['timestamp'], infer_datetime_format=True)


##Creating training files

# For buildings 1-5 in order: take that building's rows, index them by their
# timestamp (the column itself is kept, as before) and average everything into
# one-hour bins. The per-building hourly frames are then stacked row-wise.
hourly_train_parts = []
for building in range(1, 6):
    building_rows = train[train['building_number'] == building]
    building_rows = building_rows.set_index('timestamp', drop=False)
    hourly_train_parts.append(building_rows.resample('1H').mean())
new_df = pd.concat(hourly_train_parts)
    
#Adding new features
# Weekday number taken from the hourly index.
new_df['day of week'] = new_df.index.dayofweek
# 'Hour' is only a scratch column used to derive the 'corporate' flag:
# 1 for hours 8..19 inclusive, 0 for hours 0..7 and 20..23.
new_df['Hour'] = new_df.index.hour
in_daytime_window = (new_df['Hour'] >= 8) & (new_df['Hour'] <= 19)
new_df['corporate'] = in_daytime_window.astype('int64')
new_df = new_df.drop(columns='Hour')
 
# Replace readings above a per-meter ceiling with that meter's column mean.
# The mean is computed before the replacement, so it still includes the
# out-of-range readings and spans every building stacked in new_df.
outlier_ceilings = {'main_meter': 15000, 'sub_meter_1': 5000, 'sub_meter_2': 3000}
for meter_name, ceiling in outlier_ceilings.items():
    column_mean = new_df[meter_name].mean()
    new_df.loc[new_df[meter_name] > ceiling, meter_name] = column_mean

# One-hot encode the weekday and the building id.
# The per-building row mask is captured from the numeric 'building_number'
# column *before* encoding: that column is float after the hourly mean, so the
# generated dummy labels can come out as 'building_number_1.0', and looking up
# 'building_number_1' would raise KeyError.
one_hot = ['day of week','building_number']
building_of_row = new_df['building_number'].values
new_df = pd.get_dummies(new_df,columns = one_hot)

#Saving training file
# Rows whose building number is NaN (empty hourly bins) match no building and
# are left out of every file - the same rows a "dummy == 1" test would select.
for i in range(1,6):
    new_df[building_of_row == i].to_csv('./csv_files/intermediate_files/building_'+str(i)+'_train.csv')

##Creating testing files

# Same preparation as for the training table: per building (1-5, in order),
# index the rows by timestamp (column kept), average into one-hour bins, and
# stack the per-building hourly frames row-wise.
hourly_test_parts = []
for building in range(1, 6):
    building_rows = test[test['building_number'] == building]
    building_rows = building_rows.set_index('timestamp', drop=False)
    hourly_test_parts.append(building_rows.resample('1H').mean())
new_df_test = pd.concat(hourly_test_parts)

#Adding new features
# The three meter columns are filled with the literal string 'NaN' (a text
# placeholder, not a float NaN) so the test table carries the target columns.
for placeholder_column in ('main_meter', 'sub_meter_1', 'sub_meter_2'):
    new_df_test[placeholder_column] = 'NaN'
# Weekday number taken from the hourly index.
new_df_test['day of week'] = new_df_test.index.dayofweek
# 'Hour' is only a scratch column used to derive the 'corporate' flag:
# 1 for hours 8..19 inclusive, 0 for hours 0..7 and 20..23.
new_df_test['Hour'] = new_df_test.index.hour
in_daytime_window = (new_df_test['Hour'] >= 8) & (new_df_test['Hour'] <= 19)
new_df_test['corporate'] = in_daytime_window.astype('int64')
new_df_test = new_df_test.drop(columns='Hour')

# One-hot encode the weekday and the building id.
# The per-building row mask is captured from the numeric 'building_number'
# column *before* encoding: that column is float after the hourly mean, so the
# generated dummy labels can come out as 'building_number_1.0', and looking up
# 'building_number_1' would raise KeyError.
one_hot = ['day of week','building_number']
test_building_of_row = new_df_test['building_number'].values
new_df_test = pd.get_dummies(new_df_test,columns = one_hot)

#Saving test file
# Rows whose building number is NaN (empty hourly bins) match no building and
# are left out of every file - the same rows a "dummy == 1" test would select.
for i in range(1,6):
    new_df_test[test_building_of_row == i].to_csv('./csv_files/intermediate_files/building_'+str(i)+'_test.csv')

In [57]:
#Reading Files
# Intermediate hourly files of the selected building (written by the cell above).
train_file = 'csv_files/intermediate_files/building_'+str(index_reader)+'_train.csv'
test_file = 'csv_files/intermediate_files/building_'+str(index_reader)+'_test.csv'

dataframe = pd.read_csv(train_file)
# Row count of the training table as loaded.
shape_old_dataframe = len(dataframe)
test_building = pd.read_csv(test_file)
# The raw test table is reloaded; its timestamps later label the predictions.
test_for_index = pd.read_csv('./test.csv')

In [58]:
#Preprocessing Dataframe
# Parse the saved timestamp text and move it from a column into the index
# (set_index by label removes the column in the same step).
dataframe['timestamp'] = pd.to_datetime(dataframe['timestamp'], infer_datetime_format=True)
dataframe = dataframe.set_index('timestamp')

test_building['timestamp'] = pd.to_datetime(test_building['timestamp'], infer_datetime_format=True)
test_building = test_building.set_index('timestamp')

# Feature list = every remaining training column except the three meter targets.
using_columns = list(dataframe.columns)
for target_name in ('main_meter', 'sub_meter_1', 'sub_meter_2'):
    using_columns.remove(target_name)

#Preparing training data and testing data
# Features and target come from the selected building's training file.
X_train = dataframe[using_columns]
y_train = dataframe[meter_reading]

# Prediction inputs come from the building's *test* file. This was previously
# built from `dataframe`, which made the model predict on its own training rows
# and left `test_building` unused. Selecting with `using_columns` keeps the
# same columns in the same order as X_train and fails loudly if one is missing.
X_test = test_building[using_columns]

In [None]:
#Building the Model
# Gradient-boosted regressor; only the number of trees is set explicitly.
n_trees = 1000
xgb_model = XGBRegressor(n_estimators=n_trees)

#Fitting the model
xgb_model.fit(X=X_train, y=y_train)

In [None]:
#Predicting the values
hourly_predictions = xgb_model.predict(X_test)

# Keep only the raw test rows of the selected building; their count and
# timestamps define the shape and labels of the output.
test_for_index = test_for_index[test_for_index['building_number'] == index_reader]
n_output_rows = len(test_for_index)

# Each hourly prediction is repeated 4 times and the result trimmed to the
# number of raw test rows (presumably 15-minute readings - TODO confirm).
expanded_predictions = np.repeat(hourly_predictions, 4, axis=0)[:n_output_rows]

xgb_predictions = pd.DataFrame({meter_reading: expanded_predictions})
xgb_predictions.index = test_for_index['timestamp']

In [None]:
#Saving Dataframe into csv
# One file per (building, meter) pair; the timestamp index is written as the first column.
predictions_path = 'csv_files/prediction_files/XGB/building_'+str(index_reader)+'_'+str(meter_reading)+'_xgb_predictions.csv'
xgb_predictions.to_csv(predictions_path)

# Compiling Meter predictions for a Building

In [None]:
####Combine the three per-meter XGB prediction files of one building into a single file
####(run this only after the predictions of all three meters have been saved for the building)
#Input variable
index_reader = 1

# Load the saved prediction file of every meter, in main / sub 1 / sub 2 order.
meter_tables = [
    pd.read_csv('csv_files/prediction_files/XGB/building_'+str(index_reader)+'_'+meter_name+'_xgb_predictions.csv')
    for meter_name in ('main_meter', 'sub_meter_1', 'sub_meter_2')
]

# The timestamps of the main-meter file label the rows of the combined table.
final_index = meter_tables[0]['timestamp']
main_meter, sub_meter_1, sub_meter_2 = [
    table.drop(columns='timestamp') for table in meter_tables
]

# Place the three prediction columns side by side.
all_meter = pd.concat([main_meter, sub_meter_1, sub_meter_2], axis=1)
all_meter.index = final_index

#Saving Dataframe into csv
combined_path = 'csv_files/prediction_files/XGB/building_'+str(index_reader)+'_3meter_xgb_predictions.csv'
all_meter.to_csv(combined_path)

In [None]:
# Display the combined three-meter prediction table for the selected building.
all_meter