In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.metrics import r2_score

In [2]:
# Building metadata, indexed by the `uid` column; the start/end columns are
# parsed as day-first dates.
meta = pd.read_csv(
    "../input/meta_open.csv",
    index_col='uid',
    parse_dates=["datastart", "dataend"],
    dayfirst=True,
)
# Time-series table indexed by the `timestamp` column; the parsed index is
# tz-naive, so it is localized to UTC here.
temporal = (
    pd.read_csv(
        "../input/temp_open_utc_complete.csv",
        index_col='timestamp',
        parse_dates=True,
    )
    .tz_localize('utc')
)

In [3]:
# All model types
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.dummy import DummyRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import GradientBoostingRegressor
from  sklearn.linear_model import HuberRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import TheilSenRegressor
    
# Registry of regressors to evaluate. Every entry is a two-element list:
# [model name, estimator instance]. Only the uncommented entries are run;
# uncomment a line to add that estimator to the benchmark.
models = [
    # ['RandomForestRegressor', RandomForestRegressor(n_estimators = 1000, random_state = 42)],
    # ['AdaBoostRegressor', AdaBoostRegressor(n_estimators = 1000, random_state = 42)],
    # ['BaggingRegressor', BaggingRegressor(n_estimators = 1000, random_state = 42)],
    # ['DecisionTreeRegressor', DecisionTreeRegressor(random_state = 42)],
    ["DummyRegressor", DummyRegressor()],
    # ['ExtraTreeRegressor', ExtraTreeRegressor(random_state = 42)],
    # ['ExtraTreesRegressor', ExtraTreesRegressor(n_estimators = 1000, random_state = 42)],
    # ['GaussianProcessRegressor', GaussianProcessRegressor(random_state = 42)],
    # ['GradientBoostingRegressor', GradientBoostingRegressor(n_estimators = 1000, random_state = 42)],
    # ['HuberRegressor', HuberRegressor()],
    # ['KNeighborsRegressor', KNeighborsRegressor()],
    # ['MLPRegressor', MLPRegressor(random_state = 42)],
    # ['PassiveAggressiveRegressor', PassiveAggressiveRegressor(random_state = 42)],
    # ['RANSACRegressor', RANSACRegressor(random_state = 42)],
    # ['SGDRegressor', SGDRegressor(random_state = 42)],
    # ['TheilSenRegressor', TheilSenRegressor(random_state = 42)],
]

  from numpy.core.umath_tests import inner1d


In [4]:
# Produce a file with metrics (MAPE, NMBE, CVRSME, RSQUARED) for the provided model.
# Results are saved as ../results/<modelName>_metrics_<buildingtype>.csv

# Calendar months used for fitting and for evaluation. These are integers
# because DatetimeIndex.month yields integers; string literals such as "4"
# do not match them in current pandas and would leave both splits empty.
_TRAIN_MONTHS = [1, 2, 3, 5, 6, 7, 9, 10, 11]
_TEST_MONTHS = [4, 8, 12]


def _to_timezone(moment, timezone):
    """Return `moment` as a pandas Timestamp expressed in `timezone`.

    A tz-naive value is interpreted as wall-clock time in `timezone`;
    a tz-aware value is converted to it.
    """
    moment = pd.Timestamp(moment)
    if moment.tzinfo is None:
        return moment.tz_localize(timezone)
    return moment.tz_convert(timezone)


def _build_features(timestamps, temperatures):
    """Build the feature matrix: one-hot hour, one-hot weekday, temperature.

    `timestamps` is a DatetimeIndex and `temperatures` an array of values that
    is paired with it by position. The dummies are requested as floats so the
    resulting array is numeric rather than an object array of bools and floats.
    """
    return np.array(pd.concat([pd.get_dummies(timestamps.hour, dtype=float),
                               pd.get_dummies(timestamps.dayofweek, dtype=float),
                               pd.Series(temperatures)], axis=1))


def createMetrics(modelName, model, buildingtype):
    """Fit `model` per building of one type and record its error metrics.

    For every column of `temporal` whose name contains `buildingtype`, the
    series is cut to the building's datastart/dataend, split by calendar month
    into a training and a test set, and `model` is fitted on hour-of-day,
    day-of-week and outdoor-temperature features. MAPE, NMBE, CVRSME and
    R squared are printed and written to
    '../results/<modelName>_metrics_<buildingtype>.csv'.

    Parameters:
        modelName: string used in the printed header and the CSV file name.
        model: estimator exposing fit(X, y) and predict(X); it is refitted
            for every building.
        buildingtype: substring selecting the building columns of `temporal`.

    Relies on the module-level names `meta`, `temporal` and `r2_score`, and on
    the module-level dicts MAPE_data, NMBE_data, CVRSME_data and RSQUARED_data,
    which must exist before the call; each building's metrics are added to
    them under the building name.
    """
    buildingnames = temporal.columns[temporal.columns.str.contains(buildingtype)]
    print('\n\n' + modelName + '-' + buildingtype + '\n_____________')
    # Make sure the output directory exists so the CSV write cannot fail on it.
    os.makedirs('../results', exist_ok=True)
    for singlebuilding in buildingnames:
        print("Modelling: " + singlebuilding)
        # Get Data. A single row lookup replaces repeated transposes of `meta`.
        building_meta = meta.loc[singlebuilding]
        single_timezone = building_meta.timezone
        # The series index is tz-aware after tz_convert, so the truncation
        # bounds have to be tz-aware as well.
        single_start = _to_timezone(building_meta.datastart, single_timezone)
        single_end = _to_timezone(building_meta.dataend, single_timezone)
        single_building_data = pd.DataFrame(
            temporal[singlebuilding]
            .tz_convert(single_timezone)
            .truncate(before=single_start, after=single_end))

        # Split into Training and Testing
        data_months = single_building_data.index.month
        trainingdata = single_building_data[data_months.isin(_TRAIN_MONTHS)]
        testdata = single_building_data[data_months.isin(_TEST_MONTHS)]

        # Get weather file
        weatherfilename = building_meta.newweatherfilename
        print("Weatherfile: "+weatherfilename)
        weather = pd.read_csv(os.path.join("../input/", weatherfilename),
                              index_col='timestamp', parse_dates=True, na_values='-9999')
        weather = weather.tz_localize(single_timezone, ambiguous='infer')
        temperature_columns = [col for col in weather.columns if 'Temperature' in col]
        outdoor_temp = weather[temperature_columns].resample("h").mean()
        # Regular hourly grid starting at the first weather reading, with as
        # many steps as the building series has rows; gaps are filled from the
        # neighbouring readings.
        # NOTE(review): this assumes the weather file starts at the same
        # instant as the truncated building data - confirm against the inputs.
        hourly_index = pd.date_range(start=outdoor_temp.index[0],
                                     periods=len(single_building_data), freq="h")
        outdoor_temp = outdoor_temp.reindex(hourly_index).ffill().bfill()
        temp_months = outdoor_temp.index.month

        # Create training data array
        train_features = _build_features(
            trainingdata.index,
            outdoor_temp.loc[temp_months.isin(_TRAIN_MONTHS), 'TemperatureC'].values)
        train_labels = np.array(trainingdata[singlebuilding].values)

        # Create test data array
        test_features = _build_features(
            testdata.index,
            outdoor_temp.loc[temp_months.isin(_TEST_MONTHS), 'TemperatureC'].values)
        test_labels = np.array(testdata[singlebuilding].values)

        # Train the model on training data
        model.fit(train_features, train_labels)
        # Predict on the held-out months
        predictions = model.predict(test_features)

        # Residuals and their absolute values
        residuals = test_labels - predictions
        errors = np.abs(residuals)
        observation_count = pd.Series(test_labels).count()  # non-NaN labels
        mean_label = np.mean(test_labels)
        # Mean absolute percentage error; labels equal to zero yield inf here.
        MAPE = 100 * np.mean(errors / test_labels)
        NMBE = 100 * (np.sum(residuals) / (observation_count * mean_label))
        CVRSME = 100 * ((np.sum(residuals**2) / (observation_count - 1))**(0.5)) / mean_label
        RSQUARED = r2_score(test_labels, predictions)

        print("MAPE: "+str(MAPE))
        print("NMBE: "+str(NMBE))
        print("CVRSME: "+str(CVRSME))
        print("R SQUARED: "+str(RSQUARED))

        MAPE_data[singlebuilding] = MAPE
        NMBE_data[singlebuilding] = NMBE
        CVRSME_data[singlebuilding] = CVRSME
        RSQUARED_data[singlebuilding] = RSQUARED

        # Rewritten after every building so partial results survive a failure
        # later in the loop.
        metrics = pd.DataFrame([MAPE_data, NMBE_data, CVRSME_data, RSQUARED_data]).T
        metrics.columns = ["MAPE", "NMBE", "CVRSME", "RSQUARED"]
        metrics.to_csv('../results/' + modelName + '_metrics_' + buildingtype + '.csv')


In [5]:
# Evaluate every configured model on every building type.
buildingtypes = ['Office', 'PrimClass', 'UnivClass', 'UnivDorm', 'UnivLab']
for elem in models:
    model_name, estimator = elem[0], elem[1]
    for buildingtype in buildingtypes:
        # createMetrics stores its results in these module-level dicts, so
        # each (model, building type) run starts from empty ones.
        MAPE_data, RSQUARED_data, NMBE_data, CVRSME_data = {}, {}, {}, {}
        createMetrics(model_name, estimator, buildingtype)
    



DummyRegressor-Office
_____________
Modelling: Office_Cristina
Weatherfile: weather2.csv
MAPE: 31.48652791375246
NMBE: -1.4879135920651274
CVRSME: 31.649425974673346
R SQUARED: -0.0022160616661179855
Modelling: Office_Jesus
Weatherfile: weather1.csv
MAPE: 147.472717445552
NMBE: 9.316838322023584
CVRSME: 39.557826365804324
R SQUARED: -0.05875781584640505
Modelling: Office_Jett
Weatherfile: weather1.csv
MAPE: 185.0037145890515
NMBE: -7.02307076064646
CVRSME: 72.05239208060017
R SQUARED: -0.009596254286377004
Modelling: Office_Jerry
Weatherfile: weather1.csv
MAPE: 118.61073700102605
NMBE: 12.04017641143145
CVRSME: 57.38432865322417
R SQUARED: -0.046072037518315456
Modelling: Office_Lesa
Weatherfile: weather5.csv
MAPE: 44.44489509258323
NMBE: 8.00090723208205
CVRSME: 45.754430741822965
R SQUARED: -0.03155748073946718
Modelling: Office_Jackie
Weatherfile: weather1.csv
MAPE: 674.5585006608691
NMBE: -11.713703038791841
CVRSME: 103.37679101735105
R SQUARED: -0.013012291286627953
Modelling: O

MAPE: 36.498114408380225
NMBE: 9.594184744694418
CVRSME: 37.91503540165542
R SQUARED: -0.06844506757981805
Modelling: Office_Abigail
Weatherfile: weather0.csv
MAPE: 39.16558103120423
NMBE: 11.578050835147732
CVRSME: 50.777281118813356
R SQUARED: -0.05486903185600256
Modelling: Office_Mark
Weatherfile: weather3.csv
MAPE: 30.852335793691942
NMBE: -0.4877402716748944
CVRSME: 34.29072268678639
R SQUARED: -0.00020244572185501575
Modelling: Office_Travis
Weatherfile: weather8.csv
MAPE: 6.939848214699243
NMBE: -0.8017922898844742
CVRSME: 8.830593806487125
R SQUARED: -0.008316441161883459
Modelling: Office_Lena
Weatherfile: weather5.csv
MAPE: 29.052340378863594
NMBE: 1.1114501378977897
CVRSME: 39.771296538278726
R SQUARED: -0.0007819458296687287
Modelling: Office_Max
Weatherfile: weather3.csv
MAPE: 40.847927189269846
NMBE: -6.057683616301033
CVRSME: 38.54599252195258
R SQUARED: -0.025334779770609384
Modelling: Office_Gustavo
Weatherfile: weather5.csv
MAPE: 27.63168458800666
NMBE: -5.0515615501

MAPE: 33.56523841245596
NMBE: -11.896756281727857
CVRSME: 34.31762865659873
R SQUARED: -0.13666303486320452
Modelling: Office_Stella
Weatherfile: weather6.csv
MAPE: 40.0540995168797
NMBE: -6.722420010422474
CVRSME: 38.867179249423934
R SQUARED: -0.03085163173431793
Modelling: Office_Louise
Weatherfile: weather5.csv
MAPE: 30.204175636805154
NMBE: 3.078276269279132
CVRSME: 33.38645377365366
R SQUARED: -0.008577896206443336
Modelling: Office_Guillermo
Weatherfile: weather5.csv
MAPE: 44.929892168064136
NMBE: -9.392059314634839
CVRSME: 39.529979827245896
R SQUARED: -0.059856639173353265
Modelling: Office_Paulina
Weatherfile: weather4.csv
MAPE: 12.42941409403817
NMBE: -1.95386605354244
CVRSME: 14.800569289983855
R SQUARED: -0.017744685783686265
Modelling: Office_Gabriela
Weatherfile: weather5.csv
MAPE: 21.252812020283937
NMBE: -0.5677530371802516
CVRSME: 24.126227831782096
R SQUARED: -0.0005543416365252352
Modelling: Office_Carolina
Weatherfile: weather2.csv
MAPE: 22.389896483038854
NMBE: -1

Modelling: Office_Georgia
Weatherfile: weather5.csv
MAPE: 265.02595802737005
NMBE: -11.233735638743264
CVRSME: 106.72315336986452
R SQUARED: -0.01120904943440526


DummyRegressor-PrimClass
_____________
Modelling: PrimClass_Jolie
Weatherfile: weather1.csv
MAPE: 410.1061480747455
NMBE: -37.805083502964955
CVRSME: 134.59424479383853
R SQUARED: -0.08569425317096568
Modelling: PrimClass_Jaylin
Weatherfile: weather1.csv
MAPE: 253.54604736371846
NMBE: -35.918194856392994
CVRSME: 125.55364317163284
R SQUARED: -0.08917984166824922
Modelling: PrimClass_Uma
Weatherfile: weather10.csv
MAPE: 74.49973459733478
NMBE: -6.1364720503583055
CVRSME: 71.40232236531818
R SQUARED: -0.007444416727188763
Modelling: PrimClass_Jayla
Weatherfile: weather1.csv
MAPE: 327.09481778659824
NMBE: -32.62923747774858
CVRSME: 93.55404042911925
R SQUARED: -0.1385612842992674
Modelling: PrimClass_Janiya
Weatherfile: weather1.csv
MAPE: 520.2002051407242
NMBE: -41.24030520473359
CVRSME: 130.56810742730576
R SQUARED: -0.110874

MAPE: 3178.168355076643
NMBE: -28.34866409241405
CVRSME: 98.92553264987266
R SQUARED: -0.08951107802074265
Modelling: PrimClass_Jimmie
Weatherfile: weather1.csv
MAPE: 424.5179250265724
NMBE: -33.25328245178126
CVRSME: 110.80425777418539
R SQUARED: -0.09902890957676425
Modelling: PrimClass_Jayda
Weatherfile: weather1.csv
MAPE: 459.2752892563235
NMBE: -47.93509886421292
CVRSME: 145.07428887237694
R SQUARED: -0.12261827358821797
Modelling: PrimClass_Jocelyn
Weatherfile: weather1.csv
MAPE: 267.5709608388253
NMBE: -28.958333441572304
CVRSME: 93.75446151979033
R SQUARED: -0.10551779707672293
Modelling: PrimClass_Jeffery
Weatherfile: weather1.csv
MAPE: 179.87316686357065
NMBE: -13.65757835441925
CVRSME: 79.34966884811739
R SQUARED: -0.030543603415931875
Modelling: PrimClass_Jayson
Weatherfile: weather1.csv
MAPE: 2394.7714817340616
NMBE: -34.79632103979836
CVRSME: 89.39997688679836
R SQUARED: -0.17863565944441273
Modelling: PrimClass_Jennie
Weatherfile: weather1.csv
MAPE: 326.69667049709363
NM

MAPE: 76.36907522496324
NMBE: -10.911727610264897
CVRSME: 56.07221881954473
R SQUARED: -0.039378768080862825
Modelling: PrimClass_Ernest
Weatherfile: weather21.csv
MAPE: 45.4123166372106
NMBE: 0.8439433038889808
CVRSME: 53.659249361661765
R SQUARED: -0.0002475378327246869
Modelling: PrimClass_Emily
Weatherfile: weather32.csv
MAPE: 48.52181428794251
NMBE: -1.0639172031731967
CVRSME: 47.76203671804112
R SQUARED: -0.0004966636398779922
Modelling: PrimClass_Jazmin
Weatherfile: weather1.csv
MAPE: 190.58568846889443
NMBE: -13.460392335135719
CVRSME: 90.14249079660753
R SQUARED: -0.022816594827766323
Modelling: PrimClass_Jenna
Weatherfile: weather1.csv
MAPE: 315.58699966959034
NMBE: -18.608566478224304
CVRSME: 70.21858340772644
R SQUARED: -0.07557144168988383


DummyRegressor-UnivClass
_____________
Modelling: UnivClass_Tamra
Weatherfile: weather8.csv
MAPE: 150.07745711794166
NMBE: 16.29085040193253
CVRSME: 98.81413565372087
R SQUARED: -0.027952399813860662
Modelling: UnivClass_Tammy
Weatherf

Weatherfile: weather0.csv
MAPE: 31.293107534689003
NMBE: 5.1679083213820265
CVRSME: 42.395473844887995
R SQUARED: -0.015090093805384619
Modelling: UnivClass_Annabella
Weatherfile: weather0.csv
MAPE: 21.236403187552643
NMBE: -0.5833170307598137
CVRSME: 25.366636219767834
R SQUARED: -0.0005293100046968036
Modelling: UnivClass_Antoinette
Weatherfile: weather0.csv
MAPE: 41.231411613305234
NMBE: 6.132809953299769
CVRSME: 37.04791380417098
R SQUARED: -0.028187766587099317
Modelling: UnivClass_Sylvia
Weatherfile: weather6.csv
MAPE: 51.57383773798696
NMBE: -11.919018102973467
CVRSME: 46.26297728998943
R SQUARED: -0.07112998451215424
Modelling: UnivClass_Peter
Weatherfile: weather4.csv
MAPE: 23.682264010704355
NMBE: -5.56761553112215
CVRSME: 24.05434984774002
R SQUARED: -0.056633353729623526
Modelling: UnivClass_Aoibhe
Weatherfile: weather0.csv
MAPE: 11.30818043960302
NMBE: -0.23098687958011183
CVRSME: 13.918033904041259
R SQUARED: -0.0002756355756123874
Modelling: UnivClass_Ciara
Weatherfile: 

MAPE: 35.9890017743911
NMBE: -0.5304445093412427
CVRSME: 35.77220959571369
R SQUARED: -0.00022002937557275004
Modelling: UnivDorm_Cian
Weatherfile: weather2.csv
MAPE: 39.23222730045136
NMBE: -5.679420367675019
CVRSME: 36.615555781671866
R SQUARED: -0.02466350365023451
Modelling: UnivDorm_Curtis
Weatherfile: weather2.csv
MAPE: 15.557285598827148
NMBE: -0.34494397347844113
CVRSME: 19.433625265343558
R SQUARED: -0.0003152993841957219
Modelling: UnivDorm_Leonard
Weatherfile: weather5.csv
MAPE: 31.58103780508409
NMBE: -15.837685644017919
CVRSME: 32.864774649552906
R SQUARED: -0.30265537523292196
Modelling: UnivDorm_Patrice
Weatherfile: weather4.csv
MAPE: 30.277310105589606
NMBE: -7.139664871103793
CVRSME: 31.690582861721378
R SQUARED: -0.05349645635025291
Modelling: UnivDorm_Candace
Weatherfile: weather2.csv
MAPE: 13.03590218011515
NMBE: -1.6963672332497601
CVRSME: 15.696685420797829
R SQUARED: -0.01182292933391027
Modelling: UnivDorm_Clayton
Weatherfile: weather2.csv
MAPE: 26.2178623728410

MAPE: 19.183783559963498
NMBE: 0.4238133005697046
CVRSME: 21.20080284024369
R SQUARED: -0.00039995851809027094
Modelling: UnivDorm_Cecilia
Weatherfile: weather2.csv
MAPE: 12.492469653684038
NMBE: -3.0845702784300943
CVRSME: 14.265052159596108
R SQUARED: -0.04907329020712958
Modelling: UnivDorm_Payton
Weatherfile: weather4.csv
MAPE: 28.39212064070718
NMBE: -6.865254214021258
CVRSME: 27.402429818971207
R SQUARED: -0.06700353115799995
Modelling: UnivDorm_Lysander
Weatherfile: weather5.csv
MAPE: 29.165677967010488
NMBE: -9.230301241668132
CVRSME: 31.24057400218729
R SQUARED: -0.09569280522117785


DummyRegressor-UnivLab
_____________
Modelling: UnivLab_Paul
Weatherfile: weather4.csv
MAPE: 12.888545449378553
NMBE: -5.323847439077367
CVRSME: 14.527242742303608
R SQUARED: -0.15521941066269873
Modelling: UnivLab_Bethany
Weatherfile: weather7.csv
MAPE: 18.765814725122816
NMBE: -2.0090109011343182
CVRSME: 22.625896070491926
R SQUARED: -0.007950398685978355
Modelling: UnivLab_Crystal
Weatherfile:

MAPE: 31.315961639434953
NMBE: -8.578027648419049
CVRSME: 34.93720891697524
R SQUARED: -0.0641816647327933
Modelling: UnivLab_Lauren
Weatherfile: weather5.csv
MAPE: 14.920546806738724
NMBE: 5.774291501658552
CVRSME: 19.68107479829826
R SQUARED: -0.0942337862665601
Modelling: UnivLab_Ariel
Weatherfile: weather0.csv
MAPE: 8.887175320429169
NMBE: -0.6461641302645
CVRSME: 9.939877304508434
R SQUARED: -0.004245808559470099
Modelling: UnivLab_Cam
Weatherfile: weather2.csv
MAPE: 9.108896520592834
NMBE: -0.5368937496995303
CVRSME: 11.35230537546121
R SQUARED: -0.0022427363360684183
Modelling: UnivLab_Phil
Weatherfile: weather4.csv
MAPE: 10.385457850371244
NMBE: 0.3234883747989106
CVRSME: 12.57893083463623
R SQUARED: -0.0006620855028713901
Modelling: UnivLab_Ashlee
Weatherfile: weather0.csv
MAPE: 6.019648913498461
NMBE: -1.4312539160913273
CVRSME: 7.360331686246824
R SQUARED: -0.03931728214921182
Modelling: UnivLab_Allan
Weatherfile: weather0.csv
MAPE: 7.529840077732921
NMBE: -2.152592904811429