In [4]:
import pandas as pd
import os
import numpy as np
from sklearn.metrics import r2_score

In [5]:
# Building metadata indexed by uid; the datastart/dataend columns are parsed as
# day-first dates.
meta = pd.read_csv(
    "../input/meta_open.csv",
    index_col='uid',
    parse_dates=["datastart", "dataend"],
    dayfirst=True,
)

# Time series table indexed by timestamp; the parsed index is tagged as UTC.
temporal = pd.read_csv(
    "../input/temp_open_utc_complete.csv",
    index_col='timestamp',
    parse_dates=True,
).tz_localize('utc')

In [6]:
# All models types
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.dummy import DummyRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import GradientBoostingRegressor
from  sklearn.linear_model import HuberRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import TheilSenRegressor
    
# Registry of candidate regressors. Every entry is a two-element list:
# [model-name, model-instance]. Only the uncommented entries are evaluated;
# uncomment a line to add that regressor to the run.
models = [
    # ['RandomForestRegressor', RandomForestRegressor(n_estimators = 1000, random_state = 42)],
    # ['AdaBoostRegressor', AdaBoostRegressor(n_estimators = 1000, random_state = 42)],
    # ['BaggingRegressor', BaggingRegressor(n_estimators = 1000, random_state = 42)],
    # ['DecisionTreeRegressor', DecisionTreeRegressor(random_state = 42)],
    # ['DummyRegressor', DummyRegressor()],
    # ['ExtraTreeRegressor', ExtraTreeRegressor(random_state = 42)],
    # ['ExtraTreesRegressor', ExtraTreesRegressor(n_estimators = 1000, random_state = 42)],
    # ['GaussianProcessRegressor', GaussianProcessRegressor(random_state = 42)],
    # ['GradientBoostingRegressor', GradientBoostingRegressor(n_estimators = 1000, random_state = 42)],
    # ['HuberRegressor', HuberRegressor()],
    ['KNeighborsRegressor', KNeighborsRegressor()],
    # ['MLPRegressor', MLPRegressor(random_state = 42)],
    # ['PassiveAggressiveRegressor', PassiveAggressiveRegressor(random_state = 42)],
    # ['RANSACRegressor', RANSACRegressor(random_state = 42)],
    # ['SGDRegressor', SGDRegressor(random_state = 42)],
    # ['TheilSenRegressor', TheilSenRegressor(random_state = 42)],
]

In [7]:
# Build the model input matrix shared by the training and the test split.
def _buildFeatureMatrix(timestamps, temperatures):
    """Return a float matrix of one-hot hour, one-hot day-of-week and temperature.

    timestamps   -- DatetimeIndex of the samples (its .hour and .dayofweek are encoded)
    temperatures -- 1-D array of outdoor temperatures, positionally aligned with
                    `timestamps`
    """
    features = pd.concat([pd.get_dummies(timestamps.hour),
                          pd.get_dummies(timestamps.dayofweek),
                          pd.Series(temperatures)], axis=1)
    # Force a numeric matrix: depending on the pandas version get_dummies yields
    # uint8 or bool columns, and a mixed bool/float frame would otherwise become
    # an object array.
    return np.array(features, dtype=float)


# Produce a file with metrics (MAPE, NMBE, CVRSME, RSQUARED) for the provided model.
# Results are saved as ../results/<modelName>_metrics_<buildingtype>.csv
def createMetrics(modelName, model, buildingtype,
                  train_months=(1, 2, 3, 5, 6, 7, 9, 10, 11),
                  test_months=(4, 8, 12)):
    """Fit `model` on every building of one type and record hold-out metrics.

    For each column of the module-level `temporal` frame whose name matches
    `buildingtype` (via `str.contains`), the series is converted to the
    building's timezone, cut to the datastart/dataend range from the
    module-level `meta` frame, split by calendar month into a training and a
    test set, and modelled from hour-of-day, day-of-week and outdoor
    temperature.

    Parameters
    ----------
    modelName : str
        Label used in the progress output and in the result file name.
    model : object
        Estimator exposing `fit(X, y)` and `predict(X)`; it is re-fitted for
        every building.
    buildingtype : str
        Pattern selecting the building columns of `temporal`.
    train_months, test_months : iterable of month numbers
        Calendar months (1-12) assigned to the training and the test set.
        Numeric strings are accepted and converted to integers.

    Side effects
    ------------
    * Prints progress and the four metrics for every building.
    * Stores the metrics in the module-level dicts `MAPE_data`, `NMBE_data`,
      `CVRSME_data` and `RSQUARED_data`, keyed by building name. These dicts
      must exist before the call; they are not created or cleared here.
    * Rewrites '../results/<modelName>_metrics_<buildingtype>.csv' after every
      building, so the file always holds the results collected so far.

    Returns None.
    """
    # The index months are integers; comparing them against strings silently
    # matches nothing on recent pandas versions, so normalise to int up front.
    train_months = [int(month) for month in train_months]
    test_months = [int(month) for month in test_months]

    # Create the output directory before any model is fitted, so a missing
    # folder does not surface only after the first (possibly slow) fit.
    os.makedirs('../results/', exist_ok=True)

    buildingnames = temporal.columns[temporal.columns.str.contains(buildingtype)]
    print('\n\n' + modelName + '-' + buildingtype + '\n_____________')
    for singlebuilding in buildingnames:
        print("Modelling: " + singlebuilding)
        # Get Data (single metadata row lookup instead of transposing `meta` repeatedly)
        building_meta = meta.loc[singlebuilding]
        single_timezone = building_meta.timezone
        single_start = building_meta.datastart
        single_end = building_meta.dataend
        # NOTE(review): datastart/dataend are parsed without timezone info while
        # this index is tz-aware -- confirm truncate() accepts that on the
        # pandas version in use.
        single_building_data = pd.DataFrame(
            temporal[singlebuilding].tz_convert(single_timezone).truncate(before=single_start, after=single_end))

        # Split into Training and Testing by calendar month
        trainingdata = single_building_data[single_building_data.index.month.isin(train_months)]
        testdata = single_building_data[single_building_data.index.month.isin(test_months)]

        # Get weather file
        weatherfilename = building_meta.newweatherfilename
        print("Weatherfile: "+weatherfilename)
        weather = pd.read_csv(os.path.join("../input/", weatherfilename), index_col='timestamp',
                              parse_dates=True, na_values='-9999')
        weather = weather.tz_localize(single_timezone, ambiguous='infer')
        outdoor_temp = pd.DataFrame(
            weather[[col for col in weather.columns if 'Temperature' in col]]).resample("H").mean()
        # Regular hourly grid starting at the first weather reading, with as many
        # steps as the building has samples; gaps are filled forward, then backward.
        hourly_index = pd.date_range(start=outdoor_temp.index[0], periods=len(single_building_data), freq="H")
        outdoor_temp = outdoor_temp.reindex(hourly_index).ffill().bfill()

        # Create training data array
        # (temperatures are attached by position, so the month filter on the
        # weather grid has to select as many rows as the one on the meter data)
        train_temperature = outdoor_temp[outdoor_temp.index.month.isin(train_months)].TemperatureC.values
        train_features = _buildFeatureMatrix(trainingdata.index, train_temperature)
        train_labels = np.array(trainingdata[singlebuilding].values)

        # Create test data array
        test_temperature = outdoor_temp[outdoor_temp.index.month.isin(test_months)].TemperatureC.values
        test_features = _buildFeatureMatrix(testdata.index, test_temperature)
        test_labels = np.array(testdata[singlebuilding].values)

        # Train the model on training data
        model.fit(train_features, train_labels)
        # Predict the held-out months
        predictions = model.predict(test_features)

        # Calculate the absolute errors
        errors = abs(predictions - test_labels)
        residuals = test_labels - predictions
        observation_count = pd.Series(test_labels).count()  # number of non-NaN labels
        mean_label = np.mean(test_labels)

        # Mean absolute percentage error; labels equal to 0 make this inf/nan.
        MAPE = 100 * np.mean((errors / test_labels))
        # Normalised mean bias error.
        NMBE = 100 * (np.sum(residuals) / (observation_count * mean_label))
        # Coefficient of variation of the root mean squared error.
        CVRSME = 100 * ((np.sum(residuals**2) / (observation_count - 1))**(0.5)) / mean_label
        RSQUARED = r2_score(test_labels, predictions)

        print("MAPE: "+str(MAPE))
        print("NMBE: "+str(NMBE))
        print("CVRSME: "+str(CVRSME))
        print("R SQUARED: "+str(RSQUARED))

        # Accumulate into the module-level result dicts (set up by the caller).
        MAPE_data[singlebuilding] = MAPE
        NMBE_data[singlebuilding] = NMBE
        CVRSME_data[singlebuilding] = CVRSME
        RSQUARED_data[singlebuilding] = RSQUARED

        # Rewrite the result file after every building so partial results
        # survive an interrupted run.
        metrics = pd.DataFrame([MAPE_data, NMBE_data, CVRSME_data, RSQUARED_data]).T
        metrics.columns = ["MAPE", "NMBE", "CVRSME", "RSQUARED"]
        metrics.to_csv('../results/' + modelName + '_metrics_' + buildingtype + '.csv')


In [8]:
for elem in models:
    # Each [name, estimator] pair is evaluated on every building category in turn.
    buildingtypes = ['Office', 'PrimClass', 'UnivClass', 'UnivDorm', 'UnivLab']
    for buildingtype in buildingtypes:
        # createMetrics writes into these module-level dicts, so give every
        # (model, category) run its own four empty accumulators.
        MAPE_data, RSQUARED_data, NMBE_data, CVRSME_data = {}, {}, {}, {}
        # Arguments: modelName, model, buildingtype
        createMetrics(elem[0], elem[1], buildingtype)
    



KNeighborsRegressor-Office
_____________
Modelling: Office_Cristina
Weatherfile: weather2.csv
MAPE: 21.225875713031922
NMBE: 1.0541924066325037
CVRSME: 24.66763121007184
R SQUARED: 0.39118607506649883
Modelling: Office_Jesus
Weatherfile: weather1.csv
MAPE: 134.70437929126072
NMBE: 6.739775266157466
CVRSME: 29.87613631055983
R SQUARED: 0.39607827319401756
Modelling: Office_Jett
Weatherfile: weather1.csv
MAPE: 168.02294391210927
NMBE: -8.160714055667267
CVRSME: 58.32687964817959
R SQUARED: 0.33841079545226616
Modelling: Office_Jerry
Weatherfile: weather1.csv
MAPE: 92.52095702260344
NMBE: 10.184612270788424
CVRSME: 44.807085381841596
R SQUARED: 0.3622237062166117
Modelling: Office_Lesa
Weatherfile: weather5.csv
MAPE: 32.05024564811596
NMBE: 10.0187717713871
CVRSME: 39.94070964972197
R SQUARED: 0.21393461176958595
Modelling: Office_Jackie
Weatherfile: weather1.csv
MAPE: 783.3850193483452
NMBE: -15.83643996149203
CVRSME: 65.72682490714081
R SQUARED: 0.5904999798427414
Modelling: Office_Ma

MAPE: 18.774745369861574
NMBE: 6.879573528401786
CVRSME: 24.222145522588825
R SQUARED: 0.759959305179218
Modelling: Office_Mark
Weatherfile: weather3.csv
MAPE: 14.305557130447419
NMBE: -0.5869747759996838
CVRSME: 19.429127357305028
R SQUARED: 0.6788993193783794
Modelling: Office_Travis
Weatherfile: weather8.csv
MAPE: 4.215843874250681
NMBE: -0.3237505334988387
CVRSME: 6.2904665271882285
R SQUARED: 0.48833849562801623
Modelling: Office_Lena
Weatherfile: weather5.csv
MAPE: 29.30636137609799
NMBE: -4.218582769569221
CVRSME: 37.2153803094967
R SQUARED: 0.1237159905642976
Modelling: Office_Max
Weatherfile: weather3.csv
MAPE: 23.074751916089305
NMBE: -3.0795501067894575
CVRSME: 27.16949884061243
R SQUARED: 0.49058646635790704
Modelling: Office_Gustavo
Weatherfile: weather5.csv
MAPE: 14.622434066384372
NMBE: -6.804301096363087
CVRSME: 20.563801560421417
R SQUARED: 0.5260779970863505
Modelling: Office_Penny
Weatherfile: weather4.csv
MAPE: 18.336476246791467
NMBE: -4.2132973905437785
CVRSME: 23

MAPE: 26.383419818527877
NMBE: 0.5703930535686518
CVRSME: 28.194231914393765
R SQUARED: 0.2807342213106435
Modelling: Office_Guillermo
Weatherfile: weather5.csv
MAPE: 18.852519375937113
NMBE: -10.889240245027397
CVRSME: 22.717987569369793
R SQUARED: 0.6499471205598709
Modelling: Office_Paulina
Weatherfile: weather4.csv
MAPE: 9.83355287713882
NMBE: 0.17353291756978312
CVRSME: 12.593323531718534
R SQUARED: 0.2631777630926053
Modelling: Office_Gabriela
Weatherfile: weather5.csv
MAPE: 12.087006845968178
NMBE: -2.2757681318935683
CVRSME: 15.24952384903921
R SQUARED: 0.6002629543303963
Modelling: Office_Carolina
Weatherfile: weather2.csv
MAPE: 13.29505188340469
NMBE: -0.6721592134351941
CVRSME: 18.487873970692576
R SQUARED: 0.2175620743144412
Modelling: Office_Noel
Weatherfile: weather9.csv
MAPE: 7.967272636345227
NMBE: -1.1162083118295951
CVRSME: 11.072110150172994
R SQUARED: 0.40674729992408576
Modelling: Office_Alannah
Weatherfile: weather0.csv
MAPE: 23.460146307738437
NMBE: -1.4550601068

MAPE: 259.75411905632495
NMBE: -40.83828746334159
CVRSME: 121.47139486598181
R SQUARED: -0.019504140312656792
Modelling: PrimClass_Uma
Weatherfile: weather10.csv
MAPE: 33.46153407895916
NMBE: -3.8888855928448685
CVRSME: 47.44711258041017
R SQUARED: 0.5551475404346313
Modelling: PrimClass_Jayla
Weatherfile: weather1.csv
MAPE: 394.79990093599343
NMBE: -35.603623449894265
CVRSME: 89.33373691175356
R SQUARED: -0.03815528047326344
Modelling: PrimClass_Janiya
Weatherfile: weather1.csv
MAPE: 655.4446349685716
NMBE: -45.852454109036316
CVRSME: 117.35373482389059
R SQUARED: 0.1026031006121404
Modelling: PrimClass_Umar
Weatherfile: weather10.csv
MAPE: 40.24403526009924
NMBE: -2.949280595430593
CVRSME: 47.75824372163454
R SQUARED: 0.6397626263001416
Modelling: PrimClass_Janice
Weatherfile: weather1.csv
MAPE: 524.5130039577382
NMBE: -11.137058767695615
CVRSME: 71.28924081895978
R SQUARED: 0.4695229240244304
Modelling: PrimClass_Uriah
Weatherfile: weather10.csv
MAPE: 17.883919108076256
NMBE: -2.093

MAPE: 308.1978446581309
NMBE: -32.95183228983527
CVRSME: 89.55128009128083
R SQUARED: -0.008615057397426185
Modelling: PrimClass_Jeffery
Weatherfile: weather1.csv
MAPE: 158.63825709647713
NMBE: -17.071762033971208
CVRSME: 67.79698510554461
R SQUARED: 0.24768989097309302
Modelling: PrimClass_Jayson
Weatherfile: weather1.csv
MAPE: 3216.893053891691
NMBE: -37.01875491498331
CVRSME: 84.87494859861529
R SQUARED: -0.062340679615225714
Modelling: PrimClass_Jennie
Weatherfile: weather1.csv
MAPE: 364.0243434889035
NMBE: -45.44124571239993
CVRSME: 100.65950069865079
R SQUARED: -0.07595134206683585
Modelling: PrimClass_Jonathon
Weatherfile: weather1.csv
MAPE: 3356.086419473069
NMBE: -30.456493143166252
CVRSME: 65.91452928686113
R SQUARED: 0.01473094691539123
Modelling: PrimClass_Jaqueline
Weatherfile: weather1.csv
MAPE: 393.27751306944594
NMBE: -39.21342347332482
CVRSME: 94.07141971151873
R SQUARED: -0.12398727381614427
Modelling: PrimClass_Jeremy
Weatherfile: weather1.csv
MAPE: 448.1693188065142

MAPE: 147.00241186481477
NMBE: -17.332881374774413
CVRSME: 78.63422475751541
R SQUARED: 0.22167339438153544
Modelling: PrimClass_Jenna
Weatherfile: weather1.csv
MAPE: 369.09250799558765
NMBE: -21.126320646579885
CVRSME: 61.08200076545975
R SQUARED: 0.18611762609872817


KNeighborsRegressor-UnivClass
_____________
Modelling: UnivClass_Tamra
Weatherfile: weather8.csv
MAPE: 106.81522437382756
NMBE: 21.209390632101062
CVRSME: 92.4841153770538
R SQUARED: 0.09953021216441604
Modelling: UnivClass_Tammy
Weatherfile: weather8.csv
MAPE: 30.981113201995598
NMBE: 1.193188386426067
CVRSME: 35.55004753396903
R SQUARED: 0.5957979527215533
Modelling: UnivClass_Camden
Weatherfile: weather2.csv
MAPE: 17.784139629442027
NMBE: -0.6779488216278251
CVRSME: 25.56015929412608
R SQUARED: 0.5199522091024946
Modelling: UnivClass_Craig
Weatherfile: weather2.csv
MAPE: 21.02722168408908
NMBE: -1.8876020010532921
CVRSME: 27.35227542275246
R SQUARED: 0.15109124237564608
Modelling: UnivClass_Jadon
Weatherfile: weather

MAPE: 27.729531566462718
NMBE: -16.926882553186466
CVRSME: 33.66064788503021
R SQUARED: 0.43295181969688046
Modelling: UnivClass_Peter
Weatherfile: weather4.csv
MAPE: 12.801616740875588
NMBE: -2.596175406523106
CVRSME: 16.20232198240253
R SQUARED: 0.5206071125167704
Modelling: UnivClass_Aoibhe
Weatherfile: weather0.csv
MAPE: 5.634130160080578
NMBE: 1.3286384984967532
CVRSME: 8.371220841407428
R SQUARED: 0.6381392251905189
Modelling: UnivClass_Ciara
Weatherfile: weather2.csv
MAPE: 7.50178297471487
NMBE: 0.3897936073496877
CVRSME: 9.817096840066775
R SQUARED: 0.6001650845472641
Modelling: UnivClass_Anya
Weatherfile: weather0.csv
MAPE: 32.428178380235714
NMBE: 9.552250501731933
CVRSME: 41.49763001432671
R SQUARED: 0.1425703720462156
Modelling: UnivClass_Bob
Weatherfile: weather7.csv
MAPE: 26.521047699919293
NMBE: -0.49234726493875863
CVRSME: 28.049233529042095
R SQUARED: 0.6196907374449128
Modelling: UnivClass_Alicia
Weatherfile: weather0.csv
MAPE: 25.873372918999976
NMBE: 3.6574909939242

MAPE: 24.39114407866791
NMBE: -5.93173605014081
CVRSME: 28.260676863376947
R SQUARED: 0.1622050391296055
Modelling: UnivDorm_Candace
Weatherfile: weather2.csv
MAPE: 12.755952056545025
NMBE: 0.8754265594467628
CVRSME: 16.419999954590416
R SQUARED: -0.10722252116035236
Modelling: UnivDorm_Clayton
Weatherfile: weather2.csv
MAPE: 23.936376792658216
NMBE: -3.7372922947382046
CVRSME: 25.908072613702547
R SQUARED: 0.0009505746063986908
Modelling: UnivDorm_Ahmad
Weatherfile: weather0.csv
MAPE: 17.351383829039115
NMBE: 10.202242933263651
CVRSME: 25.245878278856406
R SQUARED: 0.0758973312478789
Modelling: UnivDorm_April
Weatherfile: weather0.csv
MAPE: 13.835020237228965
NMBE: 5.241099166870382
CVRSME: 18.121124037744227
R SQUARED: 0.23486899633060931
Modelling: UnivDorm_Corey
Weatherfile: weather2.csv
MAPE: 23.415932230293436
NMBE: -3.175535728537536
CVRSME: 28.728740605980633
R SQUARED: -0.19072045893322964
Modelling: UnivDorm_Cooper
Weatherfile: weather2.csv
MAPE: 25.58729648877749
NMBE: -10.1

MAPE: 10.621087464170467
NMBE: -3.876189698860398
CVRSME: 13.000016825928242
R SQUARED: 0.07490588210487803
Modelling: UnivLab_Bethany
Weatherfile: weather7.csv
MAPE: 11.376936832928546
NMBE: -2.0031840904297993
CVRSME: 15.295533871142954
R SQUARED: 0.5393650722966004
Modelling: UnivLab_Crystal
Weatherfile: weather2.csv
MAPE: 5.772589003224991
NMBE: -3.346127830188048
CVRSME: 7.246131489355686
R SQUARED: -0.42115369807517067
Modelling: UnivLab_Clint
Weatherfile: weather2.csv
MAPE: 23.710120507423092
NMBE: 1.0367963253641734
CVRSME: 33.329126382981265
R SQUARED: 0.4823270121546077
Modelling: UnivLab_Cesar
Weatherfile: weather2.csv
MAPE: 12.995909013110296
NMBE: 2.2250070695216975
CVRSME: 17.045671622560775
R SQUARED: 0.023606587944626
Modelling: UnivLab_Cindy
Weatherfile: weather2.csv
MAPE: 10.7401164040789
NMBE: -0.5114121130647922
CVRSME: 14.129826611212359
R SQUARED: 0.46294853973010597
Modelling: UnivLab_Neil
Weatherfile: weather9.csv
MAPE: 42.99653917055336
NMBE: -17.69926607067502

MAPE: 5.118365440201327
NMBE: -0.9374773921955799
CVRSME: 7.104002917525766
R SQUARED: 0.03181212491909735
Modelling: UnivLab_Allan
Weatherfile: weather0.csv
MAPE: 5.38630568971721
NMBE: -1.1877057607977954
CVRSME: 6.447119503602524
R SQUARED: 0.4358621210920036
Modelling: UnivLab_Bert
Weatherfile: weather7.csv
MAPE: 7.27213703564904
NMBE: -1.0172847128044682
CVRSME: 8.862549184854554
R SQUARED: 0.26762207017806927
Modelling: UnivLab_Callie
Weatherfile: weather2.csv
MAPE: 7.420539390052388
NMBE: 0.9745815850578642
CVRSME: 10.82485736987387
R SQUARED: 0.5787910052174396
Modelling: UnivLab_Caitlin
Weatherfile: weather2.csv
MAPE: 7.995222467856935
NMBE: -1.6918064421437327
CVRSME: 9.99213575001129
R SQUARED: -0.4057360078828445
Modelling: UnivLab_Ashlynn
Weatherfile: weather0.csv
MAPE: 3.4629078225005467
NMBE: -1.081684973137764
CVRSME: 4.419523856255416
R SQUARED: 0.3395917141549587
Modelling: UnivLab_Amaya
Weatherfile: weather0.csv
MAPE: 5.9209516881895174
NMBE: -1.6371808656860258
CVRS