## 5. Target
We compared different models. The Random Forest Regressor had the best results, so this is the model we will use 
to make the predictions for the target. <br>
The predictions are saved in the file `RandomForest_Predictions.csv`, which is needed for the visualizations 
in our dashboard.

In [8]:
## load modules
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import ast
import sys
sys.path.append("..")
from modeling.functions import get_features
from sklearn.preprocessing import MinMaxScaler

In [9]:
## read data
# Load the cleaned wind data set; TIMESTAMP is parsed as datetime and becomes the index.
data = pd.read_csv(
    '../data/GEFCom2014Data/Wind/clean_data.csv',
    index_col='TIMESTAMP',
    parse_dates=['TIMESTAMP'],
)

In [10]:
# train-test-split
# Chronological split on the TIMESTAMP index (label slices include both endpoints):
# rows up to and including 2013-07-01 00:00:00 are used for training,
# rows from 2013-07-01 01:00:00 onwards are used for testing.
data_train = data.loc[:'2013-07-01 00:00:00']
data_test = data.loc['2013-07-01 01:00:00':]

# Feature lookup returned by the project helper; presumably maps a
# feature-combination name to the columns it uses -- confirm in modeling.functions.
feature_dict = get_features(data)

In [11]:
# Load the per-zone tuning results of the model we use to make the predictions.
# The CSV is indexed by the zone label ('ZONE1', 'ZONE2', ...).
model_params = pd.read_csv('../results/RandomForestRegressor.csv', index_col='ZONE')

# Add a numeric ZONEID column derived from the zone label itself, so the mapping
# does not depend on the row order of the CSV or on there being exactly ten rows.
# A label that does not follow the 'ZONE<number>' pattern fails loudly here.
model_params['ZONEID'] = model_params.index.str.replace('ZONE', '', regex=False).astype(int)

model_params

Unnamed: 0_level_0,BEST_PARAMS,CV,MODEL,FC,TESTSCORE,TRAINSCORE,ZONEID
ZONE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ZONE1,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.170892,RandomForestRegressor,no_deg_norm,0.177034,0.140094,1
ZONE2,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.132224,RandomForestRegressor,no_deg_norm,0.175856,0.109415,2
ZONE3,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.148439,RandomForestRegressor,no_comp,0.149962,0.124022,3
ZONE4,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.17002,RandomForestRegressor,no_comp_plus_100Norm,0.171536,0.128381,4
ZONE5,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.172481,RandomForestRegressor,all,0.171554,0.142998,5
ZONE6,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.175007,RandomForestRegressor,no_deg,0.181507,0.14599,6
ZONE7,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.125856,RandomForestRegressor,no_card_100Norm,0.141791,0.090274,7
ZONE8,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.152973,RandomForestRegressor,no_comp,0.192363,0.125311,8
ZONE9,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.162231,RandomForestRegressor,no_deg_norm,0.151015,0.132315,9
ZONE10,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",0.192226,RandomForestRegressor,no_comp,0.204616,0.16398,10


In [6]:
# Make the predictions: for every zone, fit a Random Forest on that zone's training
# rows with the zone's stored hyperparameters and predict the zone's test rows.
# The result frame is indexed by the test timestamps of zone 1. Predictions are
# assigned positionally, so every zone must have the same number of test rows
# (a mismatch raises on assignment).
df_pred = pd.DataFrame(index=data_test[data_test.ZONEID == 1].index)

# We scale the features, because they have different scales.
# The scaler is re-fitted on each zone's training data inside the loop and the
# fitted transformation is then applied to that zone's test data.
scaler = MinMaxScaler()

# Walk the three columns in lockstep instead of re-filtering model_params per zone.
for zone, fc, best_params in zip(model_params['ZONEID'],
                                 model_params['FC'],
                                 model_params['BEST_PARAMS']):
    data_train_zone = data_train[data_train.ZONEID == zone]
    data_test_zone = data_test[data_test.ZONEID == zone]

    # fc is the key of the feature combination stored for this zone.
    feature_cols = feature_dict[fc]
    X_train = scaler.fit_transform(data_train_zone[feature_cols])
    X_test = scaler.transform(data_test_zone[feature_cols])
    y_train = data_train_zone.TARGETVAR

    # BEST_PARAMS holds the string form of a parameter dict; literal_eval parses it
    # without executing code. A fresh estimator is built per zone so that no
    # hyperparameter can carry over from the previous zone if a dict is incomplete.
    model = RandomForestRegressor(**ast.literal_eval(best_params))
    model.fit(X_train, y_train)

    # Predictions can't have values larger than 1 or smaller than 0, because the
    # energy output consists of normalized values in [0, 1].
    y_pred = np.clip(model.predict(X_test), 0, 1)

    df_pred[f'Zone {zone}'] = y_pred
    

In [12]:
# Write the predictions to RandomForest_Predictions.csv, then read the file back
# (TIMESTAMP parsed as datetime and used as index) and display the result.
predictions_path = '../results/RandomForest_Predictions.csv'
df_pred.to_csv(predictions_path)
df = pd.read_csv(predictions_path, parse_dates=['TIMESTAMP'], index_col='TIMESTAMP')
df

Unnamed: 0_level_0,Zone 1,Zone 2,Zone 3,Zone 4,Zone 5,Zone 6,Zone 7,Zone 8,Zone 9,Zone 10
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-07-01 01:00:00,0.663787,0.303875,0.268849,0.105950,0.161837,0.208544,0.572336,0.523364,0.609002,0.202555
2013-07-01 02:00:00,0.576531,0.272944,0.243955,0.129480,0.165035,0.260800,0.499121,0.522322,0.532590,0.180590
2013-07-01 03:00:00,0.551094,0.200067,0.232500,0.119985,0.229722,0.288432,0.507226,0.541530,0.607430,0.320768
2013-07-01 04:00:00,0.550652,0.184491,0.224215,0.193972,0.265429,0.359370,0.648843,0.601712,0.502306,0.212353
2013-07-01 05:00:00,0.477922,0.178671,0.226642,0.231144,0.281932,0.348403,0.560319,0.537393,0.453832,0.172406
...,...,...,...,...,...,...,...,...,...,...
2013-12-31 20:00:00,0.736336,0.626008,0.759972,0.857648,0.849007,0.858754,0.599869,0.500731,0.676839,0.879829
2013-12-31 21:00:00,0.772560,0.679814,0.725188,0.905970,0.863707,0.865298,0.592687,0.498410,0.670995,0.862189
2013-12-31 22:00:00,0.758845,0.633772,0.768021,0.926823,0.865309,0.863108,0.515467,0.490383,0.713100,0.832610
2013-12-31 23:00:00,0.382185,0.636487,0.825061,0.713358,0.807285,0.858691,0.375964,0.366193,0.466791,0.526636
