## 6. Target
We compared different models. The Random Forest Regressor had the best results, so this is the model we will use 
to make the predictions for the target. <br>
The predictions will be saved in the file `../results/RandomForest_Predictions.csv`. This file is needed for the visualizations 
in our dashboard.

In [2]:
## load modules
import pandas as pd
import sys
sys.path.append("..")
from modeling.features import get_feature_combinations
from modeling.functions import predict_func, train_test_split_features, scaler_func
from sklearn.preprocessing import MinMaxScaler
import pickle

In [3]:
## read the cleaned wind data
# TIMESTAMP is parsed as datetime and becomes the index of the frame
clean_data_path = '../data/GEFCom2014Data/Wind/clean_data.csv'
data = pd.read_csv(
    clean_data_path,
    parse_dates=['TIMESTAMP'],
    index_col='TIMESTAMP',
)

In [4]:
# chronological train-test-split on the TIMESTAMP index:
# rows up to and including TRAIN_END are used for training,
# rows from TEST_START onwards are held out for testing
TRAIN_END = '2013-07-01 00:00:00'
TEST_START = '2013-07-01 01:00:00'

data_train = data[:TRAIN_END]
data_test = data[TEST_START:]

In [5]:
# load the stored Random Forest results table; the prediction loop looks up
# the feature combination (column FC) for each wind farm (column ZONE) in it
df_model = pd.read_csv('../results/RandomForestRegressor.csv')

In [6]:
# make the predictions and save them in df_pred
# df_pred gets one column per zone and is indexed by the test timestamps of zone 1.
# NOTE(review): this assumes every zone covers the same test timestamps as zone 1 — confirm.
df_pred = pd.DataFrame(index=data_test[data_test.ZONEID == 1].index)

for zone in data.ZONEID.unique():
    # look up the feature combination stored for this zone in the results table
    fc = df_model[df_model.ZONE == f'ZONE{zone}']['FC'].values[0]

    # split in X and y (train_test_split_features receives the zone and selects it itself)
    features = get_feature_combinations()[fc]
    X_train, X_test, y_train, y_test = train_test_split_features(data_train, data_test, zone, features)
    # a fresh MinMaxScaler is passed for every zone
    X_train, X_test = scaler_func(X_train, X_test, MinMaxScaler())

    # load the model for current windfarm; the context manager closes the file handle
    # NOTE: pickle.load can execute arbitrary code — only load model files from a trusted source
    with open(f'../models/model{zone}.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    # make prediction and store it as the column for this zone
    y_pred = predict_func(model, X_test, y_test)
    df_pred[f'Zone {zone}'] = y_pred
    

In [7]:
# show the predictions: one row per test timestamp, one column per zone
df_pred

Unnamed: 0_level_0,Zone 1,Zone 2,Zone 3,Zone 4,Zone 5,Zone 6,Zone 7,Zone 8,Zone 9,Zone 10
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-07-01 01:00:00,0.663787,0.303875,0.268849,0.105950,0.161837,0.208544,0.572336,0.523364,0.609002,0.202555
2013-07-01 02:00:00,0.576531,0.272944,0.243955,0.129480,0.165035,0.260800,0.499121,0.522322,0.532590,0.180590
2013-07-01 03:00:00,0.551094,0.200067,0.232500,0.119985,0.229722,0.288432,0.507226,0.541530,0.607430,0.320768
2013-07-01 04:00:00,0.550652,0.184491,0.224215,0.193972,0.265429,0.359370,0.648843,0.601712,0.502306,0.212353
2013-07-01 05:00:00,0.477922,0.178671,0.226642,0.231144,0.281932,0.348403,0.560319,0.537393,0.453832,0.172406
...,...,...,...,...,...,...,...,...,...,...
2013-12-31 20:00:00,0.736336,0.626008,0.759972,0.857648,0.849007,0.858754,0.599869,0.500731,0.676839,0.879829
2013-12-31 21:00:00,0.772560,0.679814,0.725188,0.905970,0.863707,0.865298,0.592687,0.498410,0.670995,0.862189
2013-12-31 22:00:00,0.758845,0.633772,0.768021,0.926823,0.865309,0.863108,0.515467,0.490383,0.713100,0.832610
2013-12-31 23:00:00,0.382185,0.636487,0.825061,0.713358,0.807285,0.858691,0.375964,0.366193,0.466791,0.526636


In [25]:
# write df_pred (including its TIMESTAMP index) to RandomForest_Predictions.csv
predictions_path = '../results/RandomForest_Predictions.csv'
df_pred.to_csv(predictions_path)