In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense

from tqdm import tqdm

  from numpy.core.umath_tests import inner1d
Using TensorFlow backend.


In [2]:
df = pd.read_csv('usina72.csv')

In [3]:
df.head()

Unnamed: 0,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12
0,27.677983,91.388781,113.20953,119.066763,35,35,11582.0,10687.94371,431.0,415.311985
1,67.764253,18.374644,66.687665,86.138898,35,35,10687.94371,10481.06389,415.311985,368.255564
2,15.586149,44.981999,70.224125,60.568148,35,35,10481.06389,10255.49924,368.255564,394.118132
3,46.285723,35.742767,80.82462,82.028491,35,35,10255.49924,10535.18598,394.118132,390.997697
4,70.895098,54.298673,118.786567,125.193771,35,35,10535.18598,10720.26798,390.997697,373.836642


In [4]:
features = ['f'+str(i) for i in range(5, 13)]
X = df.loc[:, features]
y = df.loc[:, ['f3', 'f4']]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [6]:
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

• Regressão linear

• SVR

• Redes Neurais (MLP) (+ de uma camada escondida)

• kNN

• Árvore de Decisão

• Random Forest

• Gradient Boosting

In [7]:
models = {}

models['LinearRegression'] = LinearRegression()
models['SVR'] = SVR()

nn = Sequential()
nn.add(Dense(9, input_dim=8, kernel_initializer='normal', activation='relu'))
nn.add(Dense(10, activation='relu'))
nn.add(Dense(1, kernel_initializer='normal'))
nn.compile(loss='mean_squared_error', optimizer='adam')

models['DNN'] = nn
models['KNeighborsRegressor'] = KNeighborsRegressor()
models['DecisionTreeRegressor'] = DecisionTreeRegressor()
models['RandomForestRegressor'] = RandomForestRegressor()
models['GradientBoostingRegressor'] = GradientBoostingRegressor()

In [8]:
results = {}
pbar = tqdm(total=len(models), desc='Treinamento e previsão dos modelos')

for model in models:
    print('\nBeginning model:', model, '\n')
    
    for target in y_train.columns:
        print('\nAlvo:', target)
        
        y_train_tmp = y_train.loc[:, target]
        y_test_tmp = y_test.loc[:, target]
        
        if model == 'DNN':
            models[model].fit(X_train, y_train_tmp, batch_size=128, epochs=20)
        else:
            models[model].fit(X_train, y_train_tmp)
            
        pred = models[model].predict(X_test)
        mse = mean_squared_error(y_test_tmp, pred)
        print('MSE:', mse)
        
        std = y_test.loc[:, target].std()
        pred_df = pd.DataFrame({'pred': pred.reshape(-1,)}, index=y_test.index)
        
        percent = pred_df.loc[(pred_df['pred'] < y_test.loc[:, target] + std) & (pred_df['pred'] > y_test.loc[:, target] - std), ].shape[0] / len(y_test)
        
        print('Percent inside 1 stadard deviation:', percent)
        
    pbar.update(1)
        
pbar.close()

Treinamento e previsão dos modelos:   0%|          | 0/7 [00:00<?, ?it/s]


Beginning model: LinearRegression 


Alvo: f3
MSE: 952.1508589325759
Percent inside 1 stadard deviation: 0.8477894736842105

Alvo: f4
MSE: 952.1508590175673
Percent inside 1 stadard deviation: 0.9872280701754386

Beginning model: SVR 


Alvo: f3
MSE: 1051.5390483817575
Percent inside 1 stadard deviation: 0.7632280701754386

Alvo: f4


Treinamento e previsão dos modelos:  29%|██▊       | 2/7 [03:02<07:37, 91.43s/it]

MSE: 4743.609142605102
Percent inside 1 stadard deviation: 0.9142105263157895

Beginning model: DNN 


Alvo: f3
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
MSE: 592.5557147186965
Percent inside 1 stadard deviation: 0.9417192982456141

Alvo: f4
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Treinamento e previsão dos modelos:  43%|████▎     | 3/7 [03:14<04:29, 67.38s/it]

MSE: 283.6776681605853
Percent inside 1 stadard deviation: 0.9857543859649123

Beginning model: KNeighborsRegressor 


Alvo: f3
MSE: 577.7372373306835
Percent inside 1 stadard deviation: 0.9121052631578948

Alvo: f4


Treinamento e previsão dos modelos:  57%|█████▋    | 4/7 [03:15<02:22, 47.62s/it]

MSE: 746.7854455346667
Percent inside 1 stadard deviation: 0.9654736842105263

Beginning model: DecisionTreeRegressor 


Alvo: f3
MSE: 497.77090107131784
Percent inside 1 stadard deviation: 0.9131929824561403

Alvo: f4


Treinamento e previsão dos modelos:  71%|███████▏  | 5/7 [03:16<01:07, 33.58s/it]

MSE: 496.2331685846548
Percent inside 1 stadard deviation: 0.968842105263158

Beginning model: RandomForestRegressor 


Alvo: f3
MSE: 276.775070044466
Percent inside 1 stadard deviation: 0.9560350877192982

Alvo: f4


Treinamento e previsão dos modelos:  86%|████████▌ | 6/7 [03:21<00:24, 25.00s/it]

MSE: 280.1225591775776
Percent inside 1 stadard deviation: 0.9888421052631579

Beginning model: GradientBoostingRegressor 


Alvo: f3
MSE: 247.34913352402745
Percent inside 1 stadard deviation: 0.9592982456140351

Alvo: f4


Treinamento e previsão dos modelos: 100%|██████████| 7/7 [03:26<00:00, 18.96s/it]

MSE: 250.07436353458667
Percent inside 1 stadard deviation: 0.988561403508772





Para ambos os alvos `f3` e `f4` o Gradient Boosting é o modelo que minimiza o MSE

Os modelos Random Forest e Gradient Boosting estão com pelo menos 95% das previsões dentro de um desvio padrão para mais ou para menos