In [61]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression
from sklearn.ensemble import RandomForestRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from analise_data_ru import ROOT_PATH

In [62]:
n_samples, n_features = 10, 5
rand = np.random.RandomState(666)

y = rand.randn(n_samples)
X = rand.randn(n_samples, n_features)

transformer = Pipeline(
    steps=[
        ('preprocess', StandardScaler()),
    ]
)

regressors_almoco = {
    'SVR': (svr := SVR()),
    'MLP_regr': (mlp_regr := MLPRegressor()),
    'SGDR': (sgdr := SGDRegressor()),
    'linear_reg': (linear := LinearRegression()),
    'random_forest': (rf := RandomForestRegressor()),
}

regressors_janta = {
    'SVR': (svr := SVR()),
    'MLP_regr': (mlp_regr := MLPRegressor()),
    'linear_reg': (linear := LinearRegression()),
    'random_forest': (rf := RandomForestRegressor()),
}

In [63]:
final_base = pd.read_csv(ROOT_PATH / 'data' / 'processed' / 'final_base.csv')
final_base = final_base.drop(columns=['dia'])
final_base

Unnamed: 0,Data,ano,mes,dia_semana,Qt_almoco,Qt_jantar
0,2016-01-04,2016,1,0,364,294
1,2016-01-05,2016,1,1,489,363
2,2016-01-06,2016,1,2,522,401
3,2016-01-07,2016,1,3,514,349
4,2016-01-08,2016,1,4,409,218
...,...,...,...,...,...,...
984,2020-03-24,2020,3,1,10,9
985,2020-03-26,2020,3,3,11,13
986,2020-03-27,2020,3,4,15,14
987,2020-03-30,2020,3,0,13,12


In [64]:
from datetime import datetime

In [65]:
date_array  = [datetime.strptime(date, '%Y-%m-%d') for date in final_base['Data']]

In [66]:
final_base['dia'] = [day.day for day in date_array]
final_base

Unnamed: 0,Data,ano,mes,dia_semana,Qt_almoco,Qt_jantar,dia
0,2016-01-04,2016,1,0,364,294,4
1,2016-01-05,2016,1,1,489,363,5
2,2016-01-06,2016,1,2,522,401,6
3,2016-01-07,2016,1,3,514,349,7
4,2016-01-08,2016,1,4,409,218,8
...,...,...,...,...,...,...,...
984,2020-03-24,2020,3,1,10,9,24
985,2020-03-26,2020,3,3,11,13,26
986,2020-03-27,2020,3,4,15,14,27
987,2020-03-30,2020,3,0,13,12,30


In [67]:
final_base['mes'] = [month.month for month in date_array]
final_base

Unnamed: 0,Data,ano,mes,dia_semana,Qt_almoco,Qt_jantar,dia
0,2016-01-04,2016,1,0,364,294,4
1,2016-01-05,2016,1,1,489,363,5
2,2016-01-06,2016,1,2,522,401,6
3,2016-01-07,2016,1,3,514,349,7
4,2016-01-08,2016,1,4,409,218,8
...,...,...,...,...,...,...,...
984,2020-03-24,2020,3,1,10,9,24
985,2020-03-26,2020,3,3,11,13,26
986,2020-03-27,2020,3,4,15,14,27
987,2020-03-30,2020,3,0,13,12,30


In [68]:
final_base = final_base.drop(columns=['Data'])

In [69]:
final_base

Unnamed: 0,ano,mes,dia_semana,Qt_almoco,Qt_jantar,dia
0,2016,1,0,364,294,4
1,2016,1,1,489,363,5
2,2016,1,2,522,401,6
3,2016,1,3,514,349,7
4,2016,1,4,409,218,8
...,...,...,...,...,...,...
984,2020,3,1,10,9,24
985,2020,3,3,11,13,26
986,2020,3,4,15,14,27
987,2020,3,0,13,12,30


In [70]:
final_base.to_csv(ROOT_PATH / 'data' / 'processed' / 'final_base_2.csv')

In [71]:
X_almoco = final_base.drop(columns=['Qt_almoco'])
y_almoco = final_base['Qt_almoco']

X_jantar = final_base.drop(columns=['Qt_jantar'])
y_jantar = final_base['Qt_jantar']

In [72]:
X_almoco_train, X_almoco_test, y_almoco_train, y_almoco_test = train_test_split(
    X_almoco, y_almoco, test_size=0.3, random_state=666, shuffle=True
)

X_janta_train, X_janta_test, y_janta_train, y_janta_test = train_test_split(
    X_jantar, y_jantar, test_size=0.3, random_state=666, shuffle=True
)

In [73]:
for k, regr_almoco in regressors_almoco.items():
    print(f'Training {k}:\n')
    regr_almoco.fit(X_almoco_train, y_almoco_train)
    print(f'Done {k}:\n')

Training SVR:

Done SVR:

Training MLP_regr:





Done MLP_regr:

Training SGDR:

Done SGDR:

Training linear_reg:

Done linear_reg:

Training random_forest:

Done random_forest:



In [74]:
for k, regr_almoco in regressors_almoco.items():
    print(f'Testing {k}:')
    y_almoco_pred = regr_almoco.predict(X_almoco_test)
    print(f'R2 {k}: {r2_score(y_almoco_test, y_almoco_pred)}')
    print('\n')

Testing SVR:
R2 SVR: 0.03115742550227585


Testing MLP_regr:
R2 MLP_regr: 0.9408150250590054


Testing SGDR:
R2 SGDR: -7.319270804054342e+25


Testing linear_reg:
R2 linear_reg: 0.9468085514450167


Testing random_forest:
R2 random_forest: 0.9655904547634331




In [75]:
for k, regr_janta in regressors_janta.items():
    print(f'Training {k}:\n')
    regr_janta.fit(X_janta_train, y_janta_train)
    print(f'Done {k}:\n')

Training SVR:



Done SVR:

Training MLP_regr:





Done MLP_regr:

Training linear_reg:

Done linear_reg:

Training random_forest:

Done random_forest:



In [76]:
for k, regr_janta in regressors_janta.items():
    print(f'Testing {k}:')
    y_janta_pred = regr_janta.predict(X_janta_test)
    print(f'R2 {k}: {r2_score(y_janta_test, y_janta_pred)}')
    print('\n')

Testing SVR:
R2 SVR: 0.1305593440388102


Testing MLP_regr:
R2 MLP_regr: 0.9446872602467713


Testing linear_reg:
R2 linear_reg: 0.94944322562205


Testing random_forest:
R2 random_forest: 0.9649040746158482


