In [120]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor


from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings('ignore')

In [48]:
dados_padronizados= pd.read_csv("C:/Projetos Pessoais/DataScience/Previsao_Juros_BACEN_IA/data/dados_proc_padronizados.csv", sep = ',', encoding='utf-8')
dados_normalizados= pd.read_csv("C:/Projetos Pessoais/DataScience/Previsao_Juros_BACEN_IA/data/dados_proc_normalizados.csv", sep = ',', encoding='utf-8')

In [18]:
dados_padronizados.set_index("data", inplace=True)

In [21]:
dados_padronizados = dados_padronizados.drop(dados_padronizados.index[-1])

In [22]:
dados_padronizados.tail()

Unnamed: 0_level_0,PIB,Cambio_Dolar,IGPM,Inflacao_Mensal,Selic
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-01,2.237638,1.906063,-0.017499,-0.296168,10.5
2024-08-01,2.191551,1.968029,-0.362485,-1.310027,10.5
2024-09-01,2.145647,1.932564,-0.006718,-0.144089,10.5
2024-10-01,2.278188,1.798173,0.963554,0.160068,10.75
2024-11-01,2.290019,2.077492,0.726376,-0.270822,11.25


In [24]:
# Separando variáveis preditoras da alvo
X = dados_padronizados[['PIB', 'Cambio_Dolar', 'IGPM', 'Inflacao_Mensal']]
y = dados_padronizados['Selic']

In [32]:
limite_X = int(len(X) * 0.8)
limite_y = int(len(y) * 0.8)

In [42]:
X_train, X_test = X[:limite_X], X[limite_X:]
y_train, y_test = y[:limite_y], y[limite_y:]

In [43]:
len(X_train)

220

In [44]:
len(y_train)

220

## <font color = "red"> Modelo 0 - Regressão Ridge

In [121]:
# Modelo de regressão
ridge = Ridge()

# Definição do grid de parâmetros
param_grid = {
    'alpha': [0.1, 1.0, 10.0, 100.0],  # Parâmetros de regularização
    'fit_intercept': [True, False],   # Se ajusta o intercepto ou não
    'solver': ['auto', 'svd', 'cholesky', 'saga']  # Algoritmos disponíveis
}

In [122]:
grid_search = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',  # Avaliação baseada no MSE negativo (usado para regressão)
    cv=5,  # Validação cruzada com 5 folds
    verbose=1,
    n_jobs=-1  # Paralelização total
)

In [123]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
previsoes_v0_Ridge = grid_search.best_estimator_
y_pred = previsoes_v0_Ridge.predict(X_test)

In [125]:
modelo_v0_Ridge_metricas = {
   "Modelo": "Ridge",
   "Numero": 0,
   "Detalhes": " ",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, y_pred),2),
   "r2": round(r2_score(y_test, y_pred), 2)
}
modelo_v0_Ridge_metricas

{'Modelo': 'Ridge',
 'Numero': 0,
 'Detalhes': ' ',
 'Mean Squared Error (MSE)': 52.15,
 'r2': -1.58}

## <font color = "red"> Modelo 1 - Regressão Linear

In [45]:
model_1_LinearRegression = LinearRegression()
model_1_LinearRegression.fit(X_train, y_train)

In [46]:
# Previsões modelo 1
y_pred_model_1 = model_1_LinearRegression.predict(X_test)

In [47]:
# Avaliando o modelo
mse = mean_squared_error(y_test, y_pred_model_1)
r2 = r2_score(y_test, y_pred_model_1)

print(f'MSE: {mse}')
print(f'R²: {r2}')

MSE: 52.196068424894705
R²: -1.5798736882702782


## <font color = "red"> Modelo 2 - Support Vector Regressor

In [133]:
regressor = SVR()

In [134]:
param = {
   # 'C_range': np.array([50., 100., 200.]),
   'kernel' : ['Linear', 'rbf', 'poly'],
   # 'gamma_range': np.array([0.3*0.001, 0.001, 3*0.001])

}

In [135]:
C_range = np.array([50., 100., 200.]),
gamma_range = np.array([0.3*0.001, 0.001, 3*0.001])
kernel_svm = ['Linear', 'rbf', 'poly']

In [136]:
# svm_param_grid = dict('kernel': kernel_svm, 'gamma' = gamma_range, C = C_range)
model_2_SVR = GridSearchCV(regressor, param, cv = 3)

In [137]:
model_2_SVR.fit(X_train, y_train)

In [138]:
model_2_SVR.best_params_

{'kernel': 'rbf'}

In [139]:
y_pred_model_2_SVR = model_2_SVR.predict(X_test)

In [140]:
modelo_v2_SVR_metricas = {
   "Modelo": "SVR",
   "Numero": 2,
   "Detalhes": "Kernel RBF",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, y_pred_model_2_SVR),2),
   "r2": round(r2_score(y_test, y_pred_model_2_SVR), 2)
}
modelo_v2_SVR_metricas

{'Modelo': 'SVR',
 'Numero': 2,
 'Detalhes': 'Kernel RBF',
 'Mean Squared Error (MSE)': 31.87,
 'r2': -0.58}

## <font color = "red"> Modelo 3 - Decision Tree com Grid Search

In [85]:
# Parametros para grid
params_decision_tree = {
 "criterion": ["squared_error", "friedman_mse", "absolute_error", "poisson"],
 "max_depth": [3, None],
 "min_samples_leaf": [1, 3, 10],
 "min_samples_split": [2, 3],
 "max_depth": [1,3,10]
}

In [86]:
reg_decision_tree = DecisionTreeRegressor()

# Grid Search
modelo_v3_decisionTree = GridSearchCV(reg_decision_tree,
                                      params_decision_tree,
                                      scoring='accuracy',
                                      cv = 5)

In [87]:
modelo_v3_decisionTree.fit(X_train, y_train)

In [88]:
modelo_v3_decisionTree.best_params_

{'criterion': 'squared_error',
 'max_depth': 1,
 'min_samples_leaf': 1,
 'min_samples_split': 2}

In [89]:
y_pred_model_3_DecisionTree = modelo_v3_decisionTree.predict(X_test)

In [90]:
modelo_v3_DecisionTree_metricas = {
   "Modelo": "Decision Tree",
   "Numero": 3,
   "Detalhes": "Squared_error",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, y_pred_model_3_DecisionTree),2),
   "r2": round(r2_score(y_test, y_pred_model_3_DecisionTree), 2)
}
modelo_v3_DecisionTree_metricas

{'Modelo': 'Decision Tree',
 'Numero': 3,
 'Detalhes': 'Squared_error',
 'Mean Squared Error (MSE)': 21.97,
 'r2': -0.09}

## <font color = "red"> Modelo 4 - Random Forest com Grid Search

In [91]:
regressor_Random_Forest = RandomForestRegressor(n_estimators = 150)

In [93]:
# Parametros
params_random_forest = {
    'max_depth': [None, 2],  # Profundidade máxima de cada árvore
    'min_samples_split': [2, 5],  # Mínimo de amostras necessárias para dividir um nó
    'min_samples_leaf': [1, 2, 3],    # Mínimo de amostras necessárias em um nó folha
    'max_features': ['sqrt', 'log2'],
    'criterion': ['gini', 'entropy', 'friedman_mse', 'poisson']
}

In [94]:
# Grid Search
modelo_v4_RandomForest = GridSearchCV(regressor_Random_Forest,
                                      params_random_forest,
                                      cv = 5
                                      )

In [95]:
modelo_v4_RandomForest.fit(X_train, y_train)

In [97]:
modelo_v4_RandomForest.best_estimator_

In [96]:
y_pred_model_4_RandomForest = modelo_v4_RandomForest.predict(X_test)

In [98]:
modelo_v4_RandomForest = {
   "Modelo": "Random Forest",
   "Numero": 4,
   "Detalhes": "poison",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, y_pred_model_4_RandomForest),2),
   "r2": round(r2_score(y_test, y_pred_model_4_RandomForest), 2)
}
modelo_v4_RandomForest

{'Modelo': 'Random Forest',
 'Numero': 4,
 'Detalhes': 'poison',
 'Mean Squared Error (MSE)': 35.21,
 'r2': -0.74}

## <font color = "red"> Modelo 4 - Extra Tress

In [101]:
reg_exta_trees = ExtraTreesRegressor(n_estimators = 150)

In [102]:
# Parametros
params_extra_trees = {
    'max_depth': [None, 2],  # Profundidade máxima de cada árvore
    'min_samples_split': [2, 3, 5],  # Mínimo de amostras necessárias para dividir um nó
    'min_samples_leaf': [1, 2, 3],    # Mínimo de amostras necessárias em um nó folha
    'max_features': ["sqrt", "log2"],
    'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson']
}

In [103]:
# Grid Search
modelo_v5_extraTrees = GridSearchCV(reg_exta_trees,
                                    params_extra_trees,
                                    cv = 5)

In [104]:
modelo_v5_extraTrees.fit(X_train, y_train)

modelo_v5_extraTrees.best_params_

{'criterion': 'poisson',
 'max_depth': None,
 'max_features': 'sqrt',
 'min_samples_leaf': 1,
 'min_samples_split': 5}

In [106]:
y_pred_model_5_ExtraTrees = modelo_v5_extraTrees.predict(X_test)

In [107]:
modelo_v5_ExtraTrees = {
   "Modelo": "ExtraTrees",
   "Numero": 5,
   "Detalhes": "sqrt",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, y_pred_model_5_ExtraTrees),2),
   "r2": round(r2_score(y_test, y_pred_model_5_ExtraTrees), 2)
}
modelo_v5_ExtraTrees

{'Modelo': 'ExtraTrees',
 'Numero': 5,
 'Detalhes': 'sqrt',
 'Mean Squared Error (MSE)': 37.66,
 'r2': -0.86}

## <font color = "red"> Modelo 6 - Adaboost

In [None]:
reg_adaboost = AdaBoostRegressor()

In [109]:
# Utilizarei a melhor árvore de decisão, pois obteve métricas boas

estimador_base = DecisionTreeRegressor(criterion = 'squared_error',
                                       max_depth = 1,
                                       min_samples_leaf = 1,
                                       min_samples_split = 2)

In [110]:
modelo_v6_adaBoost = AdaBoostRegressor(base_estimator = estimador_base,
                                        learning_rate = 0.5,
                                        n_estimators = 300
                                        )

In [111]:
modelo_v6_adaBoost.fit(X_train, y_train)

In [112]:
modelo_v6_adaBoost.get_params()

{'base_estimator__ccp_alpha': 0.0,
 'base_estimator__criterion': 'squared_error',
 'base_estimator__max_depth': 1,
 'base_estimator__max_features': None,
 'base_estimator__max_leaf_nodes': None,
 'base_estimator__min_impurity_decrease': 0.0,
 'base_estimator__min_samples_leaf': 1,
 'base_estimator__min_samples_split': 2,
 'base_estimator__min_weight_fraction_leaf': 0.0,
 'base_estimator__random_state': None,
 'base_estimator__splitter': 'best',
 'base_estimator': DecisionTreeRegressor(max_depth=1),
 'estimator': None,
 'learning_rate': 0.5,
 'loss': 'linear',
 'n_estimators': 300,
 'random_state': None}

In [113]:
previsoes_v6_adaBoost = modelo_v6_adaBoost.predict(X_test)

In [114]:
modelo_v6_adaBoost = {
   "Modelo": "AdaBoost",
   "Numero": 6,
   "Detalhes": "DecisionTree",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, previsoes_v6_adaBoost),2),
   "r2": round(r2_score(y_test, previsoes_v6_adaBoost), 2)
}
modelo_v6_adaBoost

{'Modelo': 'AdaBoost',
 'Numero': 6,
 'Detalhes': 'DecisionTree',
 'Mean Squared Error (MSE)': 21.97,
 'r2': -0.09}

## <font color = "red"> Modelo 7 - Gradient Boosting Regressor

In [116]:
modelo_v7_GradientBoostingRegressor = GradientBoostingRegressor(
                                        learning_rate = 0.5,
                                        n_estimators = 300)

In [117]:
modelo_v7_GradientBoostingRegressor.fit(X_train,y_train)

In [118]:
previsoes_v7_GBR = modelo_v7_GradientBoostingRegressor.predict(X_test)

In [119]:
modelo_v7_GradientBR = {
   "Modelo": "Gradient Boosting Regressor",
   "Numero": 7,
   "Detalhes": "DecisionTree",
   "Mean Squared Error (MSE)": round(mean_squared_error(y_test, previsoes_v7_GBR),2),
   "r2": round(r2_score(y_test, previsoes_v7_GBR), 2)
}
modelo_v7_GradientBR 

{'Modelo': 'Gradient Boosting Regressor',
 'Numero': 7,
 'Detalhes': 'DecisionTree',
 'Mean Squared Error (MSE)': 38.65,
 'r2': -0.91}