In [30]:
import numpy as np
import pandas as pd
import scipy.stats

from sklearn.model_selection import train_test_split 
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from math import sqrt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import talib
from numpy import mean, absolute
import yfinance as yf

import warnings
warnings.filterwarnings("ignore")

In [31]:
# Get the data

p1 = 252

ticker1 = "PETR4.SA" 
df1 = yf.download(ticker1, "2012-01-01", "2022-12-31")
df1["Retornos"] = df1["Adj Close"].pct_change(1)
df1["Adj Low"] = df1["Low"] - (df1["Close"]-df1["Adj Close"])
df1["Adj High"] = df1["High"] - (df1["Close"]-df1["Adj Close"])
df1["Adj Open"] = df1["Open"] - (df1["Close"]-df1["Adj Close"])
df1["Vol"] = df1["Retornos"].rolling(20).std()*np.sqrt(252)
df1.dropna(axis = 0, inplace = True) 
df1.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Retornos,Adj Low,Adj High,Adj Open,Vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2012-01-31,24.75,24.940001,24.33,24.57,9.809544,28139600,0.0,9.569544,10.179544,9.989544,0.254916
2012-02-01,24.700001,25.17,24.629999,24.950001,9.96126,24357500,0.015466,9.641258,10.181259,9.71126,0.22604
2012-02-02,24.879999,25.219999,24.450001,24.530001,9.793574,26639600,-0.016834,9.713574,10.483573,10.143573,0.23933
2012-02-03,24.469999,24.93,24.370001,24.610001,9.825515,20308300,0.003261,9.585515,10.145514,9.685513,0.232028
2012-02-06,24.49,24.860001,24.4,24.860001,9.925326,14430800,0.010158,9.465325,9.925326,9.555326,0.232175


In [32]:
# Construção do alvo

# Alvo 1 - Volatilidade
df1["Alvo1"] = df1["Vol"].shift(-10)

# Criação do alvo categórico
df1["Alvo1_cat"] = np.where(df1["Alvo1"] > 0 , 1, 0)

In [33]:
# Variáveis

df1["MA5"] = df1["Adj Close"].rolling(5).mean()
df1["MA10"] = df1["Adj Close"].rolling(10).mean()
df1["MA20"] = df1["Adj Close"].rolling(20).mean()
df1["MA52"] = df1["Adj Close"].rolling(52).mean()

df1["RSL5"] = df1["Adj Close"]/df1["MA5"]-1
df1["RSL10"] = df1["Adj Close"]/df1["MA10"]-1
df1["RSL20"] = df1["Adj Close"]/df1["MA20"]-1
df1["RSL52"] = df1["Adj Close"]/df1["MA52"]-1

df1["Std5"] = df1["Retornos"].rolling(5).std()
df1["Std10"] = df1["Retornos"].rolling(10).std()
df1["Std20"] = df1["Retornos"].rolling(20).std()
df1["Std52"] = df1["Retornos"].rolling(52).std()

df1.dropna(inplace = True)

In [34]:
# Separando os dados com as variaveis em x e o alvo em y

# Separando os dados entre treinamento e teste

# Vamos treinar
start_train = "2012-01-01"
end_train = "2017-12-31"

# Vamos testar
start_test = "2018-01-01"
end_test = "2022-12-31"

df1_train = df1.loc[start_train : end_train]

df1_test = df1.loc[start_test : end_test]


# Separando os dados com as variaveis em x e o alvo em y
 
manter = ["Retornos","RSL5", "RSL10", "RSL20", "RSL52", "Std5", "Std10", "Std20", "Std52"]
x_train = df1_train[manter]
y_train = df1_train["Alvo1"]

x_test = df1_test[manter]
y_test = df1_test["Alvo1"]

In [35]:
print(x_train.shape)
print(x_test.shape)

(1416, 9)
(1222, 9)


In [36]:
# Treinando o modelo

model = MLPRegressor(hidden_layer_sizes = (20,10,20,20), max_iter = 1000,
                    solver = "adam", verbose = 10, tol = 1e-6, random_state = 42,
                    learning_rate_init = .001, activation = "relu")

model.fit(x_train, y_train) # essa é a linha que treina o modelo!!!!

Iteration 1, loss = 0.16537268
Iteration 2, loss = 0.08767170
Iteration 3, loss = 0.03641234
Iteration 4, loss = 0.01735294
Iteration 5, loss = 0.01842812
Iteration 6, loss = 0.01850399
Iteration 7, loss = 0.01670055
Iteration 8, loss = 0.01625731
Iteration 9, loss = 0.01634899
Iteration 10, loss = 0.01614903
Iteration 11, loss = 0.01583544
Iteration 12, loss = 0.01566242
Iteration 13, loss = 0.01553998
Iteration 14, loss = 0.01529380
Iteration 15, loss = 0.01517138
Iteration 16, loss = 0.01498504
Iteration 17, loss = 0.01481135
Iteration 18, loss = 0.01462903
Iteration 19, loss = 0.01443100
Iteration 20, loss = 0.01424117
Iteration 21, loss = 0.01403303
Iteration 22, loss = 0.01384384
Iteration 23, loss = 0.01362024
Iteration 24, loss = 0.01327173
Iteration 25, loss = 0.01302048
Iteration 26, loss = 0.01272464
Iteration 27, loss = 0.01252479
Iteration 28, loss = 0.01228970
Iteration 29, loss = 0.01198651
Iteration 30, loss = 0.01171088
Iteration 31, loss = 0.01150081
Iteration 32, los

MLPRegressor(hidden_layer_sizes=(20, 10, 20, 20), max_iter=1000,
             random_state=42, tol=1e-06, verbose=10)

In [37]:
# Predições para o treinamento e teste
y_pred_train = model.predict(x_train)
y_pred_test = model.predict(x_test)

In [38]:
# Avaliação dos resultados

MAE_train = metrics.mean_absolute_error(y_train, y_pred_train) 
R2_train = metrics.r2_score(y_train, y_pred_train) # Coeficiente de determinaçao
RMSE_train = sqrt(metrics.mean_squared_error(y_train, y_pred_train))

print("----- Avaliaçao do treinamento -----")
print("MAE: ", round(MAE_train,2))
print('R2: ', round(R2_train,2))
print('RMSE: ', round(RMSE_train,2))

MAE_test = metrics.mean_absolute_error(y_test, y_pred_test) 
R2_test = metrics.r2_score(y_test, y_pred_test)
RMSE_test = sqrt(metrics.mean_squared_error(y_test, y_pred_test))

print("")
print("----- Avaliaçao do teste -----")
print("MAE: ", round(MAE_test,2))
print('R2: ', round(R2_test,2))
print('RMSE: ', round(RMSE_test,2))


----- Avaliaçao do treinamento -----
MAE:  0.06
R2:  0.81
RMSE:  0.08

----- Avaliaçao do teste -----
MAE:  0.08
R2:  0.62
RMSE:  0.15


In [39]:
# Resultados: Real x previsto em um dataframe

resultados = pd.DataFrame({'Real': y_test, 'Previsto': y_pred_test})
resultados["Residuos"] = resultados["Real"] - resultados["Previsto"]
resultados

Unnamed: 0_level_0,Real,Previsto,Residuos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02,0.203918,0.275978,-0.072060
2018-01-03,0.228571,0.269844,-0.041273
2018-01-04,0.236636,0.266507,-0.029871
2018-01-05,0.205889,0.255883,-0.049994
2018-01-08,0.199461,0.252013,-0.052552
...,...,...,...
2022-11-30,0.468980,0.486859,-0.017879
2022-12-01,0.481711,0.521770,-0.040059
2022-12-02,0.479137,0.505832,-0.026695
2022-12-05,0.482810,0.492926,-0.010116


In [40]:
# Avaliando os resultados

fig = make_subplots(rows = 1, cols = 1
                    , shared_xaxes = True
                    , vertical_spacing = 0.05)

fig.add_trace(go.Scatter(x = resultados.index, y = resultados["Previsto"].rolling(5).mean()
                                , name = "Rede Neural"
                                , line = dict(color = "red"))
              , row = 1, col = 1)

fig.add_trace(go.Scatter(x = resultados.index, y = resultados["Real"].rolling(5).mean()
                                , name = "Real"
                                , line = dict(color = "blue"))
              , row = 1, col = 1)

fig.update_layout(height = 600, width = 800
                  , title_text = "Volatilidade " + ticker1 + " - Real x Previsto"
                  , font_color = "blue"
                  , title_font_color = "black"
                  , xaxis_title = "Tempo"
                  , yaxis_title = "Volatilidade"
                  , font = dict(size = 15, color = "Black")
                 )
fig.show()