In [None]:
import yfinance as yf

# Yahoo Finance handle for the BPAN4.SA ticker.
bpan4 = yf.Ticker("BPAN4.SA")

# Price history for the requested start/end date range.
df_bpan4 = bpan4.history(start="2021-01-01", end="2023-12-30")

# Fail fast if the download produced no rows (e.g. network problem or an
# unknown ticker); otherwise the train/test split and model fit further down
# would break with a much less obvious error.
if df_bpan4.empty:
    raise ValueError(
        "No price history returned for BPAN4.SA between 2021-01-01 and 2023-12-30"
    )

In [None]:

from sklearn.metrics import root_mean_squared_error
import pandas as pd
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
# Chronological 80/20 split: the first 80% of the rows are used to fit ARIMA,
# the remaining rows are held out to score its forecast.
train_size_bpan4 = int(len(df_bpan4) * 0.8)
train_bpan4 = df_bpan4.iloc[:train_size_bpan4]
test_bpan4 = df_bpan4.iloc[train_size_bpan4:]

# Loop-invariant reference series for the two moving-average RMSEs.
close_values = df_bpan4['Close'].to_numpy()

# Column-oriented results: one entry per window size `i`.
matrix = {"janela": [], "rmse_a": [], "rmse_e": [], "rmse_arima": []}
for i in range(2, 15):

    # ARIMA order is (p, d, q):
    #   p - number of past observations (autoregressive lags)
    #   d - number of times the past data is differenced
    #   q - moving-average window
    # Only q varies with the loop; p and d are fixed at 1.
    model_train = ARIMA(train_bpan4['Close'], order=(1, 1, i))

    model_train_fit = model_train.fit()

    # Forecast as many steps as there are held-out rows and re-index the
    # predictions onto the test dates so they line up with the actual prices.
    test_forecast = model_train_fit.get_forecast(steps=len(test_bpan4))
    test_forecast_series = pd.Series(test_forecast.predicted_mean.to_numpy(), index=test_bpan4.index)

    # Root mean squared error of the ARIMA forecast on the held-out rows.
    rmse_arima = root_mean_squared_error(test_bpan4['Close'], test_forecast_series)

    # Arithmetic (rolling) and exponential moving averages with window/span `i`,
    # stored as new columns on df_bpan4.
    # NOTE(review): these RMSEs are computed over the whole frame against
    # averages that include the current day's close, whereas the ARIMA RMSE is
    # out-of-sample on the hold-out only -- the three curves are not directly
    # comparable. Confirm whether that is intended.
    name_a = f'med_mov_aritimética_{i}'
    name_e = f'med_mov_exponencial_{i}'
    df_bpan4[name_a] = df_bpan4['Close'].rolling(window=i, min_periods=0).mean()
    df_bpan4[name_e] = df_bpan4['Close'].ewm(span=i, min_periods=0, adjust=False).mean()
    rmse_a = root_mean_squared_error(close_values, df_bpan4[name_a].to_numpy())
    rmse_e = root_mean_squared_error(close_values, df_bpan4[name_e].to_numpy())

    # Builtin round() works for both NumPy scalars and plain Python floats;
    # the `.round(5)` method exists only on NumPy scalars, and depending on the
    # scikit-learn version the metric may come back as a plain float.
    matrix["janela"].append(i)
    matrix["rmse_a"].append(round(rmse_a, 5))
    matrix["rmse_e"].append(round(rmse_e, 5))
    matrix["rmse_arima"].append(round(rmse_arima, 5))



In [None]:


# Turn the collected per-window results into a table plotly can consume.
df = pd.DataFrame(matrix)

# One line per RMSE column, plotted against the window size. update_layout
# returns the figure it was called on, so titles are set in the same expression.
fig = px.line(
    data_frame=df,
    x='janela',
    y=['rmse_a', 'rmse_e', 'rmse_arima'],
).update_layout(
    title="Relação RMSE x Qde de dias analisados para BPAN4",
    xaxis_title="Qde de dias analisados",
    yaxis_title="RMSE",
)

fig.show()