In [None]:
import yfinance as yf

# Download the price history of the four tickers used in this notebook.
# Every ticker is queried over the same date window, so it is declared once
# and unpacked into each `history` call.
history_window = {"start": "2021-01-01", "end": "2023-12-30"}

bpac3 = yf.Ticker("BPAC3.SA")
df_bpac3 = bpac3.history(**history_window)

bpan4 = yf.Ticker("BPAN4.SA")
df_bpan4 = bpan4.history(**history_window)

meli34 = yf.Ticker("MELI34.SA")
df_meli34 = meli34.history(**history_window)

pags34 = yf.Ticker("PAGS34.SA")
df_pags34 = pags34.history(**history_window)


In [None]:
from sklearn.metrics import root_mean_squared_error
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
def rmse_arithmetic(df, i):
    """Return the RMSE between 'Close' and its simple moving average.

    The moving average is also written to ``df`` as a new column named
    ``med_mov_aritimética_{i}``, so the frame passed in is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Close' column.
    i : int
        Rolling window size passed to ``rolling(window=i)``.

    Returns
    -------
    The value produced by ``root_mean_squared_error`` for the two series.
    """
    close = df['Close']
    column = f'med_mov_aritimética_{i}'

    # min_periods=0: the first rows are averaged over the values seen so far
    # instead of being left as NaN.
    # NOTE(review): the window appears to include the current row, so this
    # measures smoothing error rather than an out-of-sample forecast error.
    # Confirm this is the intended comparison.
    df[column] = close.rolling(window=i, min_periods=0).mean()

    return root_mean_squared_error(close.to_numpy(), df[column].to_numpy())

def rmse_exponential(df, i):
    """Return the RMSE between 'Close' and its exponential moving average.

    The moving average is also written to ``df`` as a new column named
    ``med_mov_exponencial_{i}``, so the frame passed in is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Close' column.
    i : int
        Span passed to ``ewm(span=i)``.

    Returns
    -------
    The value produced by ``root_mean_squared_error`` for the two series.
    """
    close = df['Close']
    column = f'med_mov_exponencial_{i}'

    # adjust=False selects the recursive form of the exponential average.
    smoothed = close.ewm(span=i, min_periods=0, adjust=False).mean()
    df[column] = smoothed

    return root_mean_squared_error(close.to_numpy(), df[column].to_numpy())

def rmse_arima(train, test, i):
    """Fit an ARIMA(1, 1, i) on the training closes and score it on the test closes.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame whose 'Close' column is used to fit the model.
    test : pandas.DataFrame
        Frame whose 'Close' column is compared against the forecast; its
        length sets the number of forecast steps.
    i : int
        Moving-average order (the ``q`` term) of the ARIMA model.

    Returns
    -------
    The value produced by ``root_mean_squared_error`` between the test closes
    and the forecast.
    """
    # ARIMA order is (p, d, q):
    #   p - number of past observations
    #   d - number of times the past data is differenced
    #   q - moving-average window
    # p and d are fixed at 1 here; only q varies with `i`.
    fitted = ARIMA(train['Close'], order=(1, 1, i)).fit()

    # Forecast one value per test row, then re-label the values with the
    # test index so both series passed to the metric share the same index.
    horizon = len(test)
    predicted = fitted.get_forecast(steps=horizon).predicted_mean.to_numpy()
    forecast = pd.Series(predicted, index=test.index)

    # Root mean squared error between the observed and forecast closes.
    return root_mean_squared_error(test['Close'], forecast)

In [None]:

import pandas as pd
import plotly.express as px

# Split each history by row position: the first 80% of the rows form the
# training set and the remaining rows form the test set.
train_size_bpac3 = int(len(df_bpac3) * 0.8)
train_bpac3 = df_bpac3.iloc[:train_size_bpac3]
test_bpac3 = df_bpac3.iloc[train_size_bpac3:]

train_size_bpan4 = int(len(df_bpan4) * 0.8)
train_bpan4 = df_bpan4.iloc[:train_size_bpan4]
test_bpan4 = df_bpan4.iloc[train_size_bpan4:]

train_size_meli34 = int(len(df_meli34) * 0.8)
train_meli34 = df_meli34.iloc[:train_size_meli34]
test_meli34 = df_meli34.iloc[train_size_meli34:]

train_size_pags34 = int(len(df_pags34) * 0.8)
train_pags34 = df_pags34.iloc[:train_size_pags34]
test_pags34 = df_pags34.iloc[train_size_pags34:]


# One entry per ticker: (full history, ARIMA training slice, ARIMA test slice).
# The moving-average metrics use the full history; ARIMA uses the split.
datasets = [
    (df_bpac3, train_bpac3, test_bpac3),
    (df_bpan4, train_bpan4, test_bpan4),
    (df_meli34, train_meli34, test_meli34),
    (df_pags34, train_pags34, test_pags34),
]

# Column-oriented results: one row per window size, holding the RMSE of each
# method averaged over all tickers.
matrix = {"janela": [], "rmse_a": [], "rmse_e": [], "rmse_arima": []}
for i in range(2, 15):
    # For each ticker compute (arithmetic, exponential, ARIMA) RMSE, in that
    # order, before moving on to the next ticker.
    per_ticker = [
        (
            rmse_arithmetic(full, i),
            rmse_exponential(full, i),
            rmse_arima(train, test, i),
        )
        for full, train, test in datasets
    ]

    # Transpose to one tuple per method and average across tickers.
    rmse_a, rmse_e, rmse_arima_average = (
        sum(values) / len(values) for values in zip(*per_ticker)
    )

    matrix["janela"].append(i)
    # Builtin round() works for both NumPy scalars and plain Python floats,
    # whereas the `.round()` method exists only on NumPy scalars.
    matrix["rmse_a"].append(round(rmse_a, 5))
    matrix["rmse_e"].append(round(rmse_e, 5))
    matrix["rmse_arima"].append(round(rmse_arima_average, 5))
    



In [None]:
# Tabulate the averaged RMSE values: one row per window size.
df = pd.DataFrame(matrix)

# One line per method, window size on the x axis, then title and axis labels.
fig = px.line(
    data_frame=df,
    x='janela',
    y=['rmse_a', 'rmse_e', 'rmse_arima'],
).update_layout(
    title="Relação Média RMSE x Qde de dias analisados",
    xaxis_title="Qde de dias analisados",
    yaxis_title="RMSE Médio",
)

fig.show()