In [1]:
from neuralforecast import NeuralForecast
import joblib
import os
# Move the working directory one level up before importing from `src`;
# presumably this makes the project package importable and anchors the
# relative paths used later in the notebook — TODO confirm.
# NOTE(review): this call is not idempotent — every re-run of this cell moves
# the working directory up one more level, so run it once per kernel session.
os.chdir('..')
from src.utils import setLog
import pandas as pd
from datetime import datetime
from deltalake import DeltaTable
import matplotlib.pyplot as plt
import polars as pl

In [2]:
# Display the current working directory to check the effect of os.chdir('..').
os.getcwd()

'/home/thales/postech/phase4/mle-tech-challenge-4/tc4-api'

In [3]:
# Logger obtained from the project helper setLog under the name 'model';
# the cells below use it for their info/debug/error messages.
logger = setLog('model')

In [6]:
def load_model(model_file : str, models_dir : str = '../ml_models') -> NeuralForecast :
    '''
    Load a trained model from the models directory.

    Parameters:

        model_file (str) : File name of the serialized model to load.
        models_dir (str) : Directory that holds the serialized models.
                           Defaults to '../ml_models', resolved against the
                           current working directory.

    Returns:

        model : Object deserialized from the file with joblib.load.

    Raises:

        ValueError : If `model_file` is empty or does not name a regular file
                     inside `models_dir`.
    '''
    logger.info(f'Validando a existência do modelo treinado : {model_file}')

    model_path = os.path.join(models_dir, model_file)

    # isfile rather than exists: an empty name (or the name of a sub-directory)
    # resolves to a directory, which must be rejected here instead of failing
    # later inside joblib.load with an unrelated error.
    if not os.path.isfile(model_path):
        logger.error(f'Arquivo {model_file} não encontrado na pasta ml_models. Verifique o nome do modelo.')
        raise ValueError(f'Arquivo {model_file} não encontrado! Verifique o nome do modelo e tente novamente!')

    logger.info(f'Modelo {model_file} encontrado na pasta ml_models.')
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code — only load model files from a trusted source.
    return joblib.load(model_path)

In [23]:
# File name of the serialized model handed to load_model.
model_file = 'neuralforecast_lstm_2025-01-30.joblib'
# Load the model and log the concrete type of the object that came back.
model : NeuralForecast = load_model(model_file)
logger.info(f'Modelo {model_file} carregado. Tipo do modelo {type(model)}')


2025-02-04 21:26:32,771 - model - INFO - Validando a existência do modelo treinado : neuralforecast_lstm_2025-01-30.joblib
2025-02-04 21:26:32,773 - model - INFO - Modelo neuralforecast_lstm_2025-01-30.joblib encontrado na pasta ml_models.
2025-02-04 21:26:32,814 - model - INFO - Modelo neuralforecast_lstm_2025-01-30.joblib carregado. Tipo do modelo <class 'neuralforecast.core.NeuralForecast'>


In [8]:

# Run the forecast. predict() is called without arguments; per the log message
# it relies on the horizon that was fixed when the model was trained.
logger.info('Realizando previsões utilizando o horizonte definido no treinamento do modelo.')
predict_result : pd.DataFrame = model.predict()


2025-02-04 21:22:00,850 - model - INFO - Realizando previsões utilizando o horizonte definido no treinamento do modelo.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [24]:
# Display the forecast frame returned by model.predict().
predict_result

Unnamed: 0,unique_id,ds,LSTM
0,WEGE3.SA,2025-02-01,52.364040
1,WEGE3.SA,2025-02-02,51.475018
2,WEGE3.SA,2025-02-03,52.254383
3,WEGE3.SA,2025-02-04,52.672131
4,WEGE3.SA,2025-02-05,53.265884
...,...,...,...
72,WEGE3.SA,2025-04-14,53.669323
73,WEGE3.SA,2025-04-15,54.342106
74,WEGE3.SA,2025-04-16,52.840630
75,WEGE3.SA,2025-04-17,54.466328


In [32]:
# Ticker whose forecast is going to be plotted.
stock_option = 'WEGE3.SA'

# Keep only the forecast rows of that ticker and drop the id column,
# which is constant after the selection.
is_selected_stock = predict_result['unique_id'] == stock_option
plot_df : pd.DataFrame = predict_result.loc[is_selected_stock].drop(columns='unique_id')

# The log records the complete forecast frame, not just the selected ticker.
logger.info(f'Resultado da previsão: {predict_result}')


2025-02-04 21:29:30,436 - model - INFO - Resultado da previsão:    unique_id         ds       LSTM
0   WEGE3.SA 2025-02-01  52.364040
1   WEGE3.SA 2025-02-02  51.475018
2   WEGE3.SA 2025-02-03  52.254383
3   WEGE3.SA 2025-02-04  52.672131
4   WEGE3.SA 2025-02-05  53.265884
..       ...        ...        ...
72  WEGE3.SA 2025-04-14  53.669323
73  WEGE3.SA 2025-04-15  54.342106
74  WEGE3.SA 2025-04-16  52.840630
75  WEGE3.SA 2025-04-17  54.466328
76  WEGE3.SA 2025-04-18  53.689232

[77 rows x 3 columns]


In [22]:
# Display the forecast frame again (same expression as an earlier cell).
predict_result

Unnamed: 0,unique_id,ds,LSTM
0,WEGE3.SA,2025-02-01,52.364040
1,WEGE3.SA,2025-02-02,51.475018
2,WEGE3.SA,2025-02-03,52.254383
3,WEGE3.SA,2025-02-04,52.672131
4,WEGE3.SA,2025-02-05,53.265884
...,...,...,...
72,WEGE3.SA,2025-04-14,53.669323
73,WEGE3.SA,2025-04-15,54.342106
74,WEGE3.SA,2025-04-16,52.840630
75,WEGE3.SA,2025-04-17,54.466328


In [46]:
# Read the pinned version 3 of the Delta table with polars, convert it to
# pandas and order the rows by ticker and date with a fresh 0..n-1 index.
df_hist = (
    pl.read_delta('../deltalake', version=3)
    .to_pandas()
    .sort_values(by=['unique_id', 'ds'])
    .reset_index(drop=True)
)
logger.info(f'Dados do deltalake carregados. Tamanho do dataset: {len(df_hist)}')


2025-02-04 21:38:25,374 - model - INFO - Dados do deltalake carregados. Tamanho do dataset: 6299


In [47]:
# List the distinct tickers present in the historical data.
df_hist['unique_id'].unique()

array(['ABEV3.SA'], dtype=object)

In [34]:

# Keep a single row per (date, ticker) pair; rebinding the name instead of
# mutating in place gives the same frame and keeps the cell easy to re-run.
df_hist = df_hist.drop_duplicates(subset=['ds', 'unique_id'])
logger.info(f'Dados após a remoção de duplicatas. Tamanho do dataset: {len(df_hist)}')


2025-02-04 21:29:42,629 - model - INFO - Dados após a remoção de duplicatas. Tamanho do dataset: 6299


In [35]:

# Restrict the history to the ticker being plotted and to the recent window.
# Without the ticker filter, any other ticker stored in the table would be
# drawn (and labelled) as if it were `stock_option`.
is_recent = df_hist['ds'] >= '2024-06-01'
is_selected_stock = df_hist['unique_id'] == stock_option
df_hist = df_hist[is_recent & is_selected_stock]

# An empty history means the loaded table does not contain the selected
# ticker in this window; flag it instead of silently plotting nothing.
if df_hist.empty:
    logger.warning(f'Nenhum dado histórico encontrado para {stock_option} a partir de 2024-06-01.')

# The messages say "Header", but what is logged is the shape of each frame.
logger.debug(f'Header histórico:\n{df_hist.shape}')
logger.debug(f'Header previsto:\n{predict_result.shape}')



2025-02-04 21:29:47,767 - model - DEBUG - Header histórico:
(169, 3)
2025-02-04 21:29:47,769 - model - DEBUG - Header previsto:
(77, 3)


In [36]:
# Rebuild the forecast slice from predict_result instead of reading plot_df
# back: the previous form (plot_df = concat([df_hist, plot_df])) appended the
# history again on every re-run of this cell.
forecast_df = predict_result[predict_result.unique_id == stock_option].drop('unique_id', axis=1)

# History rows carry `y` (no LSTM value); forecast rows carry `LSTM` (no y).
plot_df = pd.concat([df_hist, forecast_df])
logger.debug(f'Dados concatenados : \n{plot_df.shape}')




2025-02-04 21:29:51,164 - model - DEBUG - Dados concatenados : 
(246, 4)


In [37]:
# Display the combined history + forecast frame.
plot_df

Unnamed: 0,ds,y,unique_id,LSTM
6130,2024-06-03,37.044369,WEGE3.SA,
6131,2024-06-04,37.500725,WEGE3.SA,
6132,2024-06-05,37.242783,WEGE3.SA,
6133,2024-06-06,37.510647,WEGE3.SA,
6134,2024-06-07,37.550327,WEGE3.SA,
...,...,...,...,...
72,2025-04-14,,,53.669323
73,2025-04-15,,,54.342106
74,2025-04-16,,,52.840630
75,2025-04-17,,,54.466328


In [39]:
logger.info(f'Gerando plot para as previsões de {stock_option}')

# Explicit figure/axes instead of the pyplot state machine, so that exactly
# this figure is saved and closed.
fig, ax = plt.subplots()
ax.plot(df_hist['ds'], df_hist['y'], c='black', label=f'Valores históricos para {stock_option}')
ax.plot(plot_df['ds'], plot_df['LSTM'], c='purple', label=f'Previsões para {stock_option}')
ax.legend()
ax.grid()

plot_name : str = f'../reports/neuralforecast_lstm_{stock_option}_{datetime.now().date()}.png'

# Make sure the target folder exists; savefig does not create directories.
os.makedirs(os.path.dirname(plot_name), exist_ok=True)
fig.savefig(plot_name, dpi=300)
plt.close(fig)

# Log only after the file has actually been written.
logger.info(f'Imagem do plot salva em {plot_name}')

2025-02-04 21:30:11,048 - model - INFO - Gerando plot para as previsões de WEGE3.SA
2025-02-04 21:30:11,057 - model - INFO - Imagem do plot salva em ../reports/neuralforecast_lstm_WEGE3.SA_2025-02-04.png
