In [1]:
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

plt.style.use('seaborn-darkgrid')

# Ajuste os caminhos conforme sua estrutura
DATA_PATH = '../data'
DATA_FILE = 'dados.csv'
full_path = os.path.join(DATA_PATH, DATA_FILE)

df = pd.read_csv(full_path, sep=',', decimal='.', parse_dates=['Data'], dayfirst=True)

# Colunas
col_data = 'Data'
col_qty = 'Quantidade'

# Agrupar por dia a quantidade vendida
daily_qty = df.groupby(col_data)[col_qty].sum().to_frame(name='quantidade_vendida_diaria')

# Converter índice em datetime se não estiver
daily_qty.index = pd.to_datetime(daily_qty.index)

# Criar um índice diário completo do menor ao maior dia
data_inicial = daily_qty.index.min()
data_final = daily_qty.index.max()
full_range = pd.date_range(start=data_inicial, end=data_final, freq='D')

# Reindexar e preencher ausentes com 0, pois dia sem venda = 0
daily_qty = daily_qty.reindex(full_range, fill_value=0)

print(daily_qty.head())
print(daily_qty.tail())
print(daily_qty.isna().sum())

# Definir período de treino e teste
train_end_date = datetime(2023, 12, 31)
test_end_date = datetime(2024, 3, 30)

train_data = daily_qty.loc[:train_end_date]
test_data = daily_qty.loc[datetime(2024,1,1):test_end_date]

# Visualização dos dados de quantidade
plt.figure(figsize=(14,5))
plt.plot(daily_qty.index, daily_qty['quantidade_vendida_diaria'], label='Todos Dados')
plt.axvline(x=train_end_date, color='red', linestyle='--', label='Limite Treino/Teste')
plt.title('Quantidade Vendida Diária - Histórico')
plt.legend()
plt.show()

# Normalizar os dados
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_data)

lookback = 30
def create_sequences(data, lookback=30):
    X, y = [], []
    for i in range(len(data)-lookback):
        X.append(data[i:i+lookback])
        y.append(data[i+lookback])
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_scaled, lookback=lookback)

# Preparar dados de teste
last_days_train = train_data[-lookback:]
test_full = pd.concat([last_days_train, test_data], axis=0)
test_scaled = scaler.transform(test_full)

X_test, y_test = create_sequences(test_scaled, lookback=lookback)

print("X_train.shape:", X_train.shape)
print("y_train.shape:", y_train.shape)
print("X_test.shape:", X_test.shape)
print("y_test.shape:", y_test.shape)

# Construção do modelo LSTM (exemplo com 3 camadas, dropout, etc.)
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(lookback, 1)))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(1))

optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='mae')

model.summary()

# Treinamento do modelo
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

# Visualização da curva de loss
plt.figure(figsize=(10,4))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Curva de treinamento - Quantidade Vendida')
plt.xlabel('Épocas')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Previsão no teste
y_pred_scaled = model.predict(X_test)
y_pred = scaler.inverse_transform(y_pred_scaled)
y_test_original = scaler.inverse_transform(y_test)

test_dates = test_data.index

# Verificar tamanhos
print("Len test_dates:", len(test_dates))
print("Len y_test_original:", len(y_test_original))
print("Len y_pred:", len(y_pred))

if len(test_dates) == len(y_test_original) == len(y_pred):
    df_pred = pd.DataFrame({
        'Data': test_dates,
        'Quantidade Real': y_test_original.flatten(),
        'Predição': y_pred.flatten()
    })
    df_pred.set_index('Data', inplace=True)
    print(df_pred.head())
    
    # Métricas de avaliação
    mae = mean_absolute_error(df_pred['Quantidade Real'], df_pred['Predição'])
    mse = mean_squared_error(df_pred['Quantidade Real'], df_pred['Predição'])
    rmse = np.sqrt(mse)
    
    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    
    # Visualização da comparação
    plt.figure(figsize=(14,5))
    plt.plot(df_pred.index, df_pred['Quantidade Real'], label='Quantidade Real')
    plt.plot(df_pred.index, df_pred['Predição'], label='Predição')
    plt.title('Comparação entre Real e Previsto (Teste) - Quantidade Vendida')
    plt.legend()
    plt.show()
else:
    print("Tamanhos diferentes entre arrays. Verifique novamente o período de teste e criação de sequências.")


2024-12-18 13:57:38.393915: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734541058.409837 1648480 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734541058.414514 1648480 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-18 13:57:38.430043: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


OSError: 'seaborn-darkgrid' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)