In [None]:
import os
os.environ["PYCARET_EXPERIMENT_USE_DASK"] = "False"
os.environ["USE_DASK"] = "False"

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pycaret.time_series import setup, compare_models, finalize_model, predict_model, plot_model, tune_model, create_model  

df=pd.read_csv('C:/trabalhodados/ecomm.data.csv')

df['PurchaseDate'] = pd.to_datetime(df['PurchaseDate'])

def clean_data(df):
    # Replace missing values with 0 in column: 'Rating'
    df = df.fillna({'Rating': 0})
    return df

df_clean = clean_data(df.copy())
df_clean.head()

# Calcula a mediana apenas do item Sweater, ignorando valores nulos
mediana_sweater = df_clean.loc[df['ProductName'] == 'Sweater', 'Price'].median()

# Preenche apenas os valores nulos do item Sweater
df_clean.loc[(df_clean['ProductName'] == 'Sweater') & (df_clean['Price'].isnull()), 'Price'] = mediana_sweater

print("\nDepois do preenchimento:\n", df_clean)

# Calcula a mediana de pre√ßos de todos os itens que est√£o nulos de acordo com o seu tipo
df_clean['Price'] = df_clean['Price'].fillna(
    df_clean.groupby('ProductName')['Price'].transform('median')
)


Depois do preenchimento:
      ProductID     ProductName       Category  Price  QuantitySold Promotion  \
0         1001   Badminton Set    Electronics   50.0            77       Yes   
1         1002        Curtains    Electronics  196.0            38       Yes   
2         1003          Hoodie         Beauty  336.0             8        No   
3         1004         Mascara         Beauty  191.0            78        No   
4         1005  Gaming Console         Sports  102.0            74        No   
..         ...             ...            ...    ...           ...       ...   
995       1996    Coffee Maker         Sports  118.0            17       Yes   
996       1997   Exercise Bike  Home & Garden  180.0             0       Yes   
997       1998           Blush    Electronics  136.0            54        No   
998       1999          Camera         Sports  267.0            96        No   
999       2000   Tennis Racket    Electronics  470.0             8       Yes   

     Discoun

In [6]:
df_indexed = df_clean.set_index('PurchaseDate')

df_clean_semestral = df_indexed['QuantitySold'].resample('6MS').sum().to_frame()

# 3. üö® SOLU√á√ÉO DEFINITIVA: Converter o √≠ndice para PeriodIndex antes do setup
# O PyCaret lida melhor com PeriodIndex, e a convers√£o manual evita o erro.
# Usamos '6M' como frequ√™ncia de per√≠odo (6 meses).
df_clean_semestral.index = df_clean_semestral.index.to_period('6M')

In [14]:
exp = setup(
    data=df_clean_semestral,                             #DataFrame com os dados de s√©ries temporais.
    target='QuantitySold',                     #Nome da coluna alvo.
    fh=1,                               #Horizonte de previs√£o (n√∫mero de per√≠odos futuros a prever).
    fold=3,                              #N√∫mero de dobras para valida√ß√£o cruzada.
    session_id=123,                      #Semente para reprodutibilidade.Imagine que o session_id √© como anotar ‚Äúa mesma semente‚Äù antes de plantar.Assim, se voc√™ repetir o plantio com a mesma semente e condi√ß√µes, crescer√° a mesma planta.
    numeric_imputation_target='linear',  #M√©todo de imputa√ß√£o para valores num√©ricos.Conecta os pontos vizinhos com uma reta (geralmente o mais realista).
    verbose=True,                       #Exibir informa√ß√µes detalhadas durante o setup.
)


# ==========================================
# 4. Comparar e treinar
# ==========================================
best_model = compare_models(sort='MAE')   #testa diversos modelos de s√©ries temporais do PyCaret e retorna o melhor com base no MAE.
#tuned_model = tune_model(best_model, optimize='MAE', n_iter=10)
final_model = finalize_model(best_model)  #Treina o modelo selecionado em todo o conjunto de dados dispon√≠vel.

# ==========================================
# 5. Previs√£o futura
# ==========================================

future_predictions = predict_model(final_model, fh=1)                           #Gera previs√µes para os pr√≥ximos 24 per√≠odos (meses).
future_predictions['y_pred'] = np.round(future_predictions['y_pred']).astype(int)  #Converte as previs√µes para n√∫meros inteiros
plot_model(final_model, plot='forecast')                                         #Plota as previs√µes futuras geradas pelo modelo.


Unnamed: 0,Description,Value
0,session_id,123
1,Target,QuantitySold
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6, 1)"
5,Transformed data shape,"(6, 1)"
6,Transformed train set shape,"(5, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
arima,ARIMA,0.4225,0.3369,126.637,126.637,0.0162,0.016,0.81
naive,Naive Forecaster,0.5715,0.4402,166.6667,166.6667,0.0212,0.021,1.55
grand_means,Grand Means Forecaster,0.5377,0.4596,200.3056,200.3056,0.0252,0.0252,0.8333
croston,Croston,0.6095,0.5349,261.6457,261.6457,0.0323,0.0332,0.0267
theta,Theta Forecaster,1.3335,1.0884,466.6,466.6,0.0585,0.0568,0.0367
exp_smooth,Exponential Smoothing,1.6596,1.34,558.054,558.054,0.0703,0.0679,0.0333
polytrend,Polynomial Trend Forecaster,1.6596,1.34,558.0556,558.0556,0.0703,0.0679,0.7867
ets,ETS,1.66,1.3402,558.1569,558.1569,0.0703,0.0679,0.0433
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.7957,1.4445,596.3389,596.3389,0.0752,0.0725,0.2333
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.9256,1.544,633.063,633.063,0.0799,0.0768,0.9367


In [11]:
from pycaret.time_series import *
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
naive,Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
grand_means,Grand Means Forecaster,sktime.forecasting.naive.NaiveForecaster,True
polytrend,Polynomial Trend Forecaster,sktime.forecasting.trend._polynomial_trend_for...,True
arima,ARIMA,sktime.forecasting.arima.ARIMA,True
auto_arima,Auto ARIMA,sktime.forecasting.arima.AutoARIMA,True
exp_smooth,Exponential Smoothing,sktime.forecasting.exp_smoothing.ExponentialSm...,True
ets,ETS,sktime.forecasting.ets.AutoETS,True
theta,Theta Forecaster,sktime.forecasting.theta.ThetaForecaster,True
croston,Croston,sktime.forecasting.croston.Croston,True
bats,BATS,sktime.forecasting.bats.BATS,False


In [30]:
from pycaret.time_series import *

# =========================================================
# 1. Setup
# =========================================================
exp = setup(
    data=df_clean_semestral,
    target='QuantitySold',
    fh=1,                # Horizonte de previs√£o
    fold=4,              # Cross-validation
    session_id=123,
    verbose=True,
    seasonal_period=2
)

# =========================================================
# 2. Criar os modelos individuais
# =========================================================

arima_model = create_model('arima')
naive_model = create_model('naive')
grand_model = create_model('grand_means')

# =========================================================
# 3. Criar o stacking/blending
# =========================================================

stacked_model = blend_models(
    estimator_list=[arima_model, naive_model, grand_model],
    method='mean',    # 'stack' = modelo meta; 'blend' = m√©dia ponderada
    fold=3
)

# =========================================================
# 4. Finalizar o modelo (treinar com toda a s√©rie)
# =========================================================
final_model = finalize_model(stacked_model)

# =========================================================
# 5. Previs√£o futura
# =========================================================
future_predictions = predict_model(final_model, fh=1)
print(future_predictions)

# Plot
plot_model(final_model, plot='forecast')


Unnamed: 0,Description,Value
0,session_id,123
1,Target,QuantitySold
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6, 1)"
5,Transformed data shape,"(6, 1)"
6,Transformed train set shape,"(5, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,2022-01,,,,,,
1,2022-07,0.103,0.103,57.3732,57.3732,0.007,0.007
2,2023-01,0.1326,0.0983,38.7756,38.7756,0.0049,0.0049
3,2023-07,1.0319,0.8095,283.7622,283.7622,0.0368,0.0361
Mean,NaT,,,,,,
SD,NaT,,,,,,


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,2022-01,,,557.0,557.0,0.0683,0.0707
1,2022-07,0.0503,0.0503,28.0,28.0,0.0034,0.0034
2,2023-01,0.8205,0.6086,240.0,240.0,0.0302,0.0298
3,2023-07,0.8436,0.6618,232.0,232.0,0.0301,0.0297
Mean,NaT,,,264.25,264.25,0.033,0.0334
SD,NaT,,,189.1724,189.1724,0.0231,0.0241


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,2022-01,,,557.0,557.0,0.0683,0.0707
1,2022-07,0.5503,0.5503,306.5,306.5,0.0375,0.0382
2,2023-01,0.1219,0.0904,35.6667,35.6667,0.0045,0.0045
3,2023-07,0.9409,0.7381,258.75,258.75,0.0336,0.033
Mean,NaT,,,289.4792,289.4792,0.036,0.0366
SD,NaT,,,185.2176,185.2176,0.0226,0.0235


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,2022-07,0.2345,0.2345,130.6244,130.6244,0.016,0.0161
1,2023-01,0.3583,0.2658,104.8141,104.8141,0.0132,0.0131
2,2023-07,0.9388,0.7365,258.1707,258.1707,0.0335,0.0329
Mean,NaT,0.5106,0.4123,164.5364,164.5364,0.0209,0.0207
SD,NaT,0.307,0.2296,67.0427,67.0427,0.009,0.0087


            y_pred
2025-01  5833.7907
