In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pandas.plotting import autocorrelation_plot

# Resolve the CSV location relative to the notebook's working directory.
script_dir = os.getcwd()
csv_path = os.path.abspath(os.path.join(script_dir, "../data/Coffee_sales.csv"))

# Load the data and preview the first rows.
df = pd.read_csv(csv_path)
display(df.head())


In [None]:
# Print column dtypes, non-null counts and memory usage of the loaded frame.
df.info()


In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Normalize column names to snake_case: lowercase, spaces replaced by underscores.
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Count how many raw date strings contain each separator character.
date_text = df['date']
for label, token in (('hífens:', '-'), ('espaços:', ' ')):
    print(label, date_text.str.contains(token).sum())


In [None]:
# Sum sales per date string, rename to Prophet's expected column names
# ('ds' for the timestamp, 'y' for the target) and parse the dates.
df_prophet = (
    df.groupby('date', as_index=False)['money']
    .sum()
    .rename(columns={'date': 'ds', 'money': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds'], format='mixed'))
)

# Weekly totals on the 'W-MON' frequency (weekly bins anchored on Mondays).
df_weekly = df_prophet.resample('W-MON', on='ds').sum().reset_index()
df_weekly.head()


In [None]:
# Smooth the weekly totals with a centered 3-week moving average to reduce noise.
# center=True averages the previous, current and next week, so the first and
# last rows of 'y_smooth' are NaN.
df_weekly['y_smooth'] = df_weekly['y'].rolling(window=3, center=True).mean()

# Overlay the raw series and its smoothed version.
smooth_fig, smooth_ax = plt.subplots(figsize=(10, 5))
smooth_ax.plot(df_weekly['ds'], df_weekly['y'], label='Original', alpha=0.4)
smooth_ax.plot(
    df_weekly['ds'],
    df_weekly['y_smooth'],
    label='Média móvel (3 semanas)',
    color='red',
)
smooth_ax.set_title('Série temporal — Vendas de Café (suavizada)')
smooth_ax.legend()
smooth_ax.grid()
plt.show()


In [None]:
H = 8  # test horizon (8 weeks)
SMOOTH_WINDOW = 3  # weeks averaged when smoothing the training series

# Prophet refuses to fit on fewer than two rows, so fail early with a clear
# message when the series is too short to hold out H weeks.
if len(df_weekly) < H + 2:
    raise ValueError(
        f"Need at least {H + 2} weekly rows to hold out {H} weeks, "
        f"got {len(df_weekly)}."
    )

train_raw = df_weekly.iloc[:-H]

# Smooth using training rows only. A centered window computed over the full
# series would average the first hold-out week into the last training value
# (look-ahead leakage). min_periods=2 lets the edge rows use a two-point mean
# instead of becoming NaN, so the training history still ends on the week
# immediately before the hold-out and the H future periods below line up
# exactly with the test dates.
train = (
    train_raw[['ds']]
    .assign(
        y=train_raw['y']
        .rolling(SMOOTH_WINDOW, center=True, min_periods=2)
        .mean()
    )
    .dropna()
)

# The hold-out target is the observed weekly total, not a smoothed value:
# smoothing it would mix in the last training week and leave the final test
# week without a target.
test = df_weekly.iloc[-H:][['ds', 'y']]

m = Prophet()
m.fit(train)

# History dates plus H further weekly steps on the same 'W-MON' frequency.
future = m.make_future_dataframe(periods=H, freq='W-MON')
forecast = m.predict(future)

fig = m.plot(forecast)
plt.title("Previsão com Prophet")
plt.show()


In [None]:
# Autocorrelation of the raw weekly totals, rendered as its own figure
# before the metrics are printed.
autocorrelation_plot(df_weekly['y'])
plt.show()

# Metrics: align predictions with the hold-out rows on 'ds'. The right join
# keeps every test week; rows with a missing prediction or missing target are
# then dropped (without mutating a frame in place).
eval_df = (
    forecast[['ds', 'yhat']]
    .merge(test, on='ds', how='right')
    .rename(columns={'y': 'y_true', 'yhat': 'y_pred'})
    .dropna()
)

# An empty evaluation set would otherwise surface as an opaque sklearn error.
if eval_df.empty:
    raise ValueError(
        "No test week has both a forecast and a target value; "
        "cannot compute metrics."
    )

# Make it visible when fewer weeks than the full hold-out are being scored.
print(f"Semanas avaliadas: {len(eval_df)}/{len(test)}")

mae = mean_absolute_error(eval_df['y_true'], eval_df['y_pred'])
rmse = np.sqrt(mean_squared_error(eval_df['y_true'], eval_df['y_pred']))
r2 = r2_score(eval_df['y_true'], eval_df['y_pred'])

print(f"MAE : {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²  : {r2:.3f}")
