In [None]:
### Time series forecasting: weekly sales decomposition and a lag-1 XGBoost baseline
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

from src import config

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option("display.max_columns", None)


In [None]:
!pip install statsmodels
!pip install xgboost

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load the intermediate transactions table from the project's configured data directory.
transactions_path = config.INT_FILE_PATH / 'transactions.parquet'
df = pd.read_parquet(transactions_path)

In [None]:
# Preview the first rows to confirm the load worked and to see the available columns.
df.head()

In [None]:
# Only the purchase timestamp and the order total are needed for the sales series.
keep_cols = ["order_purchase_timestamp", "order_total_price"]
df = df[keep_cols]

In [None]:
# Aggregate order totals into weekly buckets keyed by purchase timestamp.
weekly_sales = (
    df
    .set_index("order_purchase_timestamp")
    .resample("W")[["order_total_price"]]
    .sum()
)

In [None]:
# Display the resampled weekly totals as a sanity check.
weekly_sales

In [None]:
# Split the weekly series into trend, seasonal and residual components.
# `extrapolate_trend=12` asks statsmodels to extrapolate the trend at the ends of the
# series instead of leaving NaN edges (see the statsmodels docs for the exact rule).
# NOTE(review): no `period` is passed, so it is presumably inferred from the weekly
# index frequency — confirm the series is long enough for that period.
decompose = seasonal_decompose(weekly_sales, extrapolate_trend=12)

In [None]:
# Observed series (the data that was passed to seasonal_decompose)
obs = decompose.observed
# Trend component
trend = decompose.trend
# Seasonal component
season = decompose.seasonal
# Residual component (what is left after removing trend and seasonality)
# NOTE(review): the name `random` would shadow the stdlib `random` module if that
# module were ever imported in this notebook.
random = decompose.resid


In [None]:
# One panel per decomposition component, stacked on a shared time axis.
fig, axes = plt.subplots(4, 1, figsize=(15, 8), sharex=True)
fig.suptitle('Time Series of Purchase Values')

# x and y are passed as vectors, so `data=` is not needed; it was redundant, and newer
# seaborn releases reject a Series as `data` (only DataFrame / mapping are accepted).
components = [
    ("Observed", obs),
    ("Trend", trend),
    ("Seasonal", season),
    ("Residual", random),
]
for ax, (label, series) in zip(axes, components):
    sns.lineplot(x=series.index, y=series, ax=ax)
    # Label each panel explicitly so the figure can be read on its own.
    ax.set_ylabel(label)

In [None]:
# Summarise the weekly frame (index range, dtypes, non-null counts) before modelling.
weekly_sales.info()

In [None]:
# Chronological split: the earliest 80% of weeks train the model, the rest is held out.
TRAIN_FRACTION = 0.80
train_size = int(len(weekly_sales) * TRAIN_FRACTION)

# .iloc makes the positional slicing explicit, and .copy() yields independent frames so
# that adding feature columns later does not trigger SettingWithCopyWarning.
train = weekly_sales.iloc[:train_size].copy()
test = weekly_sales.iloc[train_size:].copy()

In [None]:
# Lag-1 feature: the predictor "X" for a week is the previous week's sales.
# The lag is computed on the full series, so the first test week gets the last training
# week's value (already known at forecast time) instead of NaN; the training rows are
# the same as shifting within `train`.
lagged_sales = weekly_sales["order_total_price"].shift(1)

# .assign builds new frames: no write onto a slice, and the name `df` (the transactions
# frame) is no longer rebound by a loop variable, so earlier cells stay re-runnable.
train = train.assign(X=lagged_sales.reindex(train.index))
test = test.assign(X=lagged_sales.reindex(test.index))

In [None]:
# Inspect the first training rows, including the lag feature "X" (NaN for the first week).
train.head()

In [None]:
# Separate the single lag feature (kept 2-D as a DataFrame) from the target series.
feature_cols = ["X"]
target_col = "order_total_price"

X_train = train[feature_cols]
y_train = train[target_col]
X_test = test[feature_cols]
y_test = test[target_col]

In [None]:
# Gradient-boosted regressor predicting a week's sales from the previous week's ("X").
# `early_stopping_rounds` is set on the estimator: passing it to `fit()` was deprecated
# in xgboost 1.6 and removed in 2.0.
xgb_model = xgb.XGBRegressor(
    n_estimators=150,
    learning_rate=0.05,
    early_stopping_rounds=5,
)

# NOTE(review): the test split doubles as the early-stopping validation set, so the
# RMSE reported below is optimistic; consider holding out a validation window from
# `train` instead.
xgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    verbose=False,
)

xgb_pred = xgb_model.predict(X_test)

# RMSE as sqrt(MSE): the `squared=False` argument was deprecated in scikit-learn 1.4
# and removed in 1.6, while this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, xgb_pred))
print('RMSE for xgb was: \n', rmse)

In [None]:

# In-sample and out-of-sample predictions, each indexed by the weeks they were made for
# (taken from the feature frames rather than overwritten positionally afterwards).
train_pred = pd.Series(xgb_model.predict(X_train), index=X_train.index)
test_pred = pd.Series(xgb_model.predict(X_test), index=X_test.index)

# Naming the series gives the prediction line its own legend entry.
predictions = pd.concat([train_pred, test_pred], axis=0).rename("xgb_prediction")

fig, ax = plt.subplots(figsize=(15, 6))

weekly_sales[['order_total_price']].plot(ax=ax)
predictions.plot(ax=ax, color="orange", legend=True)

ax.set(
    title="Weekly sales: actual vs. XGBoost prediction",
    xlabel="Week",
    ylabel="Order total price",
)
plt.show()