In [21]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from prophet import Prophet
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA


import warnings
warnings.filterwarnings('ignore')

In [22]:
mpl.rcParams['figure.figsize'] = (12, 8)
mpl.rcParams['axes.grid'] = False

from matplotlib.colors import ListedColormap, LinearSegmentedColormap
colors = mpl.colormaps['coolwarm'].resampled(10)
colors = colors(np.linspace(0, 1, 10))

In [23]:
df = pd.read_csv("Anon_Cust_Order_Data.csv")
df.head()

Unnamed: 0,Order_ID,CustomerID,Date,State,City,ZIP,Product,Quantity,Price_Each
0,176558,1,4/19/19 8:46,TX,Dallas,75001,USB-C Charging Cable,2,11.95
1,176559,2,4/7/19 22:30,MA,Boston,2215,Bose SoundSport Headphones,1,99.99
2,176560,3,4/12/19 14:38,CA,Los Angeles,90001,Google Phone,1,600.0
3,176560,3,4/12/19 14:38,CA,Los Angeles,90001,Wired Headphones,1,11.99
4,176561,4,4/30/19 9:27,CA,Los Angeles,90001,Wired Headphones,1,11.99


### Load cleaned DF from EDA analysis

In [30]:
products_wide = pd.read_csv("products_wide.csv")
products_wide['Date'] =  pd.to_datetime(products_wide['Date'], utc=True).dt.date
products_wide['Date'] =  pd.to_datetime(products_wide['Date'], infer_datetime_format=True)
products_wide = products_wide.sort_values('Date').set_index('Date', drop=True)

display(products_wide.head())
display(products_wide.info())

Unnamed: 0_level_0,20in Monitor,27in 4K Gaming Monitor,27in FHD Monitor,34in Ultrawide Monitor,AA Batteries (4-pack),AAA Batteries (4-pack),Apple Airpods Headphones,Bose SoundSport Headphones,Flatscreen TV,Google Phone,LG Dryer,LG Washing Machine,Lightning Charging Cable,Macbook Pro Laptop,ThinkPad Laptop,USB-C Charging Cable,Vareebadd Phone,Wired Headphones,iPhone
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-01-01,4.0,7.0,8.0,18.0,33.0,61.0,21.0,15.0,9.0,12.0,1.0,2.0,42.0,9.0,9.0,43.0,2.0,31.0,16.0
2019-01-02,9.0,14.0,17.0,8.0,54.0,43.0,35.0,19.0,9.0,12.0,3.0,1.0,36.0,9.0,11.0,36.0,2.0,35.0,15.0
2019-01-03,6.0,14.0,16.0,12.0,44.0,40.0,21.0,24.0,5.0,8.0,1.0,3.0,43.0,4.0,3.0,43.0,1.0,31.0,11.0
2019-01-04,7.0,8.0,15.0,11.0,30.0,41.0,24.0,16.0,10.0,11.0,3.0,1.0,36.0,10.0,8.0,57.0,7.0,28.0,7.0
2019-01-05,5.0,13.0,16.0,8.0,50.0,63.0,23.0,23.0,5.0,11.0,,1.0,41.0,4.0,7.0,30.0,1.0,47.0,7.0


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2019-01-01 to 2019-12-31
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   20in Monitor                365 non-null    float64
 1   27in 4K Gaming Monitor      365 non-null    float64
 2   27in FHD Monitor            365 non-null    float64
 3   34in Ultrawide Monitor      365 non-null    float64
 4   AA Batteries (4-pack)       365 non-null    float64
 5   AAA Batteries (4-pack)      365 non-null    float64
 6   Apple Airpods Headphones    365 non-null    float64
 7   Bose SoundSport Headphones  365 non-null    float64
 8   Flatscreen TV               365 non-null    float64
 9   Google Phone                365 non-null    float64
 10  LG Dryer                    299 non-null    float64
 11  LG Washing Machine          312 non-null    float64
 12  Lightning Charging Cable    365 non-null    float64
 13  Macbook Pro Lapt

None

## Start building a baseline forecast.

- Using Prophet as baseline ML model. Structure df for Prophet

In [31]:
# Take one Sample:
df0 = pd.DataFrame(products_wide['20in Monitor'])
df0 = df0.resample('W').sum().reset_index()
df0.columns = ['ds', 'y']

m = Prophet()
m.fit(df0)

10:16:54 - cmdstanpy - INFO - Chain [1] start processing
10:16:55 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7fb66148ed90>

In [32]:
future = m.make_future_dataframe(periods=20)

future = future.set_index('ds').resample('W').sum()
future

2019-01-06
2019-01-13
2019-01-20
2019-01-27
2019-02-03
2019-02-10
2019-02-17
2019-02-24
2019-03-03
2019-03-10
2019-03-17


## Time Series Forecasting

The stats library in Python has tools for building several other time-series prediction models - ARMA, ARIMA, and SARIMA - with just a few lines of code. 

Since all of these models are available in a single library, we can easily run many Python forecasting experiments using different models in the same script or notebook when conducting time series forecasting in Python. 

https://builtin.com/data-science/time-series-forecasting-python