# Importing the libraries

In [34]:
import pandas as pd
import seaborn as sb
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet

# Reading the dataset and printing the main statistics

In [35]:
dataset = pd.read_csv('/content/data.csv')
print('==================================')
print(f'The shape of the dataset is {dataset.shape}.')
print(f'\n')
print(f'The columns present in the dataset are {list(dataset.columns)}')
print(f'\n')
print('==================================')
dataset = dataset.loc[:, ~dataset.columns.str.contains('^Unnamed')] # removing the garbage column
dataset["day"] = pd.to_datetime(dataset["day"]) # converting the data type of the day column to date time
dataset.sort_values(by='day', inplace=True) # sorting the dataset in ascending order
dataset = dataset.reset_index(drop=True) # resetting the index of the dataframe
dataset_deduped = dataset.drop_duplicates() # keeping the first duplicate row 
print(f'The number of missing values after removing the duplicates in each of the columns are \n{dataset_deduped.isna().sum()}')
print(f'\n')
print('==================================')
dataset_deduped.head()

The shape of the dataset is (1878, 9).


The columns present in the dataset are ['Unnamed: 0', 'day', 'item_number', 'item_name', 'purchase_price', 'suggested_retail_price', 'orders_quantity', 'sales_quantity', 'revenue']


The number of missing values after removing the duplicates in each of the columns are 
day                         0
item_number                 0
item_name                   0
purchase_price              0
suggested_retail_price      0
orders_quantity             0
sales_quantity              0
revenue                   476
dtype: int64




Unnamed: 0,day,item_number,item_name,purchase_price,suggested_retail_price,orders_quantity,sales_quantity,revenue
0,2021-04-03,80028349,UH ZWIEBELN DE-HE I 1KG GS,0.674928,1.055314,0.0,23.0,22.77
1,2021-04-03,80317483,ROMA TOMATEN ES I 500G SF,0.978451,1.546338,40.0,17.0,25.33
4,2021-04-03,80101923,SL MANDARINEN BEH.ES I 750G GS,1.991321,3.216415,0.0,2.0,5.98
6,2021-04-04,80028349,UH ZWIEBELN DE-HE I 1KG GS,0.674928,1.055314,0.0,0.0,
7,2021-04-04,80101923,SL MANDARINEN BEH.ES I 750G GS,1.991321,3.216415,0.0,0.0,


# Modelling 

In [66]:
group_by_result_1 = dataset_deduped.groupby(['day','item_name']).aggregate({
                             'orders_quantity':'sum',
                             'sales_quantity':'sum'})
group_by_result_1.reset_index(inplace=True) # resetting the index to get the index values as columns 
group_by_result_1

Unnamed: 0,day,item_name,orders_quantity,sales_quantity
0,2021-04-03,ROMA TOMATEN ES I 500G SF,40.0,17.0
1,2021-04-03,SL MANDARINEN BEH.ES I 750G GS,0.0,2.0
2,2021-04-03,UH ZWIEBELN DE-HE I 1KG GS,0.0,23.0
3,2021-04-04,ROMA TOMATEN ES I 500G SF,0.0,0.0
4,2021-04-04,SL MANDARINEN BEH.ES I 750G GS,0.0,0.0
...,...,...,...,...
934,2022-02-08,SL MANDARINEN BEH.ES I 750G GS,0.0,3.0
935,2022-02-08,UH ZWIEBELN DE-HE I 1KG GS,18.0,18.0
936,2022-02-09,ROMA TOMATEN ES I 500G SF,0.0,16.0
937,2022-02-09,SL MANDARINEN BEH.ES I 750G GS,0.0,3.0


In [67]:
time_series_1 = group_by_result_1.drop(['sales_quantity'], axis=1)
time_series_2 = group_by_result_1.drop(['orders_quantity'], axis=1)

In [68]:
time_series_1.rename(columns={'day':'ds', 'item_name': 'ticker', 'orders_quantity':'y'}, inplace=True)
time_series_2.rename(columns={'day':'ds', 'item_name': 'ticker', 'sales_quantity':'y'}, inplace=True)

## Making the forecast of orders and sales happened for the next 15 days using Facebook Prophet model

In [69]:
def train_and_forecast(group):
  # reference : https://medium.com/grabngoinfo/3-ways-for-multiple-time-series-forecasting-using-prophet-in-python-7a0709a117f9

  # Initiate the model
  m = Prophet()
  
  # Fit the model
  m.fit(group)

  # Make predictions
  future = m.make_future_dataframe(periods=15)
  forecast = m.predict(future)[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
  forecast['ticker'] = group['ticker'].iloc[0]
  
  # Return the forecasted results
  return forecast[['ds', 'ticker', 'yhat', 'yhat_upper', 'yhat_lower']]  

In [70]:
from time import time 
start_time = time()

orders_forecasts = pd.DataFrame()
sales_forecast = pd.DataFrame()

# Loop through each ticker
for item_name in ['ROMA TOMATEN ES I 500G SF', 'SL MANDARINEN BEH.ES I 750G GS', 'UH ZWIEBELN DE-HE I 1KG GS']:
  # Get the data for the ticker
  group1_orders_made = time_series_1[time_series_1['ticker'].isin([item_name])] 
  group2_sales_made = time_series_2[time_series_2['ticker'].isin([item_name])] 
 
  # Make forecast
  forecast_1 = train_and_forecast(group1_orders_made) # making the forecasts for orders
  forecast_2 = train_and_forecast(group2_sales_made) # making the forecasts for sales
  # Add the forecast results to the dataframe
  orders_forecasts = pd.concat((orders_forecasts, forecast_1))
  sales_forecast = pd.concat((sales_forecast, forecast_2))

print('The time used for the for-loop forecast is ', time()-start_time)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpy7ky2tc3/xpwrong6.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpy7ky2tc3/77i0c4io.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.8/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=32453', 'data', 'file=/tmp/tmpy7ky2tc3/xpwrong6.json', 'init=/tmp/tmpy7ky2tc3/77i0c4io.json', 'output', 'file=/tmp/tmpy7ky2tc3/prophet_modeljb_y605h/prophet_model-20230121175228.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:52:28 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:52:28 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonality

The time used for the for-loop forecast is  4.087908983230591


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  forecast['ticker'] = group['ticker'].iloc[0]


In [74]:
time_series_1.head()

Unnamed: 0,ds,ticker,y
0,2021-04-03,ROMA TOMATEN ES I 500G SF,40.0
1,2021-04-03,SL MANDARINEN BEH.ES I 750G GS,0.0
2,2021-04-03,UH ZWIEBELN DE-HE I 1KG GS,0.0
3,2021-04-04,ROMA TOMATEN ES I 500G SF,0.0
4,2021-04-04,SL MANDARINEN BEH.ES I 750G GS,0.0


In [71]:
orders_forecasts.head()

Unnamed: 0,ds,ticker,yhat,yhat_upper,yhat_lower
0,2021-04-03,ROMA TOMATEN ES I 500G SF,16.976497,28.107006,6.077051
1,2021-04-04,ROMA TOMATEN ES I 500G SF,6.66425,18.259364,-3.631693
2,2021-04-05,ROMA TOMATEN ES I 500G SF,15.032938,25.834404,4.660423
3,2021-04-06,ROMA TOMATEN ES I 500G SF,11.391386,22.807607,-0.272994
4,2021-04-07,ROMA TOMATEN ES I 500G SF,12.642184,23.555191,1.185763


In [73]:
sales_forecast.head()

Unnamed: 0,ds,ticker,yhat,yhat_upper,yhat_lower
0,2021-04-03,ROMA TOMATEN ES I 500G SF,12.664441,19.33504,6.594157
1,2021-04-04,ROMA TOMATEN ES I 500G SF,4.864033,11.510144,-1.296097
2,2021-04-05,ROMA TOMATEN ES I 500G SF,9.439622,15.932458,2.508812
3,2021-04-06,ROMA TOMATEN ES I 500G SF,11.059451,17.078067,4.638429
4,2021-04-07,ROMA TOMATEN ES I 500G SF,10.967899,17.428254,4.782067


1. Training is done from 3rd April 2021 to 9th February 2022. Then the prophet model makes the predictions from 10th February 2022 to 24th February 2022 for the next 15 days. This is done for both the orders made and the sales happened over the time period.
2. The facebook prophet model gives the predicted values which is y_hat for the respective days on which the model is trained as well as the next 15 days. 
3. yhat_lower and yhat_upper are the lower and upper bound of the uncertainty interval.
4. Displaying a time series plot for the predicted and the actual orders made and the sales happened would further give us the quality of the trained models.