# Royalty Regression Model
### Use this for Calculating Predicted Monthly Sales

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import sklearn.metrics
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import os
import joblib

In [2]:
# Load the 2022 SRS product-level sales workbook (what the shops have sold).
# NOTE(review): the absolute G: path only resolves where that drive is mapped.
os.chdir(r'G:\FinanceReports\2022\Wk52')
srs = pd.read_excel(
    '2022-SRSsales-productLevel-wk1-52 1.18.2023.xlsx',
    sheet_name='2022',
)

In [3]:
# Switch to the royalty working folder and load the invoice detail sheet
# (what the shops have ordered).
os.chdir(r'L:\J.Harned\Royalty')
invoice = pd.read_excel(
    '2022 Invoice History - ms reports.xlsx',
    sheet_name='Detail',
)

In [4]:
# Keep only rows dated on or before 12/31/2022, the last invoice date in the file.
last_date = '2022-12-31'
# Sales rows are filtered on their EndDate...
srs = srs.loc[srs['EndDate'] <= last_date]
# ...and invoice rows on their INVOICEDATE.
invoice = invoice.loc[invoice['INVOICEDATE'] <= last_date]

In [5]:
# Build new frames rather than mutating the filtered ones in place: assigning a
# column onto the result of a boolean filter can raise pandas'
# SettingWithCopyWarning, and inplace=True makes the cell harder to re-run.

# Rename the invoice customer number to 'ShopID', the key name used for the shop
# in the sales data.
invoice = invoice.rename(columns={'CUSTOMERNO': 'ShopID'})
# Numeric month of each sales row, taken from its StartDate.
srs = srs.assign(Month=srs.StartDate.dt.month)
# Numeric month of each invoice row, taken from its SHIPDATE.
# NOTE(review): rows are filtered on EndDate / INVOICEDATE but the month comes
# from StartDate / SHIPDATE, and a bare month carries no year -- confirm that
# rows starting or shipping outside 2022 cannot land in the wrong month bucket.
invoice = invoice.assign(Month=invoice.SHIPDATE.dt.month)

In [6]:
# Restrict the invoices to the custard and mix product lines.
royalty_product_lines = ['CUSTARD', 'MIXES', 'MIXES - SUGARFREE', 'CUSTARD - KOSHER']
inv_royalty = invoice[invoice.PRODUCTLINEDESC.isin(royalty_product_lines)]

In [7]:
# Total 'Ext Royalty' paid by the franchisees per month and shop; needed to compute the ARS.
royalty_keys = ['Month', 'ShopID', 'CUSTOMERNAME']
royalty_month = (
    inv_royalty
    .groupby(royalty_keys)['Ext Royalty']
    .sum()
    .reset_index()
)

In [8]:
# Per shop, customer name and month: mean and total of QUANTITYSHIPPED.
# Named aggregation produces the flat 'Quantity Mean' / 'Quantity Sum' column
# names directly, ready for the merge with the sales data.
inv_group = (
    inv_royalty
    .groupby(['ShopID', 'CUSTOMERNAME', 'Month'])
    .agg(**{
        'Quantity Mean': ('QUANTITYSHIPPED', 'mean'),
        'Quantity Sum': ('QUANTITYSHIPPED', 'sum'),
    })
    .reset_index()
)

In [9]:
# Product types that are part of the royalty calculation.
royalty_product_types = [
    'Italian Ice', 'Gelati', 'Misto', 'Custard', 'Milkshakes',
    'Frozen Beverages', 'Hand Scooped Custard', 'Concrete', 'Blendini',
]
royalty_sales = srs[srs.ProductTypeName.isin(royalty_product_types)]
# Sum SalesQty and SalesMny per shop and month.
royalty = (
    royalty_sales
    .groupby(['ShopID', 'Month'])[['SalesQty', 'SalesMny']]
    .sum()
    .reset_index()
)

In [10]:
# Join the per-shop monthly sales totals (royalty) to the per-shop monthly
# shipped quantities (inv_group). The merge uses the default inner join, so only
# shop/month pairs present in both frames are kept.
merge_df = royalty.merge(inv_group, on=['ShopID', 'Month'])
# Reorder the columns, keeping SalesMny last.
column_order = ['CUSTOMERNAME', 'ShopID', 'Month', 'Quantity Mean', 'Quantity Sum', 'SalesQty', 'SalesMny']
merge_df = merge_df[column_order]

In [11]:
# Display the merged frame to sanity-check the join result.
merge_df

Unnamed: 0,CUSTOMERNAME,ShopID,Month,Quantity Mean,Quantity Sum,SalesQty,SalesMny
0,PA WOODHAVEN,1,2,2.571429,18.0,1890,7973.302
1,PA WOODHAVEN,1,3,3.315789,63.0,8329,34891.222
2,PA WOODHAVEN,1,4,2.483871,77.0,9575,40006.452
3,PA WOODHAVEN,1,5,3.300000,66.0,14342,61534.163
4,PA WOODHAVEN,1,6,2.468750,79.0,12372,53010.612
...,...,...,...,...,...,...,...
4398,MD ORIOLE PARK-CAMDEN YARDS,1492,5,20.000000,20.0,5686,47263.500
4399,MD ORIOLE PARK-CAMDEN YARDS,1492,6,7.000000,42.0,1646,13387.500
4400,MD ORIOLE PARK-CAMDEN YARDS,1492,7,6.285714,44.0,8521,72397.500
4401,MD ORIOLE PARK-CAMDEN YARDS,1492,8,7.166667,43.0,7749,66442.000


In [12]:
# Target variable: the SalesMny column, which is what the model predicts.
# It is selected by name rather than by position so that reordering the columns
# of merge_df cannot silently change the target; the double brackets keep it a
# one-column DataFrame, as before.
target = merge_df[['SalesMny']]
target

Unnamed: 0,SalesMny
0,7973.302
1,34891.222
2,40006.452
3,61534.163
4,53010.612
...,...
4398,47263.500
4399,13387.500
4400,72397.500
4401,66442.000


In [13]:
# Independent variables used to predict sales: everything except the
# identifiers, the target and 'Quantity Mean', which leaves Month, Quantity Sum
# and SalesQty.
non_feature_columns = ['CUSTOMERNAME', 'ShopID', 'SalesMny', 'Quantity Mean']
features = merge_df.drop(columns=non_feature_columns)
features

Unnamed: 0,Month,Quantity Sum,SalesQty
0,2,18.0,1890
1,3,63.0,8329
2,4,77.0,9575
3,5,66.0,14342
4,6,79.0,12372
...,...,...,...
4398,5,20.0,5686
4399,6,42.0,1646
4400,7,44.0,8521
4401,8,43.0,7749


In [14]:
# Split features and target into training and testing sets, holding out 20% of
# the rows for testing; the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Simple linear regression, "taught" with the training features and the
# matching training targets.
# fit() returns the estimator itself, so `reg` and `model` name the same fitted object.
reg = LinearRegression()
reg.fit(x_train, y_train)
model = reg

In [16]:
# Intercept and coefficients of the fitted model; the coefficients follow the
# feature column order (Month, Quantity Sum, SalesQty).
model.intercept_, model.coef_

(array([-819.49925229]), array([[124.99796501,  -9.96740799,   4.39938991]]))

In [17]:
# Model score on the held-out test set: R^2 (coefficient of determination).
# The closer to 1, the better the model.
model.score(x_test,y_test)

0.9124711787683919

In [18]:
# Persist the fitted model in the Royalty Regression folder for future use.
# NOTE(review): joblib files are pickle-based -- load this file only from a
# trusted location.
os.chdir(r'L:\J.Harned\Royalty\Royalty Regression')
model_filename = 'royalty_regression_model'
joblib.dump(model, model_filename)

['royalty_regression_model']