In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error,mean_squared_error,r2_score
from sklearn.linear_model import LinearRegression
import pickle

### 1. You are required to build predictive model using 

Customer 360 (created in Task-2) for the below task using Statsmodels package.
a. You are required to predict total order value (target variable), and the other variables as X variables (whichever variables are applicable)

b. Calculate goodness of fit metrics like MAPE, R-SQURE, MSE, RMSE etc.

c. Validate the model

d. Provide mathematical equation of the model

e. Create python pickle file of the model

In [2]:
customer_360=pd.read_csv("customer_360.csv")
customer_360

Unnamed: 0,CUSTOMER_KEY,CONTACT_NUMBER,Referred_Other_customers,Gender,Location,Acquired_Channel,No_of_Orders,Total_Order_value,Total_Discount,Total_Orders_with_discount,Total_Orders_received_late,Total_Orders_returned,Maximum_Order_value,First_Transaction_Date,Last_Transaction_Date,Tenure_Months,No_of_orders_with_Zero_value
0,C100000,9693593216,N,M,Chennai,APP,25,3862.50,0.0,0,0,0,335.15,2016-05-18,2016-07-15,1,5
1,C100001,9770151476,Y,M,Bangalore,WEBSITE,1,307.52,0.0,0,0,0,307.52,2016-05-18,2016-05-18,0,0
2,C100002,9779085726,N,M,Gurgaon,WEBSITE,16,1081.08,2256.0,8,0,0,270.27,2016-05-18,2016-05-25,0,12
3,C100003,9298611643,N,M,Gurgaon,APP,1,153.73,0.0,0,0,0,153.73,2016-05-18,2016-05-18,0,0
4,C100004,9929250328,Y,M,Chennai,APP,1,-39.55,0.0,0,0,0,-39.55,2016-05-18,2016-05-18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11130,C19486,9623798327,N,M,Gurgaon,WEBSITE,1,751.38,0.0,0,0,0,751.38,2016-07-18,2016-07-18,0,0
11131,C19487,9703385893,N,M,Others,WEBSITE,1,104.11,0.0,0,0,0,104.11,2016-07-18,2016-07-18,0,0
11132,C19488,9200667897,N,F,Chennai,WEBSITE,1,2310.99,200.0,1,0,0,2310.99,2016-07-18,2016-07-18,0,0
11133,C19489,9485447390,N,M,Bangalore,WEBSITE,1,91.77,0.0,0,0,0,91.77,2016-07-19,2016-07-19,0,0


In [3]:
# Select predictor variables (X) and target variable (total order value)
x=customer_360[['No_of_Orders','Total_Discount','Total_Orders_with_discount','Total_Orders_received_late']]
y=customer_360['Total_Order_value']

In [4]:
# Split the data into training and testing sets
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [5]:
x_train=sm.add_constant(x_train)


In [6]:
#Build the model
model = sm.OLS(y_train, x_train).fit()

In [7]:
x_test=sm.add_constant(x_test)

In [8]:
y_pred=model.predict(x_test)

In [12]:
# Calculate goodness-of-fit metrics
mape=mean_absolute_percentage_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
rmse=mse**0.5
r_squared=r2_score(y_test,y_pred)

In [13]:
print("Mean Absolute Percentage Error (MAPE):", mape)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R^2):", r_squared)


Mean Absolute Percentage Error (MAPE): 4.43058293889168e+17
Mean Squared Error (MSE): 1098875502.6876144
Root Mean Squared Error (RMSE): 33149.291134013925
R-squared (R^2): 0.6180920754991343


In [14]:
model.summary()

0,1,2,3
Dep. Variable:,Total_Order_value,R-squared:,0.796
Model:,OLS,Adj. R-squared:,0.796
Method:,Least Squares,F-statistic:,8669.0
Date:,"Fri, 03 May 2024",Prob (F-statistic):,0.0
Time:,15:32:08,Log-Likelihood:,-103050.0
No. Observations:,8908,AIC:,206100.0
Df Residuals:,8903,BIC:,206200.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1198.4749,283.663,4.225,0.000,642.430,1754.520
No_of_Orders,708.8335,7.119,99.569,0.000,694.879,722.788
Total_Discount,-7.5743,0.876,-8.646,0.000,-9.292,-5.857
Total_Orders_with_discount,662.8563,47.949,13.824,0.000,568.864,756.848
Total_Orders_received_late,-69.7540,96.862,-0.720,0.471,-259.625,120.117

0,1,2,3
Omnibus:,12973.939,Durbin-Watson:,2.007
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19459300.573
Skew:,8.186,Prob(JB):,0.0
Kurtosis:,231.384,Cond. No.,349.0


In [16]:
# Save the model as a pickle file
with open('customer_360_model.pkl', 'wb') as file:pickle.dump(model, file)