In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

#### stage one model - quantity prediction

In [27]:
def stage1_pred(data, input_cols, prods, output_dir):
    """Fit one OLS order-quantity model per product and attach in-sample predictions.

    For each product ID in ``prods`` the rows with that ``ProductID`` are
    selected, ``OrderQuantity`` is regressed on ``input_cols`` plus a constant,
    and the fitted values are stored in a new ``pred_s1`` column. The
    per-product frames are then stacked (in the order of ``prods``) and
    written to CSV.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``ProductID``, ``OrderQuantity`` and every column named
        in ``input_cols``.
    input_cols : list of str
        Regressor column names, e.g.
        ``['OrderQuantity_lag1', 'Temperature', 'Rainfall', 'CPI', 'Female', 'Age', 'Income']``.
    prods : iterable
        Product IDs to model; rows of other products are not part of the result.
    output_dir : str or path-like
        Despite the name, this is the path of the CSV *file* the combined
        frame is written to (without the index).

    Returns
    -------
    tuple (dict, pandas.DataFrame)
        ``{product_id: fitted OLS results}`` and the stacked frame including
        the ``pred_s1`` column.
    """
    stage1_models = {}
    scored_parts = []
    for prod in prods:
        # Take an explicit copy: the boolean filter yields a slice of `data`,
        # and adding a column to a slice is chained assignment
        # (SettingWithCopyWarning, silenced in this notebook by the global
        # warnings filter).
        product_rows = data[data['ProductID'] == prod].copy()
        X = sm.add_constant(product_rows[input_cols])
        y = product_rows['OrderQuantity']
        model = sm.OLS(y, X).fit()
        stage1_models[prod] = model
        # Predictions are made on the same rows the model was fitted on.
        product_rows['pred_s1'] = model.predict(X)
        scored_parts.append(product_rows)
    data = pd.concat(scored_parts)
    data.to_csv(output_dir, index=False)
    return stage1_models, data

# Regressors used for every per-product stage-one model.
input_cols = ['OrderQuantity_lag1','Temperature', 'Rainfall', 'CPI', 'Female','Age','Income',]
# Load the stage-one input and discard every row that has a missing value.
data = pd.read_csv('./data/stage1_data.csv').dropna()
# Fit products 1-4; the scored frame is also written to stage1_model_data.csv.
stage1_models, data = stage1_pred(
    data,
    input_cols,
    [1,2,3,4],
    './data/stage1_model_data.csv',
)

In [28]:
# Print the regression summary of each fitted stage-one model (products 1 to 4).
for prod in range(1, 5):
    fitted = stage1_models[prod]
    print(fitted.summary())

                            OLS Regression Results                            
Dep. Variable:          OrderQuantity   R-squared:                       0.907
Model:                            OLS   Adj. R-squared:                  0.907
Method:                 Least Squares   F-statistic:                 2.483e+04
Date:                Sun, 16 Jun 2024   Prob (F-statistic):               0.00
Time:                        20:58:27   Log-Likelihood:                -49827.
No. Observations:               17900   AIC:                         9.967e+04
Df Residuals:                   17892   BIC:                         9.973e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 -6.8537      5

In [4]:
# Preview the first rows of the frame returned by stage1_pred (now carrying pred_s1).
data.head()

Unnamed: 0,CustomerID,ProductID,Date,OrderQuantity,OrderQuantity_lag1,Day,Temperature,Rainfall,CPI,Female,Age,Income,intercept,pred_s1
1,1,1,2023-12-21,10,6.0,2,45.34,0.0,100.93,0,60,59662.22,1,11.938797
2,1,1,2023-12-22,14,10.0,3,51.87,62.52,100.93,0,60,59662.22,1,15.354608
3,1,1,2023-12-23,21,14.0,4,50.23,0.0,100.93,0,60,59662.22,1,17.900641
4,1,1,2023-12-24,24,21.0,5,36.22,0.0,100.93,0,60,59662.22,1,24.42502
5,1,1,2023-12-25,33,24.0,6,46.08,5.22,100.93,0,60,59662.22,1,26.147481


#### stage two model - membership likelihood prediction

In [31]:
def stage2_pred(data, input_cols):
    """Fit a logit model of ``membr_choice`` and attach in-sample probabilities.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``membr_choice`` and every column named in ``input_cols``.
    input_cols : list of str
        Regressor column names, used exactly as given.

    Returns
    -------
    tuple (fitted Logit results, pandas.DataFrame)
        The fitted model and a new frame equal to ``data`` plus a ``pred_s2``
        column holding the model's predictions for the same rows. The frame
        passed in is left unmodified.

    Notes
    -----
    NOTE(review): no constant column is added here, unlike the stage-one and
    stage-three OLS models, which call ``sm.add_constant``. Confirm that a
    model without an intercept is intended.
    """
    X = data[input_cols]
    y = data['membr_choice']
    model = sm.Logit(y, X).fit()
    # assign() builds a new DataFrame, so the caller's frame is not mutated.
    scored = data.assign(pred_s2=model.predict(X))
    return model, scored

data = pd.read_csv('./data/stage2_data.csv')
print(data.columns)
input_cols = ['p1_m','p2_m', 'p3_m', 'p4_m', 'Temperature', 'Rainfall', 'CPI', 'Female','Age','Income',]
## drop rows with missing records
# Only the columns the logit model actually reads are checked, so rows are
# removed solely when they could not be used for fitting anyway.
data = data.dropna(subset=input_cols + ['membr_choice'])
stage2_model, data = stage2_pred(data, input_cols)

Index(['CustomerID', 'Date', 'p1', 'p2', 'p3', 'p4', 'Temperature', 'Rainfall',
       'CPI', 'Female', 'Age', 'Income', 'p1_m', 'p2_m', 'p3_m', 'p4_m',
       'logit_t', 'membr_choice'],
      dtype='object')
Optimization terminated successfully.
         Current function value: 0.307180
         Iterations 8


In [36]:
# Display the fit statistics and coefficient table of the stage-two logit model.
stage2_model.summary()

0,1,2,3
Dep. Variable:,membr_choice,No. Observations:,17900.0
Model:,Logit,Df Residuals:,17890.0
Method:,MLE,Df Model:,9.0
Date:,"Sun, 16 Jun 2024",Pseudo R-squ.:,0.5475
Time:,21:31:23,Log-Likelihood:,-5498.5
converged:,True,LL-Null:,-12152.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p1_m,0.1004,0.005,21.352,0.000,0.091,0.110
p2_m,0.0298,0.007,4.460,0.000,0.017,0.043
p3_m,0.0222,0.006,3.779,0.000,0.011,0.034
p4_m,0.0406,0.006,7.222,0.000,0.030,0.052
Temperature,0.0222,0.003,8.455,0.000,0.017,0.027
Rainfall,-0.0691,0.001,-58.199,0.000,-0.071,-0.067
CPI,-0.0299,0.002,-18.723,0.000,-0.033,-0.027
Female,-0.5693,0.053,-10.838,0.000,-0.672,-0.466
Age,-0.0034,0.004,-0.770,0.441,-0.012,0.005


In [33]:
# Preview the first rows of the frame returned by stage2_pred (now carrying pred_s2).
data.head()

Unnamed: 0,CustomerID,Date,p1,p2,p3,p4,Temperature,Rainfall,CPI,Female,Age,Income,p1_m,p2_m,p3_m,p4_m,logit_t,membr_choice,pred_s2
0,1,2023-12-21,10.0,5.0,11.0,7.0,45.34,0.0,100.93,0,60,59662.22,11.938797,6.398548,11.122452,8.794087,1.964666,1,0.671563
1,1,2023-12-22,14.0,13.0,13.0,10.0,51.87,62.52,100.93,0,60,59662.22,15.354608,7.974386,16.914026,14.223072,-1.842689,0,0.061808
2,1,2023-12-23,21.0,18.0,17.0,12.0,50.23,0.0,100.93,0,60,59662.22,17.900641,14.282931,18.020978,15.267904,2.127569,1,0.888315
3,1,2023-12-24,24.0,14.0,23.0,11.0,36.22,0.0,100.93,0,60,59662.22,24.42502,19.265122,21.28233,16.273594,3.161343,1,0.935832
4,1,2023-12-25,33.0,15.0,32.0,12.0,46.08,5.22,100.93,0,60,59662.22,26.147481,15.384907,25.23335,15.937731,0.65197,0,0.93519


#### stage three model - seller prediction

In [65]:
def stage3_pred(data, dep_indep):
    """Fit one OLS model per dependent column and attach in-sample predictions.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every key of ``dep_indep`` and every regressor column
        listed in its values.
    dep_indep : dict
        Maps a dependent column name (e.g. ``'a_p1'``) to the list of
        regressor column names for that model. A constant is added to the
        regressors before fitting.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with one ``pred_s3_p<suffix>`` column per key, where
        ``<suffix>`` is the run of trailing digits of the key (``'a_p1'`` ->
        ``pred_s3_p1``, ``'a_p10'`` -> ``pred_s3_p10``). Keys that do not end
        in a digit fall back to their last character. The frame passed in is
        left unmodified.

    Side effects
    ------------
    Prints the regression summary of every fitted model.
    """
    import re  # local import: only needed to parse the product suffix

    scored = data.copy()
    for target, regressors in dep_indep.items():
        # Read from `scored` so that, as before, a later model may use a
        # prediction column produced by an earlier iteration.
        X = sm.add_constant(scored[regressors])
        y = scored[[target]]
        model = sm.OLS(y, X).fit()
        # Use all trailing digits rather than only the last character, so
        # multi-digit product numbers do not collide (p10 vs p0).
        trailing_digits = re.search(r'\d+$', target)
        suffix = trailing_digits.group() if trailing_digits else target[-1]
        scored[f'pred_s3_p{suffix}'] = model.predict(X)
        print(model.summary())
    return scored

# For each product k = 1..4, a_p<k> is regressed on cp_p<k> and price_market_p<k>.
dep_indep = {
    f'a_p{k}': [f'cp_p{k}', f'price_market_p{k}']
    for k in range(1, 5)
}

# Load the stage-three input and attach the pred_s3_p<k> columns.
data = stage3_pred(pd.read_csv('./data/stage3_data.csv'), dep_indep)

                            OLS Regression Results                            
Dep. Variable:                   a_p1   R-squared:                       0.974
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     3329.
Date:                Sun, 16 Jun 2024   Prob (F-statistic):          9.84e-142
Time:                        22:36:38   Log-Likelihood:                 293.84
No. Observations:                 181   AIC:                            -581.7
Df Residuals:                     178   BIC:                            -572.1
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.0021      0.005     

In [69]:
# Display the stage-three frame returned by stage3_pred (with the pred_s3_p1..p4 columns).
data

Unnamed: 0,Date,cp_p1,cp_p2,cp_p3,cp_p4,price_market_p1,price_market_p2,price_market_p3,price_market_p4,a_p1,a_p2,a_p3,a_p4,pred_s3_p1,pred_s3_p2,pred_s3_p3,pred_s3_p4
0,2023-12-19,5.528105,-1.124854,6.795545,4.036522,-30.045901,42.340854,-1.431295,7.243602,-0.468156,0.334607,0.047410,-0.002484,-0.555265,0.260936,-0.037550,-0.060671
1,2023-12-20,2.800314,1.069145,5.019895,0.230709,-12.179524,7.176305,0.429218,3.577205,-0.224134,0.028777,0.000737,0.052261,-0.243869,0.008381,-0.016599,0.018745
2,2023-12-21,3.957476,-3.670209,3.952954,0.112328,-13.319726,-2.770299,15.774622,6.922325,-0.291669,0.099551,0.101294,0.081952,-0.298955,0.097830,0.091087,0.086437
3,2023-12-22,6.481786,4.875694,4.482785,3.156395,41.082985,29.410401,9.518204,2.884944,0.295473,0.046354,0.080035,-0.068169,0.261299,0.012170,0.046728,-0.108500
4,2023-12-23,5.735116,-1.806173,3.428040,-4.118369,30.316851,34.935943,0.208394,6.405365,0.175249,0.243164,0.002230,0.265460,0.159278,0.242072,-0.010440,0.241819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,2024-06-12,0.393181,1.566078,3.391309,-0.105081,-26.543317,10.250171,5.502178,5.635425,-0.334761,0.005314,0.019472,0.064421,-0.329443,0.009785,0.025120,0.070659
177,2024-06-13,0.620900,1.561033,3.670352,0.226258,-7.085994,23.953597,29.234719,5.542611,-0.128726,0.054717,0.164949,0.029898,-0.104884,0.082692,0.182409,0.055976
178,2024-06-14,1.088935,4.861075,4.640440,-0.020585,19.188622,0.190700,14.053869,5.715939,0.182874,-0.143344,0.066987,0.055573,0.192604,-0.142527,0.076288,0.068879
179,2024-06-15,2.034958,5.095371,0.374279,1.367851,14.757789,8.981479,12.592492,2.005387,0.146066,-0.055532,0.128507,0.005473,0.105724,-0.102887,0.086982,-0.055275


In [99]:
# Wide-to-long reshape of the stage-three frame: every measure has one column
# per product (suffix _p1.._p4) and becomes a single column keyed by
# (Date, ProductID).
d = {'cp':['cp_p1','cp_p2','cp_p3','cp_p4']
     , 'price_market': [ 'price_market_p1','price_market_p2','price_market_p3','price_market_p4',]
     , 'a': ['a_p1','a_p2','a_p3','a_p4',]
     , 'pred_s3': ['pred_s3_p1','pred_s3_p2','pred_s3_p3','pred_s3_p4']
}

long_parts = []
for measure, wide_cols in d.items():
    part = pd.melt(data, id_vars=['Date'], value_vars=wide_cols, var_name='ProductID', value_name=measure)
    # Keep all trailing digits of the column name as the product ID, so the
    # reshape also works for product numbers with more than one digit.
    part['ProductID'] = part['ProductID'].str.extract(r'(\d+)$', expand=False)
    long_parts.append(part)

# Concatenate once instead of growing a frame inside the loop. Every melt
# lists the products in the same order, so the parts line up row by row.
stage3_data_long = pd.concat(long_parts, axis=1)
# Each part repeats Date and ProductID; keep only the first occurrence.
stage3_data_long = stage3_data_long.loc[:,~stage3_data_long.columns.duplicated()].copy()

In [102]:
# Rank the products within each date, largest value first (rank 1 = highest):
# Demand_Priority is ranked on `a`, Profitability_Priority on `cp`.
by_date = stage3_data_long.groupby(['Date'])
demand_rank = by_date['a'].rank(ascending=False)
profit_rank = by_date['cp'].rank(ascending=False)
stage3_data_long['Demand_Priority'] = demand_rank
stage3_data_long['Profitability_Priority'] = profit_rank

In [103]:
# Display the long-format frame with the two per-date priority ranks.
stage3_data_long

Unnamed: 0,Date,ProductID,cp,price_market,a,pred_s3,Demand_Priority,Profitability_Priority
0,2023-12-19,1,5.528105,-30.045901,-0.468156,-0.555265,4.0,2.0
1,2023-12-20,1,2.800314,-12.179524,-0.224134,-0.243869,4.0,2.0
2,2023-12-21,1,3.957476,-13.319726,-0.291669,-0.298955,4.0,1.0
3,2023-12-22,1,6.481786,41.082985,0.295473,0.261299,1.0,1.0
4,2023-12-23,1,5.735116,30.316851,0.175249,0.159278,3.0,1.0
...,...,...,...,...,...,...,...,...
719,2024-06-12,4,-0.105081,5.635425,0.064421,0.070659,1.0,4.0
720,2024-06-13,4,0.226258,5.542611,0.029898,0.055976,3.0,4.0
721,2024-06-14,4,-0.020585,5.715939,0.055573,0.068879,3.0,4.0
722,2024-06-15,4,1.367851,2.005387,0.005473,-0.055275,3.0,3.0


In [138]:
# ProductID was derived from column-name text; cast it to int before exporting.
stage3_data_long = stage3_data_long.astype({'ProductID': int})
stage3_data_long.to_csv('./data/stage3_data_store.csv', index=False)

In [139]:
# NOTE(review): stage1_data_store.csv is not written anywhere in the visible
# cells; it is presumably produced by another notebook or script - confirm.
stage1_data_store = pd.read_csv('./data/stage1_data_store.csv')
sort_keys = ['Date','Low_Stock_Priority', 'Demand_Priority', 'Profitability_Priority']
# Keep only (Date, ProductID) pairs present in both frames, then order each
# day's products by the three priority columns.
df = (
    stage3_data_long
    .merge(stage1_data_store, on=['Date', 'ProductID'], how='inner')
    .sort_values(by=sort_keys, ascending=True)
)

In [140]:
# Columns kept for the demo export, in output order.
demo_columns = [
    'Date', 'ProductID', 'Low_Stock_Priority', 'Low_Stock_Risk',
    'Demand_Priority', 'Profitability_Priority',
    'cp', 'price_market', 'a', 'pred_s3',
    'OrderQuantity', 'Inventory',
    'Temperature', 'Rainfall', 'CPI',
    'Female', 'Age', 'Income',
]
df = df[demo_columns]
# NOTE(review): unlike the other to_csv calls in this notebook, this one does
# not pass index=False, so the (post-sort, non-sequential) index is written as
# an extra leading column - confirm that consumers of the file expect it.
df.to_csv('./data/data_demo.csv')

In [141]:
# Display the merged, sorted frame that was written to data_demo.csv.
df

Unnamed: 0,Date,ProductID,Low_Stock_Priority,Low_Stock_Risk,Demand_Priority,Profitability_Priority,cp,price_market,a,pred_s3,OrderQuantity,Inventory,Temperature,Rainfall,CPI,Female,Age,Income
540,2023-12-20,4,10,0.331852,1.0,4.0,0.230709,3.577205,0.052261,0.018745,451,675,42.22,0.00,100.93,0.56,44.08,49946.1506
180,2023-12-20,2,10,0.076190,2.0,3.0,1.069145,7.176305,0.028777,0.008381,485,525,42.22,0.00,100.93,0.56,44.08,49946.1506
360,2023-12-20,3,10,0.184061,3.0,1.0,5.019895,0.429218,0.000737,-0.016599,430,527,42.22,0.00,100.93,0.56,44.08,49946.1506
0,2023-12-20,1,10,0.287113,4.0,2.0,2.800314,-12.179524,-0.224134,-0.243869,437,613,42.22,0.00,100.93,0.56,44.08,49946.1506
181,2023-12-21,2,1,-0.183712,2.0,4.0,-3.670209,-2.770299,0.099551,0.097830,625,528,45.34,0.00,100.93,0.56,44.08,49946.1506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,2024-06-15,3,10,0.070513,2.0,4.0,0.374279,12.592492,0.128507,0.086982,1740,1872,79.87,69.85,104.61,0.56,44.08,49946.1506
359,2024-06-16,2,2,-0.094262,3.0,2.0,3.011313,2.998763,-0.133965,-0.072095,534,488,78.23,0.38,104.61,0.56,44.08,49946.1506
179,2024-06-16,1,3,-0.053156,4.0,3.0,1.292012,-11.293230,-0.246674,-0.179242,1585,1505,78.23,0.38,104.61,0.56,44.08,49946.1506
719,2024-06-16,4,4,-0.022222,1.0,4.0,0.229020,5.880938,-0.009800,0.062247,2530,2475,78.23,0.38,104.61,0.56,44.08,49946.1506
