In [158]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

In [159]:
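## Stage-2 membership model, as implemented in gen_s2_tbl:
##   p(y_it = 1 | X_it) = sigmoid(α_0 + Σ_k α_k*X_kit            (k = 1..n_products; X_kit = order quantity of product k)
##                                + γ_1*age_i + γ_2*female_i + γ_3*income_i
##                                + γ_4*cpi + γ_5*mcpi + γ_6*temperature + γ_7*rainfall)
## Membership_Choice is set to 1 when this probability exceeds 0.7, otherwise 0.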

# Panel dimensions: customers x products x calendar days.
n_customers, n_products, n_days = 100, 10, 180
product_names = [
    'Avocado', 'Banana', 'Beef', 'Cauliflower', 'Egg',
    'Milk', 'Shrimp', 'Tomato', 'Yogurt', 'Zucchini',
]

# Daily calendar of n_days stamps ending 14 days after the moment this cell runs
# (about six months, counting 180 days). The stamps keep the time-of-day of datetime.today().
end_date = datetime.today() + timedelta(days=14)
start_date = end_date - timedelta(days=n_days - 1)
dates = pd.date_range(end=end_date, periods=n_days, freq='D')

In [160]:
def _pivot_by_product(df_s1, value_col, suffix=''):
    """Reshape `value_col` to one column per ProductID, one row per (CustomerID, Date).

    Missing (customer, date, product) combinations are filled with 0. The product
    columns are renamed by position to 'p1', 'p2', ... (plus `suffix`), following the
    column order returned by `pivot_table`.
    """
    keys = ['CustomerID', 'Date']
    wide = df_s1.pivot_table(index=keys, columns='ProductID', values=value_col, fill_value=0).reset_index()
    n_product_cols = wide.shape[1] - len(keys)
    wide.columns = keys + ['p' + str(i + 1) + suffix for i in range(n_product_cols)]
    return wide


def gen_s2_tbl(df_s1, c, output_dir, threshold=0.7, seed=None):
    """Build the stage-2 (membership choice) table from the stage-1 long table.

    The long table (one row per customer, product and date) is pivoted to one row per
    (CustomerID, Date) with the true quantities in 'p1'..'pK' and the stage-1 predicted
    quantities in 'p1_m'..'pK_m'. A latent logit is then computed from the TRUE
    quantities and the covariates, and thresholded into a binary membership choice.
    K is inferred from the data, so the function does not depend on any module-level
    product count.

    Parameters
    ----------
    df_s1 : pandas.DataFrame
        Must contain the columns CustomerID, ProductID, Date, OrderQuantity, pred_s1,
        Temperature, Rainfall, CPI, MCPI, Female, Age and Income.
    c : dict
        Coefficients keyed 'intcpt', 'p1'..'pK', 'age', 'female', 'income', 'cpi',
        'mcpi', 'temp' and 'rain'.
    output_dir : str or path-like
        Passed unchanged to `DataFrame.to_csv`, i.e. this is the path of the CSV file
        to write (the parameter name is kept for backward compatibility).
    threshold : float, default 0.7
        Membership_Choice is 1 where sigmoid(logit_t) > threshold, else 0.
    seed : int or None, default None
        If None, the logit noise comes from NumPy's global random state (the original
        behaviour). Otherwise a dedicated generator seeded with `seed` is used, which
        makes the output reproducible.

    Returns
    -------
    pandas.DataFrame
        The merged table with the added columns 'logit_t' and 'Membership_Choice'.
        The same table is written to `output_dir`.

    Raises
    ------
    KeyError
        If `c` lacks any coefficient required by the columns present in the data.
    """
    keys = ['CustomerID', 'Date']
    covariate_cols = ['Temperature', 'Rainfall', 'CPI', 'MCPI', 'Female', 'Age', 'Income']

    # true quantity
    df_pivot_t = _pivot_by_product(df_s1, 'OrderQuantity')
    # model predicted quantity
    df_pivot_m = _pivot_by_product(df_s1, 'pred_s1', suffix='_m')
    product_cols = [col for col in df_pivot_t.columns if col not in keys]

    # Covariates are repeated on every product row of the long table; drop_duplicates
    # collapses them before they are joined onto the wide table.
    df_merged = pd.merge(df_pivot_t, df_s1[keys + covariate_cols].drop_duplicates(), on=keys)
    df_merged = pd.merge(df_merged, df_pivot_m, on=keys)

    # Coefficient order must mirror the feature order built below.
    coef_keys = ['intcpt'] + product_cols + ['age', 'female', 'income', 'cpi', 'mcpi', 'temp', 'rain']
    missing = [key for key in coef_keys if key not in c]
    if missing:
        raise KeyError(f"coefficient dict `c` is missing keys: {missing}")
    coefficients = np.array([c[key] for key in coef_keys], dtype=float)

    # Create the feature array, including a column of ones for the intercept term
    feature_cols = product_cols + ['Age', 'Female', 'Income', 'CPI', 'MCPI', 'Temperature', 'Rainfall']
    features = df_merged[feature_cols].copy()
    features.insert(0, 'Intercept', 1)

    # Small Gaussian jitter (sd = 0.001) on the latent index.
    noise_source = np.random if seed is None else np.random.default_rng(seed)
    noise = noise_source.normal(0, 0.001, len(df_merged))
    df_merged['logit_t'] = np.dot(features.to_numpy(dtype=float), coefficients) + noise

    # Vectorised sigmoid instead of a per-row Python call.
    membership_prob = 1 / (1 + np.exp(-df_merged['logit_t']))
    df_merged['Membership_Choice'] = np.where(membership_prob > threshold, 1, 0)

    df_merged.to_csv(output_dir, index=False)
    return df_merged


In [161]:
# Coefficients of the stage-2 membership logit, keyed the way gen_s2_tbl looks them up.
c = {
    # intercept
    'intcpt': 0.01,
    # order-quantity effects, one per product column p1..p10
    'p1': 0.01,
    'p2': 0.025,
    'p3': 0.014,
    'p4': 0.02,
    'p5': 0.05,
    'p6': 0.02,
    'p7': 0.015,
    'p8': 0.05,
    'p9': 0.012,
    'p10': 0.025,
    # effects of the Female / Age / Income columns
    'female': -0.3,
    'age': 0.005,
    'income': 1e-5,
    # effects of the Temperature / Rainfall columns
    'temp': 0.01,
    'rain': -0.05,
    # effects of the CPI / MCPI columns
    'cpi': -0.01,
    'mcpi': -0.01,
}

# Load the stage-1 output, show its first two rows, then build and save the stage-2 table.
df_s1 = pd.read_csv('./data/stage1_model_data.csv')
print(df_s1.head(2))
df_s2 = gen_s2_tbl(df_s1, c, output_dir='./data/stage2_data.csv')

   CustomerID  ProductID        Date  OrderQuantity  OrderQuantity_lag1  Day  \
0           1          1  2024-01-06              6                 6.0    2   
1           1          1  2024-01-07              2                 6.0    3   

   Product  Temperature  Rainfall    CPI   MCPI  Female  Age    Income  \
0  Avocado        42.22       0.0  100.7  100.7       0   60  59662.22   
1  Avocado        45.34       0.0  100.7  100.7       0   60  59662.22   

   intercept   pred_s1  
0          1  6.704962  
1          1  6.627071  


In [162]:
# Preview the first two rows of the stage-2 table.
df_s2.head(n=2)

Unnamed: 0,CustomerID,Date,p1,p2,p3,p4,p5,p6,p7,p8,...,p3_m,p4_m,p5_m,p6_m,p7_m,p8_m,p9_m,p10_m,logit_t,Membership_Choice
0,1,2024-01-06,6.0,8.0,4.0,3.0,6.0,9.0,2.0,11.0,...,8.036451,7.42152,5.509271,7.864771,6.95644,9.830769,9.33876,10.600198,1.254485,1
1,1,2024-01-07,2.0,11.0,13.0,10.0,4.0,11.0,5.0,11.0,...,8.401939,8.475296,7.828525,8.498183,7.333603,10.931386,13.748742,14.281508,1.650778,1


In [163]:
# Summary statistics of the 0/1 outcome; the mean is the share of rows with Membership_Choice == 1.
df_s2['Membership_Choice'].describe()

count    17900.000000
mean         0.363184
std          0.480931
min          0.000000
25%          0.000000
50%          0.000000
75%          1.000000
max          1.000000
Name: Membership_Choice, dtype: float64

In [165]:
# Per-date mean of Membership_Choice (one row per Date), written out as a CSV.
df_s2.groupby('Date')[['Membership_Choice']].mean().to_csv('./data/stage2_data_store.csv')