In [182]:
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
warnings.filterwarnings('ignore')

In [183]:
## α_jt = β_0 + β_1*CP_jt + β_2*P(m)_jt + δ_j + v_jt

n_customers = 100
n_products = 10
n_days = 180

In [184]:
def gen_welfare_df(days, c, output_dir):    
    # Step 1: Generate dates spanning 6 months to today
    end_date = datetime.today() + timedelta(days=14)
    start_date = end_date - timedelta(days=days)  # Approximate 6 months as 180 days
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    n_days = len(dates)
    
    # Step 2: Generate CP and market price_market_market_market
    np.random.seed(0)  # For reproducibility
  
    # Initialize the DataFrame with the Date column
    df = pd.DataFrame({'Date': dates})

    # Create a DataFrame with dates 
    for i in range(1, 11):
        df[f'cp_p{i}'] = np.random.normal(2 * i, 2, n_days)  # Adjust mean and std as necessary
        df[f'price_market_p{i}'] = np.random.normal(5 * i, 10, n_days)  # Adjust mean and std as necessary

    noise = np.random.normal(0, 0.05, n_days)
    
    # Fix the date format to 'YYYY-MM-DD'
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
    for i in range(1, 11):
        df[f'a_p{i}'] = (c['incpt'] + 
                        c[f'cp_p{i}'] * df[f'cp_p{i}'] + 
                        c[f'price_market_p{i}'] * df[f'price_market_p{i}'] + 
                        noise)
    df.to_csv(output_dir, index=False)
    return df

c = {
    'incpt': 0.5,
    'cp_p1': 0.1, 'price_market_p1': 0.05,
    'cp_p2': 0.2, 'price_market_p2': 0.1,
    'cp_p3': 0.3, 'price_market_p3': 0.15,
    'cp_p4': 0.4, 'price_market_p4': 0.2,
    'cp_p5': 0.5, 'price_market_p5': 0.25,
    'cp_p6': 0.6, 'price_market_p6': 0.3,
    'cp_p7': 0.7, 'price_market_p7': 0.35,
    'cp_p8': 0.8, 'price_market_p8': 0.4,
    'cp_p9': 0.9, 'price_market_p9': 0.45,
    'cp_p10': 1.0, 'price_market_p10': 0.5,
}  
df_perceived = gen_welfare_df(days=180, c = c, output_dir = './data/stage3_data.csv')

In [187]:
# Preview the last 10 rows of the generated frame.
df_perceived.tail(10)

Unnamed: 0,Date,cp_p1,price_market_p1,cp_p2,price_market_p2,cp_p3,price_market_p3,cp_p4,price_market_p4,cp_p5,...,a_p1,a_p2,a_p3,a_p4,a_p5,a_p6,a_p7,a_p8,a_p9,a_p10
171,2024-06-23,2.637455,18.645318,4.129712,23.277827,9.995912,21.071117,10.303468,24.066571,11.055195,...,1.631886,3.5896,6.595316,9.370576,14.258416,14.503516,21.383893,27.51242,28.74241,48.70534
172,2024-06-24,3.713661,-1.894492,1.440622,8.987185,4.286901,4.518296,6.821542,32.016979,8.552143,...,0.797649,1.70785,2.484822,9.65302,10.349793,19.912949,24.064013,28.686701,46.161089,47.935916
173,2024-06-25,0.697949,-1.522936,2.829138,1.968586,2.916825,6.397375,7.10307,21.474344,7.760744,...,0.521522,1.29056,2.362528,7.663971,14.237165,16.044009,25.806705,32.363247,40.677548,49.415572
174,2024-06-26,-0.068486,-0.211893,3.476709,5.356623,11.188849,18.283013,8.263148,10.225351,8.406449,...,0.519377,1.767825,6.635927,5.88715,14.645581,13.039268,23.802963,27.971267,35.703504,37.601724
175,2024-06-27,3.363189,-13.430696,3.63551,20.217906,5.191935,10.987022,5.18888,28.793899,13.096134,...,0.145484,3.229593,3.686334,8.315032,12.207011,13.852788,24.366249,34.38913,37.435348,43.873021
176,2024-06-28,0.393181,0.22026,3.594206,4.474593,3.076535,11.833447,7.300436,26.354245,9.876513,...,0.549785,1.665755,3.197432,8.690478,10.09311,14.562207,26.205637,34.650029,41.772633,56.686601
177,2024-06-29,0.6209,0.203442,3.780234,6.131292,4.63312,20.969065,12.046944,25.426108,9.106327,...,0.535722,1.832636,4.998756,10.367459,7.106191,15.715417,22.167828,33.163188,40.134294,51.621983
178,2024-06-30,1.088935,11.203583,4.42696,4.897073,6.73509,5.127133,9.010774,27.159389,9.632489,...,1.103563,1.809589,3.224087,9.470677,11.184311,17.749273,23.795463,36.387119,35.087858,50.927367
179,2024-07-01,2.034958,11.984571,1.582853,11.839255,6.380623,10.987653,8.718498,-9.946129,11.649236,...,1.356681,2.054453,4.116291,2.05213,11.71998,17.545113,17.517554,35.33149,36.281573,42.202052
180,2024-07-02,1.292012,5.037709,3.51596,6.145102,4.296542,6.999175,4.835011,28.809376,7.374301,...,0.875949,1.812564,2.833701,8.190741,12.37964,25.162812,20.00457,33.524022,35.307476,49.03287


In [186]:
# Summary statistics (count, mean, std, min, quartiles, max) of the generated columns.
df_perceived.describe()

Unnamed: 0,cp_p1,price_market_p1,cp_p2,price_market_p2,cp_p3,price_market_p3,cp_p4,price_market_p4,cp_p5,price_market_p5,...,a_p1,a_p2,a_p3,a_p4,a_p5,a_p6,a_p7,a_p8,a_p9,a_p10
count,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,...,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0
mean,2.181614,3.770772,3.828531,8.650767,5.797435,16.074969,8.204035,19.657025,10.048198,25.222903,...,0.904576,2.128659,4.648352,7.710896,11.827701,16.564392,22.287074,29.179305,36.562107,45.874561
std,2.014758,9.694993,2.05143,9.915376,1.884445,9.320992,1.926301,10.175358,1.9146,10.045595,...,0.534621,1.06177,1.525189,2.204018,2.716605,3.281794,3.630899,4.11084,4.959042,5.59075
min,-3.10598,-22.725928,-1.479354,-20.461431,0.834407,-5.473236,2.445282,-9.946129,4.917522,0.624356,...,-0.576746,-0.838603,1.225829,2.020731,5.94682,7.19665,11.933993,17.439804,24.511819,30.183492
25%,0.63038,-1.920498,2.434742,3.036733,4.368417,9.784203,7.091839,12.886936,8.713485,18.244062,...,0.519322,1.528052,3.542691,5.987708,9.983845,14.097428,19.688534,26.179686,33.160367,42.108396
50%,2.133034,3.640503,3.898793,8.987185,5.865732,16.04714,8.317917,19.40072,10.002942,25.82611,...,0.919832,2.054453,4.617728,7.705636,11.886732,16.529718,22.439745,29.060236,35.968782,46.074152
75%,3.688726,10.785215,5.286629,14.524891,6.944494,21.90429,9.463318,26.251187,11.303563,31.563908,...,1.257605,2.837627,5.581706,9.2396,13.652875,18.934447,24.932245,32.316261,40.100429,49.169529
max,6.76629,28.039167,9.392448,33.807454,11.188849,46.709748,13.052736,46.429357,15.361142,49.884416,...,2.325233,4.735972,9.250299,13.116634,18.364666,26.160545,30.447055,39.729477,53.198882,62.948003
