In [203]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

In [204]:
# Specification noted by the author for the simulated quantity:
#   α_jt = β_0 + β_1*CP_jt + β_2*P(m)_jt + δ_j + v_jt

# Simulation dimensions.
n_customers = 100
n_products = 10
n_days = 180  # six months approximated as 180 days

product_names = [
    'Avocado', 'Banana', 'Beef', 'Cauliflower', 'Egg',
    'Milk', 'Shrimp', 'Tomato', 'Yogurt', 'Zucchini',
]

# Daily calendar with n_days entries; the last one falls 14 days after the
# moment this cell is executed, the first one n_days - 1 days before that.
end_date = datetime.today() + timedelta(days=14)
start_date = end_date - timedelta(days=n_days - 1)
dates = pd.date_range(start=start_date, periods=n_days, freq='D')

In [205]:
def gen_welfare_df(days, c, output_dir, end_date=None, n_products=10):
    """Simulate daily per-product data, save it as CSV and return it.

    For every product index ``i`` in ``1..n_products`` the frame receives
    three columns:

    * ``cp_p{i}``            -- normal draws with mean ``2 * i`` and std 2
    * ``price_market_p{i}``  -- normal draws with mean ``5 * i`` and std 10
    * ``a_p{i}``             -- ``c['incpt'] + c['cp_p{i}'] * cp_p{i}
      + c['price_market_p{i}'] * price_market_p{i} + noise``

    where ``noise`` is a single normal(0, 0.05) vector.

    Parameters
    ----------
    days : int
        Number of consecutive daily rows to generate (must be >= 1).
    c : dict
        Coefficients: ``'incpt'`` plus ``'cp_p{i}'`` and
        ``'price_market_p{i}'`` for every product index.
    output_dir : str or path-like
        Despite its name, this is the path of the CSV *file* to write.
        Missing parent directories are created.
    end_date : datetime-like, optional
        Last date of the calendar. Defaults to 14 days after the current
        date/time.
    n_products : int, default 10
        Number of products to simulate.

    Returns
    -------
    pandas.DataFrame
        ``days`` rows; a ``Date`` column formatted as 'YYYY-MM-DD' followed
        by all ``cp``/``price_market`` pairs and then all ``a_p{i}`` columns.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.
    KeyError
        If ``c`` lacks a coefficient for one of the products.

    Notes
    -----
    NumPy's global RNG is reseeded with 0 on every call, so repeated calls
    with the same arguments return identical numbers and the global random
    state is reset as a side effect.
    """
    import os  # local import: only needed to create the output folder

    if days < 1:
        raise ValueError(f"days must be >= 1, got {days}")

    # Build the calendar from `days` instead of relying on a notebook-level
    # `dates` variable, so the function works on a fresh kernel and honours
    # its own argument.
    if end_date is None:
        end_date = datetime.today() + timedelta(days=14)
    date_index = pd.date_range(end=end_date, periods=days, freq='D')
    n_days = len(date_index)
    product_ids = range(1, n_products + 1)

    np.random.seed(0)  # For reproducibility

    df = pd.DataFrame({'Date': date_index})

    # Draw order (cp, then market price, product by product, then the noise)
    # is kept fixed so that the seeded output stays the same.
    for i in product_ids:
        df[f'cp_p{i}'] = np.random.normal(2 * i, 2, n_days)
        df[f'price_market_p{i}'] = np.random.normal(5 * i, 10, n_days)

    # NOTE(review): one noise vector is shared by every product and there is
    # no product-specific intercept; confirm this is intended, since the
    # model written in the notebook has v_jt and δ_j terms.
    noise = np.random.normal(0, 0.05, n_days)

    # Store dates as 'YYYY-MM-DD' strings.
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    for i in product_ids:
        df[f'a_p{i}'] = (c['incpt'] +
                         c[f'cp_p{i}'] * df[f'cp_p{i}'] +
                         c[f'price_market_p{i}'] * df[f'price_market_p{i}'] +
                         noise)

    # Create the target folder when a filesystem path is given, so a missing
    # './data' directory no longer makes to_csv fail.
    if isinstance(output_dir, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(output_dir))
        if parent:
            os.makedirs(parent, exist_ok=True)
    df.to_csv(output_dir, index=False)
    return df

# Coefficients handed to gen_welfare_df: one shared intercept plus, for
# product i = 1..10, a 'cp' weight of i/10 (0.1 ... 1.0) and a market-price
# weight of i/20 (0.05 ... 0.5). Key order: incpt, cp_p1, price_market_p1, ...
c = {
    'incpt': 0.5,
    **{
        key: weight
        for idx in range(1, 11)
        for key, weight in (
            (f'cp_p{idx}', idx / 10),
            (f'price_market_p{idx}', idx / 20),
        )
    },
}

# Generate the simulated frame and write it to the stage-3 CSV file.
df_perceived = gen_welfare_df(
    days=180,
    c=c,
    output_dir='./data/stage3_data.csv',
)

In [206]:
# Preview the last 10 rows of the generated frame.
df_perceived.tail(10)

Unnamed: 0,Date,cp_p1,price_market_p1,cp_p2,price_market_p2,cp_p3,price_market_p3,cp_p4,price_market_p4,cp_p5,...,a_p1,a_p2,a_p3,a_p4,a_p5,a_p6,a_p7,a_p8,a_p9,a_p10
170,2024-06-24,3.841718,-2.641439,-0.479208,2.950786,4.705637,4.685175,6.439391,26.659671,11.826404,...,0.741589,0.688727,2.603957,8.39718,11.749909,14.012439,21.664572,32.300247,50.450673,46.381963
171,2024-06-25,2.637455,-9.377915,4.802998,16.799748,6.944494,0.700087,8.542226,-5.345545,14.422609,...,0.27264,3.118364,2.666151,2.825571,15.4615,16.350005,21.405058,29.567082,36.221621,42.031795
172,2024-06-26,3.713661,18.645318,6.449741,3.036733,7.860817,14.383619,10.090047,6.248155,6.405088,...,1.840973,2.130962,5.053129,5.82299,6.011265,11.799949,22.233015,22.281071,31.739721,44.024383
173,2024-06-27,0.697949,-1.894492,4.129712,7.096029,5.649367,0.672645,9.198079,25.009922,7.873134,...,0.443198,2.003673,2.263835,9.149344,10.662751,20.254259,22.691428,28.555077,44.771666,50.489097
174,2024-06-28,-0.068486,-1.522936,1.440622,23.277827,3.15616,15.875315,7.318615,15.19751,8.640814,...,0.460593,3.159495,3.871733,6.510536,8.470311,16.534727,21.489986,30.549975,36.763579,55.241219
175,2024-06-29,3.363189,-0.211893,2.829138,8.987185,9.995912,24.387469,5.473654,29.361076,8.871364,...,0.844449,1.983271,7.175619,8.580402,9.814445,11.87261,17.44431,25.646632,36.562817,42.309394
176,2024-06-30,0.393181,-13.430696,3.476709,1.968586,4.286901,21.071117,2.445282,28.091803,10.454692,...,-0.074465,1.449952,5.004489,7.154225,12.101943,14.789918,18.449382,32.674579,37.892367,49.013443
177,2024-07-01,0.6209,0.22026,3.63551,5.356623,2.916825,4.518296,10.303468,8.019071,13.228499,...,0.606623,1.796284,2.086312,6.258721,13.856395,15.079991,23.208063,25.476906,35.412705,36.133333
178,2024-07-02,1.088935,0.203442,3.594206,20.217906,11.188849,6.397375,6.821542,24.066571,12.017195,...,0.566343,3.18791,4.763539,7.989209,11.529019,13.357239,19.997976,30.002985,30.244368,49.532257
179,2024-07-03,2.034958,11.203583,3.780234,4.474593,5.191935,18.283013,7.10307,32.016979,11.055195,...,1.220393,1.660225,4.756751,9.701342,12.916307,10.861971,17.373248,28.608385,33.756264,51.835421


In [207]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df_perceived.describe()

Unnamed: 0,cp_p1,price_market_p1,cp_p2,price_market_p2,cp_p3,price_market_p3,cp_p4,price_market_p4,cp_p5,price_market_p5,...,a_p1,a_p2,a_p3,a_p4,a_p5,a_p6,a_p7,a_p8,a_p9,a_p10
count,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0,...,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0,180.0
mean,2.186556,3.705264,3.849127,8.635584,5.811202,16.073752,8.183041,19.646908,10.071827,25.042808,...,0.903387,2.132852,4.653891,7.702066,11.796083,16.539957,22.325224,28.976069,36.373086,45.798301
std,2.019278,9.703485,2.052961,9.968743,1.873936,9.380501,1.894713,10.104211,1.949556,10.031575,...,0.533369,1.109823,1.494393,2.120825,2.83264,3.26015,3.595341,3.704295,5.107502,5.736139
min,-3.10598,-22.725928,-1.479354,-20.461431,0.834407,-5.473236,2.445282,-8.022028,4.010774,0.624356,...,-0.340519,-1.044763,0.802702,1.650869,5.721113,7.107572,10.60884,20.796123,24.691077,29.660533
25%,0.62801,-1.952125,2.466447,2.890478,4.450698,9.514895,7.090967,12.861624,8.740983,16.944912,...,0.468598,1.515193,3.754634,6.398518,9.790641,14.314801,19.877568,26.51518,32.801208,42.287612
50%,2.154068,3.413798,3.920054,9.041683,5.863224,16.496245,8.263532,19.208824,10.09388,25.420373,...,0.911301,2.148076,4.648441,7.689552,11.949015,16.629641,22.292739,28.93878,36.03517,46.022958
75%,3.69496,10.375486,5.37598,14.528123,6.94682,21.930821,9.461357,26.046287,11.380041,31.133628,...,1.250899,2.930291,5.522883,9.087181,13.522446,18.552511,24.7135,31.831104,39.896266,49.405466
max,6.76629,28.039167,9.392448,33.807454,11.188849,46.709748,13.052736,46.429357,15.361142,49.884416,...,2.233025,5.254388,9.784848,12.951744,19.028456,26.031733,30.414982,38.19876,51.140673,58.532303
