In [1]:
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
warnings.filterwarnings('ignore')

In [2]:
## α_jt = β_0 + β_1*CP_jt + β_2*P(m)_jt + δ_j + v_jt

In [165]:
def gen_welfare_df(days, c, output_dir, seed=0):
    """Simulate daily data for four products (p1..p4) and save it as CSV.

    For each product ``j`` the outcome column ``a_<j>`` is built as::

        a_<j> = c['incpt']
                + c['cp_<j>'] * cp_<j>
                + c['price_market_<j>'] * price_market_<j>
                + noise

    where ``cp_<j>`` and ``price_market_<j>`` are independent normal draws
    and ``noise`` is ONE N(0, 0.05) draw per day that is shared by all four
    products (it is not drawn separately per product).

    Parameters
    ----------
    days : int
        Length of the window ending today; with daily frequency this yields
        ``days + 1`` rows.
    c : mapping
        Coefficients keyed by ``'incpt'``, ``'cp_p1'``..``'cp_p4'`` and
        ``'price_market_p1'``..``'price_market_p4'``.
    output_dir : str or path-like
        Despite the name, this is the path of the CSV *file* to write.
        Missing parent directories are created.
    seed : int, default 0
        Value passed to ``np.random.seed``. Note that this reseeds NumPy's
        global random state as a side effect.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (``'YYYY-MM-DD'`` strings), the eight regressors and
        the four outcomes ``a_p1``..``a_p4``.
    """
    products = ('p1', 'p2', 'p3', 'p4')
    # (mean, std) of each simulated regressor. Insertion order fixes the
    # order of the random draws, so it must not be changed if results are to
    # stay reproducible for a given seed.
    regressor_params = {
        'cp_p1': (2, 2),
        'cp_p2': (3, 3),
        'cp_p3': (4, 3),
        'cp_p4': (1, 2),
        'price_market_p1': (10, 25),
        'price_market_p2': (15, 15),
        'price_market_p3': (9, 10),
        'price_market_p4': (5, 1),
    }

    # Step 1: daily dates covering the last `days` days up to today.
    end_date = datetime.today()
    start_date = end_date - timedelta(days=days)
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    num_days = len(dates)

    # Step 2: draw the regressors (CP and market price) and the noise term.
    np.random.seed(seed)  # For reproducibility
    columns = {'Date': dates}
    for name, (mean, std) in regressor_params.items():
        columns[name] = np.random.normal(mean, std, num_days)
    noise = np.random.normal(0, 0.05, num_days)

    df = pd.DataFrame(columns)
    # Store dates as 'YYYY-MM-DD' strings (drops the time-of-day component).
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    # Step 3: outcome per product as a linear combination of its regressors.
    for product in products:
        cp_col = f'cp_{product}'
        price_col = f'price_market_{product}'
        df[f'a_{product}'] = (
            c['incpt']
            + c[cp_col] * df[cp_col]
            + c[price_col] * df[price_col]
            + noise
        )

    # Step 4: persist. to_csv does not create directories, so make sure the
    # parent exists; file-like targets are passed through untouched.
    if isinstance(output_dir, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(output_dir))
        if parent:
            os.makedirs(parent, exist_ok=True)
    df.to_csv(output_dir, index=False)
    return df

# Coefficients fed to the data generator: one slope per regressor of each
# product (p1..p4) plus a single shared intercept.
c = dict(
    cp_p1=-0.036,
    cp_p2=-0.028,
    cp_p3=-0.005,
    cp_p4=-0.043,
    price_market_p1=0.012,
    price_market_p2=0.005,
    price_market_p3=0.007,
    price_market_p4=0.011,
    incpt=0.001,
)

# Simulate a 180-day window and write it to the stage-3 CSV.
df_perceived = gen_welfare_df(
    days=180,
    c=c,
    output_dir='./data/stage3_data.csv',
)

In [166]:
# Preview the first 10 rows of the simulated dataset.
df_perceived.head(10)

Unnamed: 0,Date,cp_p1,cp_p2,cp_p3,cp_p4,price_market_p1,price_market_p2,price_market_p3,price_market_p4,a_p1,a_p2,a_p3,a_p4
0,2023-12-19,5.528105,-1.124854,6.795545,4.036522,-30.045901,42.340854,-1.431295,7.243602,-0.468156,0.334607,0.04741,-0.002484
1,2023-12-20,2.800314,1.069145,5.019895,0.230709,-12.179524,7.176305,0.429218,3.577205,-0.224134,0.028777,0.000737,0.052261
2,2023-12-21,3.957476,-3.670209,3.952954,0.112328,-13.319726,-2.770299,15.774622,6.922325,-0.291669,0.099551,0.101294,0.081952
3,2023-12-22,6.481786,4.875694,4.482785,3.156395,41.082985,29.410401,9.518204,2.884944,0.295473,0.046354,0.080035,-0.068169
4,2023-12-23,5.735116,-1.806173,3.42804,-4.118369,30.316851,34.935943,0.208394,6.405365,0.175249,0.243164,0.00223,0.26546
5,2023-12-24,0.045444,-0.31315,2.815451,3.362757,24.681484,2.737604,6.688984,6.618054,0.328131,0.056045,0.066335,-0.038211
6,2023-12-25,3.900177,3.156495,3.196799,-0.263808,-2.633958,-6.020209,-7.388073,4.175591,-0.17094,-0.117409,-0.066627,0.058349
7,2023-12-26,1.697286,0.781311,0.615966,1.327857,-10.394789,30.456574,1.666872,5.42258,-0.223175,0.093071,-0.028747,-0.034785
8,2023-12-27,1.793562,7.629044,4.841325,1.192643,-2.68794,-15.709854,30.495745,5.547481,-0.14604,-0.341379,0.140047,-0.039477
9,2023-12-28,2.821197,-0.878571,1.020629,2.884936,-16.297003,-3.399325,8.097562,4.186206,-0.346037,-0.041306,0.00267,-0.126914


In [167]:
# Summary statistics (count, mean, std, quantiles) of the numeric columns.
df_perceived.describe()

Unnamed: 0,cp_p1,cp_p2,cp_p3,cp_p4,price_market_p1,price_market_p2,price_market_p3,price_market_p4,a_p1,a_p2,a_p3,a_p4
count,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0
mean,2.181614,2.631232,3.742797,0.730153,7.467936,16.612453,10.020177,4.965703,0.013282,0.011593,0.053632,0.025431
std,2.014758,2.908498,3.077145,1.983075,23.55556,13.981487,9.631505,1.017536,0.296548,0.126516,0.082445,0.091454
min,-3.10598,-5.317778,-4.219032,-5.092286,-54.569916,-15.709854,-18.773591,2.005387,-0.782691,-0.341379,-0.138244,-0.251455
25%,0.63038,0.92385,1.652113,-0.392653,-10.394789,7.176305,4.459196,4.288694,-0.189671,-0.073708,-0.004621,-0.0313
50%,2.133034,2.592151,3.848189,0.797437,8.321646,16.57071,10.589587,4.940072,0.009009,0.002322,0.055533,0.028641
75%,3.688726,4.735564,5.929943,1.904978,21.806179,25.356435,16.316589,5.625119,0.223498,0.094702,0.112883,0.081275
max,6.76629,9.91175,12.088672,5.761491,74.860615,62.564622,34.263682,7.642936,0.861954,0.342895,0.241734,0.26546
