In [1]:
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
# Silence every Python warning from here on (note: this also hides deprecation notices).
warnings.filterwarnings('ignore')

In [2]:
## α_jt = β_0 + β_1*CP_jt + β_2*P(m)_jt + δ_j + v_jt

In [25]:
def gen_seller_df(days, c, output_dir):
    """Generate a synthetic daily seller dataset, write it to CSV, and return it.

    One row is produced per day from ``days`` days ago through today
    (inclusive, i.e. ``days + 1`` rows for an integer ``days``). Each row holds
    normally distributed profit (``prft_p1``..``prft_p4``) and price
    (``price_p1``..``price_p4``) features, plus a ``preceived_saving`` column
    computed as::

        c['incpt'] + sum(c[col] * df[col] for each feature col) + noise

    where ``noise`` is drawn from N(2, 1).

    Parameters
    ----------
    days : int
        Length of the look-back window, in days, ending today.
    c : mapping
        Coefficients keyed by ``'incpt'`` and by each of the eight feature
        column names.
    output_dir : str or path-like
        Despite the name, this is the path of the CSV *file* to write.
        Missing parent directories are created. Anything that is not a
        str/path-like (e.g. a file buffer) is handed to ``to_csv`` untouched.

    Returns
    -------
    pandas.DataFrame
        The generated frame; ``Date`` is a ``'YYYY-MM-DD'`` string column.

    Notes
    -----
    NumPy's global RNG is reseeded with 0 on every call, so the feature and
    noise values are identical across calls with the same ``days``; only the
    dates move with the current day.
    """
    # (column name, mean, standard deviation) for each synthetic feature.
    # The order is significant: it fixes both the sequence of random draws
    # and the order in which terms are summed below.
    feature_specs = (
        ('prft_p1', 10, 5),
        ('prft_p2', 15, 3),
        ('prft_p3', 3, 1),
        ('prft_p4', 5, 1),
        ('price_p1', 100, 25),
        ('price_p2', 150, 15),
        ('price_p3', 70, 10),
        ('price_p4', 10, 1),
    )

    # Daily timestamps covering the requested window, ending now.
    end_date = datetime.today()
    start_date = end_date - timedelta(days=days)
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    num_days = len(dates)

    # Reseed the global RNG so every call yields the same draws.
    np.random.seed(0)
    columns = {'Date': dates}
    for name, mean, std in feature_specs:
        columns[name] = np.random.normal(mean, std, num_days)
    df = pd.DataFrame(columns)
    noise = np.random.normal(2, 1, num_days)

    # Store dates as 'YYYY-MM-DD' strings (drops the time-of-day component).
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    # Linear response: intercept + weighted features + noise.
    # (The 'preceived_saving' spelling is kept: it is the column name written
    # to the CSV and read back by anything that consumes it.)
    response = c['incpt']
    for name, _, _ in feature_specs:
        response = response + c[name] * df[name]
    df['preceived_saving'] = response + noise

    # to_csv does not create missing directories; make sure the target's
    # parent exists so a fresh checkout can run this without manual setup.
    if isinstance(output_dir, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(output_dir))
        if parent:
            os.makedirs(parent, exist_ok=True)

    df.to_csv(output_dir, index=False)
    return df

# Coefficients of the linear model that gen_seller_df uses to build the
# `preceived_saving` column: one weight per profit/price feature plus an
# intercept ('incpt').
c = dict(
    prft_p1=2,
    prft_p2=1.1,
    prft_p3=1.8,
    prft_p4=2,
    price_p1=-0.02,
    price_p2=-0.11,
    price_p3=-0.08,
    price_p4=-0.03,
    incpt=10,
)

# Simulate a 180-day window and write the result to ./data/stage3_data.csv.
df_seller = gen_seller_df(
    days=180,
    c=c,
    output_dir='./data/stage3_data.csv',
)

In [26]:
# Preview the first rows of the generated seller data.
df_seller.head()

Unnamed: 0,Date,prft_p1,prft_p2,prft_p3,prft_p4,price_p1,price_p2,price_p3,price_p4,preceived_saving
0,2023-12-19,18.820262,10.875146,3.931848,6.518261,59.954099,177.340854,59.568705,12.243602,57.685785
1,2023-12-20,12.000786,13.069145,3.339965,4.615355,77.820476,142.176305,61.429218,8.577205,43.689459
2,2023-12-21,14.89369,8.329791,2.984318,4.556164,76.680274,132.229701,76.774622,11.922325,43.048466
3,2023-12-22,21.204466,16.875694,3.160928,6.078197,131.082985,164.410401,70.518204,7.884944,64.929891
4,2023-12-23,19.33779,10.193827,2.809347,2.440815,120.316851,169.935943,61.208394,11.405365,45.827346
