In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm

import sys
sys.path.append("../")
from bargains_linear import * 

In [2]:
# Column layout shared by the three simulated data sets.
_RESULT_COLUMNS = ['phi1','phi2','p1','p2','s1','s2','x1','x2','v']


def _outcome_row(phi1, phi2, cost, wtp_j, mc, mc1):
    """Solve the downstream equilibrium for one (phi1, phi2) pair and return one data row."""
    phi1, phi2, p1, p2, s1, s2 = solve_eq(phi1, phi2, cost, wtp_j, mc)
    # x1/x2 record only the mc1 component of marginal cost; the extra mc2 noise is not stored.
    return [phi1, phi2, p1, p2, s1, s2, mc1[0], mc1[1], wtp_j]


def gen_data(params,num_mkts=20,nobs=100,var=5):
    """Simulate bargaining outcomes under the sequential, active and passive solvers.

    For every market a willingness-to-pay shifter is drawn; for every
    observation within the market a pair of marginal costs is drawn. The same
    draws are then fed to `seq_bargain`, `simult_bargain(active=True)` and
    `simult_bargain(active=False)`, each followed by `solve_eq`.

    Parameters
    ----------
    params : sequence of 6 numbers
        Unpacked as (wtp, cost, beta1, beta2, emc1, emc2). `beta1`/`beta2` are
        passed to the solvers as `betas`; `emc1`/`emc2` are the means of the
        two marginal-cost draws.
    num_mkts : int
        Number of markets (one willingness-to-pay draw each).
    nobs : int
        Number of observations per market.
    var : float
        Passed as `scale` to `np.random.normal` for the recorded cost draw,
        i.e. it is used as a standard deviation despite the name.

    Returns
    -------
    (seq_result, active_result, passive_result) : tuple of pandas.DataFrame
        One frame per solver with columns phi1, phi2, p1, p2, s1, s2 (the
        values returned by `solve_eq`), x1, x2 (the recorded cost draws) and
        v (the market willingness to pay). Each frame has
        `num_mkts * nobs` rows and a 0..n-1 index.
    """
    wtp, cost, beta1, beta2, emc1, emc2 = params
    mean_mc = np.array([emc1, emc2])

    # Rows are collected in plain lists and turned into frames once at the
    # end; concatenating onto a growing frame is quadratic and leaves every
    # row with index label 0.
    seq_rows, active_rows, passive_rows = [], [], []

    for _ in range(num_mkts):
        # Market-level willingness-to-pay shock (standard deviation fixed at 5).
        mkt_wtp = np.random.normal(loc=0, scale=5)
        wtp_j = wtp + mkt_wtp
        for _ in range(nobs):
            # mc1 is the part of marginal cost stored as x1/x2; mc2 is extra
            # unit-scale noise that enters the solvers only.
            mc1 = np.random.normal(loc=0, scale=var, size=(2,)) + mean_mc
            mc2 = np.random.normal(loc=0, scale=1, size=(2,))
            mc = mc1 + mc2

            # NOTE(review): the leading 25s are presumably starting values
            # for the solvers -- confirm against bargains_linear.

            # sequential bargaining
            phi1, phi2 = seq_bargain(25, cost, wtp_j, mc, betas=[beta1, beta2])
            seq_rows.append(_outcome_row(phi1, phi2, cost, wtp_j, mc, mc1))

            # simultaneous bargaining, active
            phi1, phi2 = simult_bargain(25, 25, cost, wtp_j, mc, active=True, betas=[beta1, beta2])
            active_rows.append(_outcome_row(phi1, phi2, cost, wtp_j, mc, mc1))

            # simultaneous bargaining, passive
            phi1, phi2 = simult_bargain(25, 25, cost, wtp_j, mc, active=False, betas=[beta1, beta2])
            passive_rows.append(_outcome_row(phi1, phi2, cost, wtp_j, mc, mc1))

    seq_result = pd.DataFrame(seq_rows, columns=_RESULT_COLUMNS)
    active_result = pd.DataFrame(active_rows, columns=_RESULT_COLUMNS)
    passive_result = pd.DataFrame(passive_rows, columns=_RESULT_COLUMNS)
    return seq_result, active_result, passive_result

# Earlier parameter grids, kept for reference:
#[25,5,.5,.2,0,0],[25,5,.2,.5,0,0],

#param_list = [[25,5,.5,.5,0,0],[23,4,.5,.5,0,0],[27,7,.5,.5,0,0],[25,5,.5,.1,0,0],[25,5,.1,.5,0,0]] old list 1/25
#param_list = [[25,5,.5,.5,0,0],[20,5,.5,.5,0,0],[30,7,.5,.5,0,0],[25,5,.5,.1,0,0],[25,5,.1,.5,0,0]]old list 3/18

# Each entry is unpacked by gen_data as (wtp, cost, beta1, beta2, emc1, emc2).
param_list = [[25,5,.5,.1,0,0],[25,5,.1,.5,0,0]]

# gen_data draws from the global NumPy RNG (np.random.normal), so seed it once
# here; otherwise every run writes different CSVs and nothing downstream can be
# reproduced from a fresh kernel.
SEED = 0
np.random.seed(SEED)

# NOTE(review): files are written to 'fake_data_wtp_fixed/' relative to the
# working directory, while the cells below read '../GMM_est/fake_data_wtp_fixed/'.
# These coincide only if the notebook runs from inside GMM_est -- confirm.
for i, params in enumerate(param_list):
    # 500 markets x 2 observations each = 1000 rows per data set.
    seq_result,active_result,passive_result = gen_data(params,num_mkts=500,nobs=2,var=6)
    seq_result.to_csv('fake_data_wtp_fixed/seq_data_%s.csv'%i)
    active_result.to_csv('fake_data_wtp_fixed/active_data_%s.csv'%i)
    passive_result.to_csv('fake_data_wtp_fixed/passive_data_%s.csv'%i)

In [3]:
def run_test(df,print_v=False,alpha=.05,outlier_sd=3.5):
    """Test whether `v` helps explain `phi2` in two OLS specifications.

    Regression 1 regresses phi2 on a constant, v, and a flexible set of
    phi1 terms (phi1, phi1^2, log(phi1), 1/phi1) using all rows of `df`.
    Regression 2 regresses phi2 on a constant and v only, after dropping rows
    whose residual from regression 1 is `outlier_sd` or more residual standard
    deviations away from zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'phi1', 'phi2' and 'v'. It is copied, so the
        caller's frame is not modified.
    print_v : bool
        If True, print the summary of regression 2 followed by regression 1.
    alpha : float
        Significance level for both tests.
    outlier_sd : float
        Residual cut-off (in residual standard deviations) used to build the
        cleaned sample for regression 2.

    Returns
    -------
    numpy.ndarray of bool, shape (2,)
        [reject in regression 1, reject in regression 2], where "reject" means
        the p-value on `v` is <= alpha.
    """
    df = df.copy()
    df['phi1^2'] = df['phi1']**2
    df['1/phi1'] = 1/df['phi1']
    df['log(phi1)'] = np.log(df['phi1'])

    flexible_cols = ['v','phi1','phi1^2','log(phi1)','1/phi1']

    # Regression 1 on the full sample. The same fit also supplies the
    # residuals for outlier screening, so it is estimated only once.
    # NOTE(review): regression 1 is evaluated on the *uncleaned* sample, as in
    # the original code; only regression 2 uses the cleaned rows. Confirm that
    # this asymmetry is intended.
    res1 = sm.OLS(df['phi2'],sm.add_constant(df[flexible_cols])).fit()

    #clean df of weird obs
    clean_resid = np.abs(res1.resid) < outlier_sd*res1.resid.std()
    df_clean = df[clean_resid]

    res2 = sm.OLS(df_clean['phi2'],sm.add_constant(df_clean[['v']])).fit()
    if print_v :
        print(res2.summary())
        print(res1.summary())

    # Look the coefficient up by label: positional [] on a label-indexed
    # Series is deprecated in pandas, and position 1 is not 'v' when
    # add_constant skips adding 'const' (a constant column is already present).
    return np.array([res1.pvalues['v']<=alpha,res2.pvalues['v']<=alpha])


# Load the three data sets simulated for parameter set 0.
passive_result1 = pd.read_csv('../GMM_est/fake_data_wtp_fixed/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data_wtp_fixed/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data_wtp_fixed/seq_data_0.csv',index_col=0)


# Order matters for the printed output: passive, active, then sequential.
dfs = [passive_result1,active_result1,seq_result1]
for i, df in enumerate(dfs):
    # One random half-sample (without replacement) per data set.
    df_sample = df.sample(frac=0.5)
    # Regression summaries are printed only for the sequential data (i == 2).
    # run_test prints them before returning, so a single call yields the same
    # output as fitting once to print and a second time for the result.
    print(run_test(df_sample,print_v=(i==2)))

[False False]
[ True  True]
                            OLS Regression Results                            
Dep. Variable:                   phi2   R-squared:                       0.116
Model:                            OLS   Adj. R-squared:                  0.114
Method:                 Least Squares   F-statistic:                     63.43
Date:                Mon, 18 Mar 2024   Prob (F-statistic):           1.20e-14
Time:                        12:56:18   Log-Likelihood:                -1450.3
No. Observations:                 485   AIC:                             2905.
Df Residuals:                     483   BIC:                             2913.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         17.0691   

In [4]:
# Collect one summary record per (round, data set); turned into a DataFrame at the end
results_list = []

for j in range(2):
    # Load the three simulated data sets for parameter set j
    seq_result = pd.read_csv('../GMM_est/fake_data_wtp_fixed/seq_data_%s.csv' % j, index_col=0)
    active_result = pd.read_csv('../GMM_est/fake_data_wtp_fixed/active_data_%s.csv' % j, index_col=0)
    passive_result = pd.read_csv('../GMM_est/fake_data_wtp_fixed/passive_data_%s.csv' % j, index_col=0)

    nsims = 500

    # Insertion order (seq, active, passive) is the order in which subsamples
    # are drawn inside each simulation round.
    frames = {'seq': seq_result, 'active': active_result, 'passive': passive_result}

    # Per data set: how often [regression 1, regression 2] rejected so far
    reject_counts = {label: np.array([0,0]) for label in frames}

    for i in range(nsims):
        for label, frame in frames.items():
            # Half-sample drawn without replacement, then both tests are run on it
            half_sample = frame.sample(frac=0.5)
            reject_counts[label] = reject_counts[label] + run_test(half_sample)

    # Convert counts into rejection frequencies
    reject_rates = {label: counts/nsims for label, counts in reject_counts.items()}

    # Records are appended in the order passive, active, seq
    for label in ('passive', 'active', 'seq'):
        results_list.append({'Round':j,'DataFrame': label, 'Rejections 1': reject_rates[label][0],
                             'Rejections 2': reject_rates[label][1]})


results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','Rejections 1','Rejections 2'])

In [5]:
# Rejection frequencies per round and data set, with the v-only regression
# ('Rejections 2') shown before the flexible regression ('Rejections 1').
print(results_df.loc[:, ['Round','DataFrame','Rejections 2','Rejections 1']])

   Round DataFrame  Rejections 2  Rejections 1
0      0   passive         0.010         0.100
1      0    active         1.000         1.000
2      0       seq         1.000         0.678
3      1   passive         0.010         0.000
4      1    active         1.000         1.000
5      1       seq         0.052         0.046


In [6]:
# Quick adjustment for the two-stage structure of the test: stage 2 is only
# run when stage 1 rejects passive.
# Non-passive rows keep 'Rejections 1'; passive rows get
# 1 - 'Rejections 2' + 'Rejections 1'.
# NOTE(review): the passive formula adds two marginal frequencies, so it can
# exceed 1 (it does whenever 'Rejections 1' > 'Rejections 2'). A valid rate
# would need the joint outcome of both tests on each subsample -- confirm the
# intended quantity.
is_passive = results_df['DataFrame']=='passive'
passive_adjusted = 1  - results_df['Rejections 2'] + results_df['Rejections 1']
results_df['Round 2'] = results_df['Rejections 1'].where(~is_passive, passive_adjusted)

print(results_df.loc[:, ['Round','DataFrame','Rejections 2','Round 2']])

   Round DataFrame  Rejections 2  Round 2
0      0   passive         0.010    1.090
1      0    active         1.000    1.000
2      0       seq         1.000    0.678
3      1   passive         0.010    0.990
4      1    active         1.000    1.000
5      1       seq         0.052    0.046
