In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm

import sys
sys.path.append("../")
from bargains_linear import * 

In [2]:
def solve_c1_c2(beta1,lambda_val,phi1,phi2):
    return (3.0*beta1*lambda_val + 2.0*beta1*phi2 - 3.0*lambda_val + 2.0*phi1 - 2.0*phi2) / (beta1 - 1.0) 

def solve_beta1(c1_c2, lambda_val,phi1,phi2):
    return (3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2 )/(3.0*lambda_val + 2.0*phi2 - c1_c2)

def solve_foc(beta1, c1_c2, lambda_val, phi1, phi2):
    return beta1*(3.0*lambda_val + 2.0*phi2 - c1_c2) - (3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2 )

In [3]:
def calc_foc_se(beta1, c1_c2, lambda_val, phi1, phi2):
    cov = np.cov(np.concatenate([phi1, phi2]))
    gradient = np.array([-2, 2*beta1-2])
    se = gradient.T.dot(cov).dot(gradient)
    return se


def perturb_c(df, c1_c2_obs,shift):
    # Create a new dataframe with d1 and d2
    new_df = df.copy()[['c1','c2','phi1','phi2']]
    new_df['c1'] = new_df['c1'] + shift
    
    model = sm.OLS( c1_c2_obs, sm.add_constant( df[['c1','c2']] ) )
    model_fit = model.fit()
    new_df['c1_c2_new'] = model_fit.predict( sm.add_constant(new_df[['c1','c2']]) )
    new_df['c1_c2_new']  = new_df['c1_c2_new'] + model_fit.resid
    old_resid = model_fit.resid
    
    for col in ['phi1','phi2']:
        model = sm.OLS(df[col], sm.add_constant( new_df['c1_c2_new'] - 1 - old_resid ) )
        model_fit = model.fit()
        new_phi = model_fit.predict( sm.add_constant(new_df[['c1_c2_new']]) )
        new_df[col] = new_phi 

    return new_df


def create_test_stat(df,c1_c2=0):
    df = df.copy()
    lambda_val = ( df['p1'] - (2*df['phi1'] + df['phi2'])/3 ).mean()
    
    beta1 = solve_beta1(c1_c2, lambda_val,df['phi1'].mean(),df['phi2'].mean())
    c1_c2_obs = solve_c1_c2(beta1,lambda_val,df['phi1'],df['phi2'])
    beta1_obs = solve_beta1(c1_c2_obs, lambda_val,df['phi1'],df['phi2'])
    df['c1-c2'] = c1_c2_obs

    df_new = perturb_c( df, c1_c2_obs, 1 )
    foc_new = solve_foc(beta1,df_new['c1_c2_new'], lambda_val,df_new['phi1'],df_new['phi2'])
    
    foc_se = calc_foc_se(beta1, c1_c2, lambda_val, df_new['phi1'],df_new['phi2'])
    

    return np.sqrt(foc_new.shape[0])*np.abs(foc_new).mean(), 1.96*np.sqrt(foc_se)


def run_test(df):
    test_stat, se = create_test_stat(df)
    return np.abs(test_stat) >= se

passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_0.csv',index_col=0)
for df in [passive_result1,active_result1,seq_result1]:
    print(create_test_stat(df.sample(frac=0.1)))
    print('--')

(0.25309588080333884, 5.4117544456309945)
--
(14.785392790088494, 10.999300838554166)
--
(30.034285579899052, 31.399747067384798)
--


In [4]:
# Create an empty DataFrame to store the results
results_list = []

for j in range(5):
    # Read the dataframes
    seq_result = pd.read_csv('../GMM_est/fake_data/seq_data_%s.csv' % j, index_col=0)
    active_result = pd.read_csv('../GMM_est/fake_data/active_data_%s.csv' % j, index_col=0)
    passive_result = pd.read_csv('../GMM_est/fake_data/passive_data_%s.csv' % j, index_col=0)

    # Initialize rejection count variables
    rejections_seq = 0
    rejections_active = 0
    rejections_passive = 0

    nsims = 200
    
    for i in range(nsims):
        # Resample from seq dataframe
        subsample_seq = seq_result.sample(frac=0.1)
        result_seq = run_test(subsample_seq)
        rejections_seq = rejections_seq+ result_seq
        
        # Resample from active dataframe
        subsample_active = active_result.sample(frac=0.1)
        result_active = run_test(subsample_active)
        rejections_active = rejections_active+ result_active

        # Resample from passive dataframe
        subsample_passive = passive_result.sample(frac=0.1)
        result_passive = run_test(subsample_passive)
        rejections_passive = rejections_passive+ result_passive
        #print('----%s,%s------'%(i,j))
        
    rejections_seq = rejections_seq/nsims
    rejections_active = rejections_active/nsims
    rejections_passive = rejections_passive/nsims
    
    # Add the results to the DataFrame
    results_list.append({'Round':j,'DataFrame': 'passive', 'Rejections': rejections_passive})
    results_list.append({'Round':j,'DataFrame': 'active', 'Rejections': rejections_active})
    results_list.append({'Round':j,'DataFrame': 'seq', 'Rejections': rejections_seq})

results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','Rejections'])
        
print(results_df)  

    Round DataFrame  Rejections
0       0   passive       0.000
1       0    active       1.000
2       0       seq       0.030
3       1   passive       0.000
4       1    active       1.000
5       1       seq       0.075
6       2   passive       0.000
7       2    active       1.000
8       2       seq       0.000
9       3   passive       0.000
10      3    active       0.000
11      3       seq       1.000
12      4   passive       0.000
13      4    active       0.000
14      4       seq       0.215
