In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm
import statsmodels.stats.api as sms
import matplotlib.pyplot as plt 

import sys
sys.path.append("../")
from bargains_linear import * 

In [2]:
def solve_c1_c2(beta1,lambda_val,phi1,phi2):
    """Return the c1_c2 value implied by beta1, lambda_val, phi1 and phi2.

    This is the closed-form rearrangement of the relation in `solve_foc`:
    the returned value is the c1_c2 for which
    `solve_foc(beta1, c1_c2, lambda_val, phi1, phi2)` equals zero.

    Only arithmetic operators are used, so any mix of operands supporting
    them can be passed (the notebook passes both scalars and pandas Series).
    The denominator is `beta1 - 1.0`, so the expression is undefined at
    beta1 == 1.
    """
    numerator = (
        3.0*beta1*lambda_val
        + 2.0*beta1*phi2
        - 3.0*lambda_val
        + 2.0*phi1
        - 2.0*phi2
    )
    denominator = beta1 - 1.0
    return numerator / denominator

def solve_beta1(c1_c2, lambda_val,phi1,phi2):
    """Return the beta1 value implied by c1_c2, lambda_val, phi1 and phi2.

    This is the closed-form rearrangement of the relation in `solve_foc`:
    the returned value is the beta1 for which
    `solve_foc(beta1, c1_c2, lambda_val, phi1, phi2)` equals zero.

    Only arithmetic operators are used, so scalars and array-likes both work.
    The ratio is undefined where `3*lambda_val + 2*phi2 - c1_c2` is zero.
    """
    top = 3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2
    bottom = 3.0*lambda_val + 2.0*phi2 - c1_c2
    return top / bottom

def solve_foc(beta1, c1_c2, lambda_val, phi1, phi2):
    """Return the residual of the condition linking beta1 and c1_c2.

    The value is
        beta1*(3*lambda_val + 2*phi2 - c1_c2)
        - (3*lambda_val - 2*phi1 + 2*phi2 - c1_c2)
    and is zero exactly when beta1 and c1_c2 satisfy the relation that
    `solve_beta1` and `solve_c1_c2` solve in closed form.
    """
    weighted_side = beta1*(3.0*lambda_val + 2.0*phi2 - c1_c2)
    unweighted_side = 3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2
    return weighted_side - unweighted_side

In [3]:
def create_test_stat(df,c1_c2=0,resid_cutoff=2.75,threshold_scale=.675):
    """Compute the regression F-statistic and the threshold used by `run_test`.

    Steps:
      1. lambda_val is the sample mean of ``p1 - (2*phi1 + phi2)/3``.
      2. beta1 is obtained from `solve_beta1` at the sample means of phi1 and
         phi2, given the hypothesised `c1_c2`.
      3. A per-observation c1_c2 is backed out with `solve_c1_c2` using that
         beta1 and each row's phi1/phi2.
      4. Observations are trimmed: phi2 is regressed on a constant and the
         per-observation c1_c2, and only rows whose absolute residual is below
         ``resid_cutoff`` residual standard deviations are kept.
      5. On the kept rows, the per-observation c1_c2 is regressed on a
         constant and ``c2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``p1``, ``phi1``, ``phi2`` and ``c2``.
        It is not modified.
    c1_c2 : float, default 0
        Hypothesised value passed to `solve_beta1`.
    resid_cutoff : float, default 2.75
        Trimming bound in step 4, in units of the residual standard deviation.
    threshold_scale : float, default .675
        Multiplier applied to the number of kept observations to form the
        returned threshold.

    Returns
    -------
    tuple
        ``(fvalue, threshold)``: the F-statistic of the step-5 regression and
        ``n_kept * threshold_scale``.
    """
    lambda_val = ( df['p1'] - (2*df['phi1'] + df['phi2'])/3 ).mean()

    beta1 = solve_beta1(c1_c2, lambda_val, df['phi1'].mean(), df['phi2'].mean())
    c1_c2_obs = solve_c1_c2(beta1, lambda_val, df['phi1'], df['phi2'])

    # NOTE(review): the trimming regression uses phi2 as the dependent
    # variable, whereas the final regression below uses c1_c2_obs on c2.
    # Confirm that trimming on a different specification is intended.
    trim_fit = sm.OLS(df['phi2'], sm.add_constant(c1_c2_obs)).fit()
    clean_resid = np.abs(trim_fit.resid) < resid_cutoff*trim_fit.resid.std()

    model_fit = sm.OLS(
        c1_c2_obs[clean_resid],
        sm.add_constant(df.loc[clean_resid, ['c2']]),
    ).fit()

    return model_fit.fvalue, model_fit.resid.shape[0]*threshold_scale

def run_test(df):
    """Return whether the statistic from `create_test_stat` is within its threshold.

    `create_test_stat(df)` returns a pair; the first element is the test
    statistic and the second is the bound it is compared against. The result
    is True when the absolute statistic is less than or equal to that bound.

    NOTE(review): later cells add this boolean to counters named
    "rejections"; confirm that True is meant to denote a rejection.
    """
    statistic, threshold = create_test_stat(df)
    within_threshold = np.abs(statistic) <= threshold
    return within_threshold

# Quick check on simulation round 0: run the test once on a 10% subsample of
# each simulated dataset (passive, active, sequential, in that order).
passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_0.csv',index_col=0)
for df in [passive_result1,active_result1,seq_result1]:
    # Fixed seed so that re-running the cell reproduces the printed results.
    sample = df.sample(frac=0.1, random_state=0)
    print(run_test(sample))
    print('--')

False
--
True
--
True
--


In [4]:
# Same check on simulation round 2, also printing the (statistic, threshold)
# pair behind each decision.
print('--------------------')
passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_2.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_2.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_2.csv',index_col=0)
for df in [passive_result1,active_result1,seq_result1]:
    # Fixed seed so that re-running the cell reproduces the printed results.
    sample = df.sample(frac=0.1, random_state=0)
    print(create_test_stat(sample))
    print(run_test(sample))

--------------------
(887.9552938263112, 668.25)
False
(637.917843582028, 657.45)
True
(732.1528418043781, 669.6)
False


In [5]:
# Monte Carlo over the five simulated rounds: for every dataset, draw `nsims`
# random 10% subsamples and record the share for which `run_test` is True.
nsims = 500

# One seeded generator shared by every draw: successive subsamples differ,
# but the whole experiment is reproducible when the cell is re-run.
subsample_rng = np.random.RandomState(0)


def _rejection_rate(data, nsims, rng, frac=0.1):
    """Share of `nsims` random subsamples of `data` for which `run_test` is True."""
    hits = 0
    for _ in range(nsims):
        hits = hits + run_test(data.sample(frac=frac, random_state=rng))
    return hits/nsims


# List of result rows; converted to a DataFrame once all rounds are done.
results_list = []

for j in range(5):
    # Read the dataframes
    seq_result = pd.read_csv('../GMM_est/fake_data/seq_data_%s.csv' % j, index_col=0)
    active_result = pd.read_csv('../GMM_est/fake_data/active_data_%s.csv' % j, index_col=0)
    passive_result = pd.read_csv('../GMM_est/fake_data/passive_data_%s.csv' % j, index_col=0)

    rejections_seq = _rejection_rate(seq_result, nsims, subsample_rng)
    rejections_active = _rejection_rate(active_result, nsims, subsample_rng)
    rejections_passive = _rejection_rate(passive_result, nsims, subsample_rng)

    # Add the results for this round
    results_list.append({'Round':j,'DataFrame': 'passive', 'Rejections': rejections_passive})
    results_list.append({'Round':j,'DataFrame': 'active', 'Rejections': rejections_active})
    results_list.append({'Round':j,'DataFrame': 'seq', 'Rejections': rejections_seq})

results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','Rejections'])

print(results_df)

    Round DataFrame  Rejections
0       0   passive       0.018
1       0    active       0.118
2       0       seq       0.764
3       1   passive       0.002
4       1    active       0.034
5       1       seq       0.438
6       2   passive       0.004
7       2    active       0.140
8       2       seq       0.012
9       3   passive       0.064
10      3    active       0.074
11      3       seq       0.534
12      4   passive       0.004
13      4    active       0.076
14      4       seq       1.000
