In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm

import sys
sys.path.append("../")
from bargains_linear import * 

In [28]:
def solve_beta1(c1_c2, lambda_val,phi1,phi2):
    """Return the ratio of two sample means built from lambda, phi1 and phi2.

    The value returned is

        mean(3*lambda_val - 2*phi1 + 2*phi2 - c1_c2)
        --------------------------------------------
            mean(3*lambda_val + 2*phi2 - c1_c2)

    Parameters
    ----------
    c1_c2 : scalar or array-like
        Term subtracted from both numerator and denominator.
    lambda_val : scalar or array-like
        Multiplied by 3.0 in both numerator and denominator.
    phi1, phi2 : array-like exposing ``.mean()`` after arithmetic
        (e.g. numpy arrays or pandas Series). Swapping the two arguments
        yields the mirrored quantity used by callers for the other beta.

    Returns
    -------
    scalar
        Ratio of the two means.
    """
    scaled_lambda = 3.0*lambda_val
    # Keep the left-to-right order of operations so rounding is unchanged.
    numerator_terms = scaled_lambda - 2.0*phi1 + 2.0*phi2 - c1_c2
    denominator_terms = scaled_lambda + 2.0*phi2 - c1_c2
    return numerator_terms.mean()/denominator_terms.mean()


def create_test_stat(df,c1_c2=0):
    """Compute the scaled test statistic and its 1.96-sigma bound for one sample.

    Steps:
      1. ``lambda_val`` is the mean of ``p1 - (2*phi1 + phi2)/3``.
      2. ``beta1`` and ``beta2`` come from ``solve_beta1`` (the second call
         swaps the roles of ``phi1`` and ``phi2``).
      3. ``phi1_pred`` is a scalar computed from ``lambda_val``, ``beta1``
         and ``beta2``.
      4. The per-row deviation is ``phi1_pred - df['phi1']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'p1'``, ``'phi1'`` and ``'phi2'``.
    c1_c2 : scalar, default 0
        NOTE(review): accepted but currently NOT used -- both ``solve_beta1``
        calls pass a literal 0. Forwarding it would presumably also require
        adjusting the ``phi1_pred`` formula; confirm the intended model
        before wiring it through.

    Returns
    -------
    tuple
        ``(sqrt(n) * mean(deviation), 1.96 * deviation.std())`` where ``n``
        is the number of rows in ``df``.

    Notes
    -----
    NOTE(review): with the cost term fixed at 0, ``phi1_pred`` appears to
    reduce algebraically to ``mean(df['phi1'])``, which would make the first
    returned value zero up to floating-point rounding for any input. Confirm
    that this is the intended statistic.
    """
    lambda_val = ( df['p1'] - (2*df['phi1'] + df['phi2'])/3 ).mean()
    beta1 = solve_beta1(0, lambda_val,df['phi1'],df['phi2'])
    beta2 = solve_beta1(0, lambda_val,df['phi2'],df['phi1'])

    # Only the phi1 prediction feeds the statistic; the unused phi2
    # counterpart that was computed here has been dropped.
    phi1_pred = 3*lambda_val*(beta1*beta2 - 2*beta1 - beta2 + 2)/(2*(-beta1*beta2 + beta1 + beta2))

    test_stat = phi1_pred - df['phi1']
    n_obs = test_stat.shape[0]
    return np.sqrt(n_obs)*(test_stat).mean(), 1.96*test_stat.std()


def run_test(df):
    """Decide whether the test rejects for the sample ``df``.

    Calls ``create_test_stat(df)`` with its default arguments and compares
    the absolute value of the first returned element against the second.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed unchanged to ``create_test_stat``.

    Returns
    -------
    bool-like
        True when ``|statistic| >= bound``, otherwise False.
    """
    scaled_mean, critical_bound = create_test_stat(df)
    rejected = critical_bound <= np.abs(scaled_mean)
    return rejected

# Smoke test: load the three round-4 files from fake_data and run the test
# once on a 10% subsample of each, printing the raw statistic and the decision.
passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_4.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_4.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_4.csv',index_col=0)

# Seeded generator so that rerunning the cell draws the same subsamples.
smoke_rng = np.random.RandomState(0)
for df in [passive_result1,active_result1,seq_result1]:
    sample = df.sample(frac=0.1, random_state=smoke_rng)
    print(create_test_stat(sample))
    print(run_test(sample))
    print('--')

(4.403989502982614e-14, 5.744910071948477)
False
--
(2.4491574276791064e-14, 4.355609152437135)
False
--
(-1.0335893731489809e-13, 11.070825926026997)
False
--


In [3]:
# Monte Carlo rejection rates: for each of the 5 rounds of fake data, draw
# `nsims` 10% subsamples from each dataset and record how often run_test rejects.

# Accumulates one record (dict) per round/dataset; converted to a DataFrame once at the end.
results_list = []

# Number of subsamples drawn per dataset and round (constant across rounds).
nsims = 200

# One seeded generator shared by every draw: successive subsamples differ from
# each other, but a full rerun of the cell reproduces the same table.
resample_rng = np.random.RandomState(0)

for j in range(5):
    # Read the dataframes
    seq_result = pd.read_csv('../GMM_est/fake_data/seq_data_%s.csv' % j, index_col=0)
    active_result = pd.read_csv('../GMM_est/fake_data/active_data_%s.csv' % j, index_col=0)
    passive_result = pd.read_csv('../GMM_est/fake_data/passive_data_%s.csv' % j, index_col=0)

    # Listed in reporting order (passive, active, seq) so the rows of the
    # final table come out in the same order as before.
    labelled_frames = [
        ('passive', passive_result),
        ('active', active_result),
        ('seq', seq_result),
    ]

    for label, frame in labelled_frames:
        # Count rejections over nsims independent 10% subsamples.
        n_rejections = sum(
            bool(run_test(frame.sample(frac=0.1, random_state=resample_rng)))
            for _ in range(nsims)
        )
        # Store the rejection frequency (share of subsamples that rejected).
        results_list.append({'Round':j,'DataFrame': label, 'Rejections': n_rejections/nsims})

results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','Rejections'])

print(results_df)

    Round DataFrame  Rejections
0       0   passive       0.000
1       0    active       0.005
2       0       seq       1.000
3       1   passive       0.000
4       1    active       0.015
5       1       seq       1.000
6       2   passive       0.000
7       2    active       0.015
8       2       seq       1.000
9       3   passive       1.000
10      3    active       1.000
11      3       seq       1.000
12      4   passive       1.000
13      4    active       1.000
14      4       seq       1.000
