In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm

import sys
sys.path.append("../")
from bargains_linear import * 

In [4]:
def perturb_c(df, d1, d2):
    # Get a list of columns
    columns = [col for col in df.columns if col not in ['c1', 'c2','c1-c2']]
    df['c1-c2'] = df['c1'] - df['c2']

    # Create a new dataframe with d1 and d2
    new_df = pd.DataFrame({'c1': d1, 'c2': d2,'const':np.ones(len(d1))})
    new_df['c1-c2'] = new_df['c1'] - new_df['c2']

    # Go through the columns in the old dataframe
    # Try to predict them and a function of c1 and c2
    for col in columns:
        # Regress df['c1'] and df['c2'] on the various columns using status models
        model = sm.OLS(df[col], sm.add_constant(df[['c1-c2']]))
        model_fit = model.fit()
        new_df[col] = model_fit.predict(new_df[['const','c1-c2']])
        
    return new_df.drop(labels='const',axis=1)

passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_0.csv',index_col=0)


for df in [passive_result1,active_result1,seq_result1]:
    res = perturb_c(df,[1],[0])
    print(res)

   c1  c2  c1-c2      phi1      phi2         p1         p2        s1        s2
0   1   0      1  7.357797  7.670039  12.633661  12.405673  0.477293  0.522707
   c1  c2  c1-c2       phi1       phi2         p1         p2        s1  \
0   1   0      1  12.936133  13.599684  18.329101  18.218215  0.488911   

         s2  
0  0.511089  
   c1  c2  c1-c2       phi1       phi2         p1        p2        s1  \
0   1   0      1  16.226934  19.412756  22.460658  23.19053  0.577082   

         s2  
0  0.422918  


In [5]:
def solve_c1_c2(beta1,lambda_val,phi1,phi2):
    return (3.0*beta1*lambda_val + 2.0*beta1*phi2 - 3.0*lambda_val + 2.0*phi1 - 2.0*phi2) / (beta1 - 1.0) 

def solve_beta1(c1_c2, lambda_val,phi1,phi2):
    return (3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2 )/(3.0*lambda_val + 2.0*phi2 - c1_c2)

def solve_foc(beta1, c1_c2, lambda_val, phi1, phi2):
    return beta1*(3.0*lambda_val + 2.0*phi2 - c1_c2) - (3.0*lambda_val - 2.0*phi1 + 2.0*phi2 - c1_c2 )

def calc_foc_se(beta1, c1_c2, lambda_val, phi1, phi2):
    cov = np.cov(np.concatenate([phi1, phi2]))
    gradient = np.array([-2, 2*beta1-2])
    se = gradient.T.dot(cov).dot(gradient)
    return se

def create_test_stat(df,c1_c2=0):
    df = df.copy()
    lambda_val = ( df['p1'] - (2*df['phi1'] + df['phi2'])/3 ).mean()
    
    beta1 = solve_beta1(c1_c2, lambda_val,df['phi1'].mean(),df['phi2'].mean())
    c1_c2_obs = solve_c1_c2(beta1,lambda_val,df['phi1'],df['phi2'])
    beta1_obs = solve_beta1(c1_c2_obs, lambda_val,df['phi1'],df['phi2'])
    
    df_new = perturb_c(df, c1_c2_obs+1, np.zeros(df.shape[0]))
    beta1_new = solve_beta1(c1_c2_obs+1, lambda_val,df_new['phi1'],df_new['phi2'])
    
    foc_obs = solve_foc(beta1,c1_c2_obs, lambda_val,df['phi1'],df['phi2'])
    foc_new = solve_foc(beta1,c1_c2_obs+1, lambda_val,df_new['phi1'],df_new['phi2'])
    
    foc_se = calc_foc_se(beta1, c1_c2, lambda_val, df['phi1'],df['phi2'])
  
    return np.sqrt(beta1_new.shape[0])*np.abs(foc_new).mean(), 2*np.sqrt(foc_se)


def run_test(df):
    test_stat, se = create_test_stat(df)
    return test_stat >= se

passive_result1 = pd.read_csv('../GMM_est/fake_data/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv('../GMM_est/fake_data/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv('../GMM_est/fake_data/seq_data_0.csv',index_col=0)
for df in [passive_result1,active_result1,seq_result1]:
    print(create_test_stat(df.sample(frac=0.1)))
    print('--')

(1.40701570411393, 7.488148181018069)
--
(142.08651105009776, 11.833746474874674)
--
(306.4477602302424, 31.23781762986797)
--


In [None]:
# Create an empty DataFrame to store the results
results_list = []

for j in range(5):
    # Read the dataframes
    seq_result = pd.read_csv('../GMM_est/fake_data/seq_data_%s.csv' % j, index_col=0)
    active_result = pd.read_csv('../GMM_est/fake_data/active_data_%s.csv' % j, index_col=0)
    passive_result = pd.read_csv('../GMM_est/fake_data/passive_data_%s.csv' % j, index_col=0)

    # Initialize rejection count variables
    rejections_seq = 0
    rejections_active = 0
    rejections_passive = 0

    nsims = 25
    
    for i in range(nsims):
        # Resample from seq dataframe
        subsample_seq = seq_result.sample(frac=0.1)
        result_seq = run_test(subsample_seq)
        rejections_seq = rejections_seq+ result_seq
        
        # Resample from active dataframe
        subsample_active = active_result.sample(frac=0.1)
        result_active = run_test(subsample_active)
        rejections_active = rejections_active+ result_active

        # Resample from passive dataframe
        subsample_passive = passive_result.sample(frac=0.1)
        result_passive = run_test(subsample_passive)
        rejections_passive = rejections_passive+ result_passive
        #print('----%s,%s------'%(i,j))
        
    rejections_seq = rejections_seq/nsims
    rejections_active = rejections_active/nsims
    rejections_passive = rejections_passive/nsims
    
    # Add the results to the DataFrame
    results_list.append({'Round':j,'DataFrame': 'passive', 'Rejections': rejections_passive})
    results_list.append({'Round':j,'DataFrame': 'active', 'Rejections': rejections_active})
    results_list.append({'Round':j,'DataFrame': 'seq', 'Rejections': rejections_seq})

results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','Rejections'])
        
print(results_df)  