In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import fsolve
from matplotlib import pyplot as plt
import pandas as pd
import statsmodels.api as sm
import statsmodels.stats.api as sms


import sys
sys.path.append("../")
from bargains_linear import * 

In [2]:
def run_test_logit(df,print_v=False):
    """Pick the candidate regressor that best explains ``phi2``.

    Rows are first filtered to ``s1`` different from 0 and 1 and to
    ``phi1``/``phi2`` in ``[0, 100]``. For every candidate column an OLS of
    ``phi2`` on a constant, the controls (``pi_2``, ``pi_2^(-1)``, ``pi_2^2``)
    and the candidate, its square and its reciprocal is fitted, and the
    R-squared values are compared.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``s1``, ``s2``, ``phi1``, ``phi2``, ``p2`` and ``v``.
        The caller's frame is not modified; all work happens on a copy.
    print_v : bool, optional
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    int
        Position of the winning candidate in ``transformed_cols``
        (0 for ``'s1*phi1'``, 1 for ``'v'``), or 3 when the best R-squared
        exceeds the runner-up by no more than 0.02.
    """
    # Minimum R-squared lead required to declare a winner, and the code
    # returned when no candidate achieves it.
    r2_margin = 0.02
    no_selection_index = 3

    # Row filter: s1 strictly different from 0 and 1, phi values within [0, 100].
    keep = (
        (df['s1'] != 0) & (df['s1'] != 1)
        & (df['phi1'] >= 0) & (df['phi2'] >= 0)
        & (np.abs(df['phi1']) <= 100) & (np.abs(df['phi2']) <= 100)
    )
    # .copy() makes the column assignments below write to an independent frame
    # instead of a slice (no SettingWithCopyWarning). reset_index replaces the
    # former argument-less reindex(), which left the index untouched.
    df = df[keep].copy().reset_index(drop=True)

    # Candidate regressors.
    df['s1*phi1'] = df['s1'] * df['phi1']
    # Only needed if the third candidate in transformed_cols is re-enabled.
    df['phi1*(s1+s2)'] = df['phi1']

    # Control columns: pi_2 plus its reciprocal and its square.
    df['pi_2'] = (df['p2'] - df['phi2']) * df['s2']
    control_cols = ['pi_2']
    for col in list(control_cols):  # snapshot: control_cols is extended in the loop
        df[col+'^2'] = df[col]**2
        df[col+'^(-1)'] = df[col]**(-1)
        control_cols += [col+'^(-1)', col+'^2']

    # Fit one regression per candidate and record its R-squared.
    transformed_cols = ['s1*phi1', 'v']#, 'phi1*(s1+s2)']
    r2_values = {}
    for col in transformed_cols:
        df[col+'^2'] = df[col]**2
        df[col+'^(-1)'] = df[col]**(-1)

        predictors = sm.add_constant(df[control_cols+[col,col+'^2',col+'^(-1)']])
        r2_values[col] = sm.OLS(df['phi2'], predictors).fit().rsquared

    # Best candidate, then the best among the remaining ones.
    best_col = max(r2_values, key=r2_values.get)
    best_r2 = r2_values.pop(best_col)
    runner_up_r2 = max(r2_values.values())

    # Too close to call: report "no selection" instead of a winner.
    if best_r2 - runner_up_r2 <= r2_margin:
        return no_selection_index

    return transformed_cols.index(best_col)
    

# Load the `_0` passive, active and seq data sets from a single directory.
DATA_DIR = '../GMM_est/fake_data_wtp_logit_fixed'
passive_result1 = pd.read_csv(DATA_DIR + '/passive_data_0.csv',index_col=0)
active_result1 = pd.read_csv(DATA_DIR + '/active_data_0.csv',index_col=0)
seq_result1 = pd.read_csv(DATA_DIR + '/seq_data_0.csv',index_col=0)
print(seq_result1)


dfs = [passive_result1,active_result1,seq_result1]

# Seeded random state shared by all draws, so re-running the cell prints the
# same selections; each .sample() call advances it and gets a different subset.
rng_preview = np.random.RandomState(0)

# Run the selection test once on a random half of each data set.
for df in dfs:
    df_sample = df.sample(frac=0.5, random_state=rng_preview)
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    print(run_test_logit(df_sample))


         phi1       phi2         p1         p2        s1        s2        x1  \
0   18.003399  20.897494  27.095218  28.563246  0.457114  0.340810 -0.364276   
0   18.000502  21.530924  26.965569  28.741932  0.469101  0.328832 -0.167739   
0   18.498544  21.112347  27.137996  28.526526  0.463106  0.350812 -0.216219   
0   18.359729  21.771147  27.326678  28.691419  0.459053  0.349400  0.196457   
0   18.161885  20.677460  26.744038  27.800958  0.444996  0.360208 -0.447703   
..        ...        ...        ...        ...       ...       ...       ...   
0   18.478128  21.453276  27.326678  28.730203  0.455878  0.344302 -0.285924   
0   18.721610  21.748656  27.506748  28.675280  0.445584  0.352722  0.364925   
0   18.261433  21.250686  27.423014  28.391909  0.440728  0.363089 -0.059837   
0   18.315579  20.652497  26.889782  27.904881  0.445625  0.363747 -0.443736   
0   18.505286  21.067207  27.659116  28.714626  0.433159  0.350725  0.128829   

          x2          v  
0  -0.342756 

In [3]:
# One summary record per (round, data set) is collected in this list and
# converted to a DataFrame once, after the loop.
results_list = []

DATA_DIR = '../GMM_est/fake_data_wtp_logit_fixed'
nsims = 100  # number of random half-sample draws per data set
# Seeded random state shared by every draw, so re-running the cell reproduces
# the table; each .sample() call advances it and gets a different subset.
rng_mc = np.random.RandomState(0)


def _selection_shares(data, n_draws, random_state):
    """Return how often each ``run_test_logit`` result occurred.

    Entry k of the returned length-4 array is the fraction of ``n_draws``
    random 50% subsamples of ``data`` for which ``run_test_logit`` returned k.
    """
    counts = np.zeros(4, dtype=int)
    for _ in range(n_draws):
        subsample = data.sample(frac=0.5, random_state=random_state)
        counts[run_test_logit(subsample)] += 1
    return counts / n_draws


for j in [0,1,2]:
    # Read the three data sets of round j
    seq_result = pd.read_csv('%s/seq_data_%s.csv' % (DATA_DIR, j), index_col=0)
    active_result = pd.read_csv('%s/active_data_%s.csv' % (DATA_DIR, j), index_col=0)
    passive_result = pd.read_csv('%s/passive_data_%s.csv' % (DATA_DIR, j), index_col=0)

    # Selection frequencies indexed by the value run_test_logit returned
    rejections_seq = _selection_shares(seq_result, nsims, rng_mc)
    rejections_active = _selection_shares(active_result, nsims, rng_mc)
    rejections_passive = _selection_shares(passive_result, nsims, rng_mc)

    # Result index -> column: 0 'Passive Selected', 1 'Active Selected',
    # 2 'Seq Selected', 3 'No Selection'.
    # NOTE(review): run_test_logit currently evaluates only two candidates
    # (indices 0 and 1) and returns 3 for "no selection", so index 2 can never
    # occur and 'Seq Selected' is always 0 -- confirm this is intended.
    for label, shares in (('passive', rejections_passive),
                          ('active', rejections_active),
                          ('seq', rejections_seq)):
        results_list.append({'Round': j, 'DataFrame': label, 'Passive Selected': shares[0],
                             'Active Selected': shares[1], 'Seq Selected': shares[2],
                             'No Selection': shares[3]})


results_df = pd.DataFrame(data=results_list,columns=['Round','DataFrame','No Selection','Passive Selected','Active Selected','Seq Selected'])
print(results_df)

   Round DataFrame  No Selection  Passive Selected  Active Selected  \
0      0   passive          0.01              0.99              0.0   
1      0    active          0.00              0.00              1.0   
2      0       seq          0.00              0.00              1.0   
3      1   passive          0.00              1.00              0.0   
4      1    active          0.00              0.00              1.0   
5      1       seq          0.00              0.00              1.0   
6      2   passive          0.00              1.00              0.0   
7      2    active          0.00              0.00              1.0   
8      2       seq          0.00              0.00              1.0   

   Seq Selected  
0           0.0  
1           0.0  
2           0.0  
3           0.0  
4           0.0  
5           0.0  
6           0.0  
7           0.0  
8           0.0  
