# PCA on Covariates Simulations Notebook

In [48]:
#%pip install tabulate
from tabulate import tabulate

In [12]:
# Packages
import numpy as np
import pandas as pd
import seaborn as sns
from pca import pca
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FactorAnalysis
import math
from tqdm import tqdm

In [13]:
# Supressing Output
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:  
            yield
        finally:
            sys.stdout = old_stdout

**My main changes and what I want people to review in the code block below:**
    
1) I added an IV section

2) I added a new thing where we take half of the mismeasured covariate values and then use them as the exponent to raise e to (e.g. z4 = e**z4)

3) i got rid of the 1,000 value for beta1 and beta2

4) i got rid of the p = 10 value

**Comments on those changes:**

1) i'm curious if i did this right

2) i did this because of bonhomme's feedback about where PCA might outperform taking the simple avg. we've gotten some tentative signs that PCA does do better than the avg when you stretch out he observations

3) , 4) it seemed like we didn't really need those values

In [14]:
# Data frame to store output
output = pd.DataFrame()

# 2,000 observations
N = 2000

# Loop over combinations of: betas, covariances between variables, p numbers of parameters
for beta1 in [0.1,1,10]:
    for beta2 in [0.1,1,10]:
        for covariance in [-0.9,-0.5,0,0.5,0.9]:
            for p in [5,20,50]:
                # Select only the scenarios we actually want to run by default - betas of 1, covariances of 0.5, p of 5... require three of these conditions to be satisfied to run.
                counter = 0
                if beta1 == 1:
                    counter += 1
                if beta2 == 1:
                    counter += 1
                if covariance == 0.5:
                    counter += 1
                if p == 5:
                    counter+=1
                if counter >= 3:
                    # Run with and without transformations
                    for exp_of_var in ['yes','no']:
                        # 1000 simulations
                        for k in tqdm(range(1)):
                            # Initialize Lists to store coef values for all five methods and the true coef
                            pca_coef = []
                            mismeasured_coef = []
                            mismeasured_allvar_coef = []
                            mismeasured_avg_coef = []
                            iv_coef = []
                            true_val_coef =[]

                            # Create variables
                            vars_mean = [0,0,0]
                            vars_cov = np.array([[1,covariance,0],
                                                 [covariance,1,0],
                                                 [0,0,1]])
                            # Producing 3 variables: x for the variable of interest, the true Z covariate, the random error
                            vars_ = pd.DataFrame(np.random.multivariate_normal(vars_mean, vars_cov, N), columns = ['x','true_z','u'])
                            vars_['y'] = beta1 * vars_['x'] + beta2 * vars_['true_z'] + vars_['u']

                            # Create measurement errors for each of the p measurements of the covariates- mean zero and variance one
                            errors_mean = np.zeros(p)
                            errors_cov = np.zeros((p,p))
                            for i in range(p):
                                for j in range(p):
                                    if i == j:
                                        errors_cov[i,j] = 1

                            errors = np.random.multivariate_normal(errors_mean, errors_cov, N)
                            # Column labels for Z variables (covariates variables mismeasured)
                            z_vars = []
                            for i in range(p):
                                z_vars.append('z'+str(i+1))
                            # Add errors to the true_z to get mismeasured values
                            mismeasured_z = pd.DataFrame(errors, columns = z_vars)
                            for i in mismeasured_z.columns:
                                mismeasured_z[i] = mismeasured_z[i] + vars_['true_z']

                            # Take e to the power of the values for half of the measurements if log_of_var is true
                            if exp_of_var == 'yes':
                                mismeasured_z.iloc[:,int(len(mismeasured_z.columns)/2):] =np.exp(mismeasured_z.iloc[:,int(len(mismeasured_z.columns)/2):])
                                
                            # Do feature scaling (normalize to mean 0 and variance 1) for the mismeasured z
                            # Note that x and y are already normalized by construction
                            scaled_mismeasured_z = mismeasured_z.copy()
                            for i in mismeasured_z.columns:
                                scaled_mismeasured_z[i] = (mismeasured_z[i] - mismeasured_z[i].mean()) / mismeasured_z[i].std()

                            # Suppress output
                            with suppress_stdout():
                                # Use PCA on the mismeasured values
                                pca_model = pca()
                                pca_results = pca_model.fit_transform(scaled_mismeasured_z)
                                pca_z = pca_results['PC']['PC1']

                            # NOTE: in non-pca cases, no need to rescale or normalize since mismeasured variables and x and y have mean 0 and sd 1

                            # Average mismeasured variables:
                            vars_['avg_mismeasured_z'] = mismeasured_z[z_vars].mean(axis=1)

                            # Add relevant variables to vars_ dataframe
                            vars_[mismeasured_z.columns] = mismeasured_z
                            vars_['pca_z'] = pca_z

                            # Single mismeasured covariate results
                            model_mismeasured = sm.OLS(vars_['y'],vars_[['x','z1']])
                            results_mismeasured = model_mismeasured.fit()
                            mismeasured_coef.append(results_mismeasured.params[0])

                            # All Variables Mismeasured Results
                            # Create full list of items to include in regression
                            tot_vars = ['x']
                            tot_vars.extend(z_vars)
                            model_mismeasured_allvar = sm.OLS(vars_['y'],vars_[tot_vars])
                            results_mismeasured_allvar = model_mismeasured_allvar.fit()
                            mismeasured_allvar_coef.append(results_mismeasured_allvar.params[0])

                            # Average Mismeasured Variables Results
                            model_mismeasured_avg = sm.OLS(vars_['y'],vars_[['x','avg_mismeasured_z']])
                            results_mismeasured_avg = model_mismeasured_avg.fit()
                            mismeasured_avg_coef.append(results_mismeasured_avg.params[0])

                            # PCA Results
                            model_pca = sm.OLS(vars_['y'],vars_[['x','pca_z']])
                            results_pca = model_pca.fit()
                            pca_coef.append(results_pca.params[0])

                            # Instrumental Variables Results
                            # Instrument z1 on the other items in the mismeasured df
                            z_string = mismeasured_z.columns[1]
                            for z in mismeasured_z.columns[2:]:
                                z_string = z_string + ' + ' + z
                            # Create the predicted value of z1
                            vars_['pred_z1'] = smf.ols("z1 ~ " + z_string, data = mismeasured_z).fit().predict()
                            iv_results = smf.ols("y ~ x + pred_z1 -1", data = vars_).fit()
                            iv_coef.append(iv_results.params[0])

                            # True Results
                            model_true = sm.OLS(vars_['y'],vars_[['x','true_z']])
                            results_true = model_true.fit()
                            true_val_coef.append(results_true.params[0])

                            # Output Findings
                            new_output = pd.DataFrame()
                            new_output['mismeasured_coef'] = mismeasured_coef
                            new_output['mismeasured_allvar_coef'] = mismeasured_allvar_coef
                            new_output['mismeasured_avg_coef'] = mismeasured_avg_coef
                            new_output['pca_coef'] = pca_coef
                            new_output['true_val_coef'] = true_val_coef
                            new_output['iv_coef'] = iv_coef
                            new_output['covariance'] = vars_cov[0][1]
                            new_output['beta1'] = beta1
                            new_output['beta2'] = beta2
                            new_output['p'] = p
                            new_output['exp_of_var'] = exp_of_var
                            output = output.append(new_output)

output

100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
100%|██████████| 1/1 [00:00<00:00,  2.16it/s]
100%|██████████| 1/1 [00:00<00:00,  1.86it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00,  1.96it/s]
100%|██████████| 1/1 [00:00<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 1/1 [00:00<00:00,  2.13it/s]
100%|██████████| 1/1 [00:00<00:00,  2.05it/s]
100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
100%|██████████| 1/1 [00:00<00:00,  1.95it/s]
100%|██████████| 1/1 [00:00<00:00,  2.01it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  1.75it/s]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s]
100%|██████████| 1/1 [00:00<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 1/1 [00:00<00:00,

Unnamed: 0,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,covariance,beta1,beta2,p,exp_of_var
0,0.384711,0.284687,0.461622,0.304217,0.078832,0.336824,0.5,0.1,1.0,5,yes
0,0.416596,0.240255,0.237699,0.238001,0.110964,0.259016,0.5,0.1,1.0,5,no
0,1.023938,1.013558,1.047948,1.022123,1.000814,1.023621,0.5,1.0,0.1,5,yes
0,1.031123,1.014726,1.016047,1.015768,0.998597,1.019786,0.5,1.0,0.1,5,no
0,0.31296,0.434659,0.238264,0.414166,1.047758,0.359382,-0.9,1.0,1.0,5,yes
0,0.253317,0.557337,0.555925,0.556324,1.034771,0.521769,-0.9,1.0,1.0,5,no
0,0.670943,0.772139,0.606857,0.746793,0.960534,0.719947,-0.5,1.0,1.0,5,yes
0,0.70326,0.889607,0.891001,0.889483,1.01172,0.875617,-0.5,1.0,1.0,5,no
0,0.999917,1.000177,0.997883,0.996827,0.995787,0.995029,0.0,1.0,1.0,5,yes
0,0.99332,1.001567,1.001578,1.001842,0.993013,1.000098,0.0,1.0,1.0,5,no


In [15]:
output

Unnamed: 0,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,covariance,beta1,beta2,p,exp_of_var
0,0.384711,0.284687,0.461622,0.304217,0.078832,0.336824,0.5,0.1,1.0,5,yes
0,0.416596,0.240255,0.237699,0.238001,0.110964,0.259016,0.5,0.1,1.0,5,no
0,1.023938,1.013558,1.047948,1.022123,1.000814,1.023621,0.5,1.0,0.1,5,yes
0,1.031123,1.014726,1.016047,1.015768,0.998597,1.019786,0.5,1.0,0.1,5,no
0,0.31296,0.434659,0.238264,0.414166,1.047758,0.359382,-0.9,1.0,1.0,5,yes
0,0.253317,0.557337,0.555925,0.556324,1.034771,0.521769,-0.9,1.0,1.0,5,no
0,0.670943,0.772139,0.606857,0.746793,0.960534,0.719947,-0.5,1.0,1.0,5,yes
0,0.70326,0.889607,0.891001,0.889483,1.01172,0.875617,-0.5,1.0,1.0,5,no
0,0.999917,1.000177,0.997883,0.996827,0.995787,0.995029,0.0,1.0,1.0,5,yes
0,0.99332,1.001567,1.001578,1.001842,0.993013,1.000098,0.0,1.0,1.0,5,no


In [16]:
output.to_csv('C://Users//paulo//Documents//Econometrics and ML//Final Project//n_2000_results.csv')

ImportError: Missing optional dependency 'fsspec'.  Use pip or conda to install fsspec.

In [17]:
pd.set_option('display.max_rows', 10)

In [18]:
output['pca_ape'] = (output['pca_coef'] - output['beta1']).abs()/output['beta1']
output['mismeasured_coef_ape'] = (output['mismeasured_coef'] - output['beta1']).abs()/output['beta1']
output['mismeasured_allvar_coef_ape'] = (output['mismeasured_allvar_coef'] - output['beta1']).abs()/output['beta1']
output['mismeasured_avg_coef_ape'] = (output['mismeasured_avg_coef'] - output['beta1']).abs()/output['beta1']
output['iv_coef_ape'] = (output['iv_coef'] - output['beta1']).abs()/output['beta1']

In [19]:
output.groupby(['covariance','beta1','beta2','p','exp_of_var']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,exp_of_var,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,-0.9,1.0,1.0,5,no,0.253317,0.557337,0.555925,0.556324,1.034771,0.521769,0.443676,0.746683,0.442663,0.444075,0.478231
1,-0.9,1.0,1.0,5,yes,0.312960,0.434659,0.238264,0.414166,1.047758,0.359382,0.585834,0.687040,0.565341,0.761736,0.640618
2,-0.5,1.0,1.0,5,no,0.703260,0.889607,0.891001,0.889483,1.011720,0.875617,0.110517,0.296740,0.110393,0.108999,0.124383
3,-0.5,1.0,1.0,5,yes,0.670943,0.772139,0.606857,0.746793,0.960534,0.719947,0.253207,0.329057,0.227861,0.393143,0.280053
4,0.0,1.0,1.0,5,no,0.993320,1.001567,1.001578,1.001842,0.993013,1.000098,0.001842,0.006680,0.001567,0.001578,0.000098
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,0.5,1.0,10.0,5,yes,3.753425,2.661201,4.314199,2.795113,0.957088,3.110050,1.795113,2.753425,1.661201,3.314199,2.110050
18,0.5,10.0,1.0,5,no,10.255386,10.084698,10.084437,10.084169,9.982149,10.104177,0.008417,0.025539,0.008470,0.008444,0.010418
19,0.5,10.0,1.0,5,yes,10.281280,10.172920,10.336066,10.189723,10.014172,10.225450,0.018972,0.028128,0.017292,0.033607,0.022545
20,0.9,1.0,1.0,5,no,1.776549,1.489890,1.488711,1.488764,1.062791,1.543923,0.488764,0.776549,0.489890,0.488711,0.543923


In [20]:
output.groupby(['covariance','beta1','beta2','p','exp_of_var']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,exp_of_var,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,-0.9,1.0,1.0,5,no,,,,,,,,,,,
1,-0.9,1.0,1.0,5,yes,,,,,,,,,,,
2,-0.5,1.0,1.0,5,no,,,,,,,,,,,
3,-0.5,1.0,1.0,5,yes,,,,,,,,,,,
4,0.0,1.0,1.0,5,no,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,0.5,1.0,10.0,5,yes,,,,,,,,,,,
18,0.5,10.0,1.0,5,no,,,,,,,,,,,
19,0.5,10.0,1.0,5,yes,,,,,,,,,,,
20,0.9,1.0,1.0,5,no,,,,,,,,,,,


In [21]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,1.0,5,1.294988,1.190368,1.361737,1.18725,0.993675,1.240627,0.18725,0.294988,0.190368,0.361737,0.240627
1,0.5,1.0,1.0,20,1.238942,1.015418,1.239692,1.043035,0.980625,1.027858,0.043035,0.238942,0.015418,0.239692,0.027858
2,0.5,1.0,1.0,50,1.2682,1.017634,1.261771,1.058399,0.996999,1.041114,0.058399,0.2682,0.017634,0.261771,0.041114


In [22]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,1.0,5,,,,,,,,,,,
1,0.5,1.0,1.0,20,,,,,,,,,,,
2,0.5,1.0,1.0,50,,,,,,,,,,,


In [23]:
output.loc[(output['p']==5)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,0.1,1.0,5,0.384711,0.284687,0.461622,0.304217,0.078832,0.336824,2.042171,2.847107,1.846874,3.616217,2.368239
1,0.5,1.0,1.0,5,1.294988,1.190368,1.361737,1.18725,0.993675,1.240627,0.18725,0.294988,0.190368,0.361737,0.240627
2,0.5,10.0,1.0,5,10.28128,10.17292,10.336066,10.189723,10.014172,10.22545,0.018972,0.028128,0.017292,0.033607,0.022545


In [24]:
output.loc[(output['p']==5)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,0.1,1.0,5,,,,,,,,,,,
1,0.5,1.0,1.0,5,,,,,,,,,,,
2,0.5,10.0,1.0,5,,,,,,,,,,,


In [25]:
output.loc[(output['beta1']==1)&(output['p']==5)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,0.1,5,1.023938,1.013558,1.047948,1.022123,1.000814,1.023621,0.022123,0.023938,0.013558,0.047948,0.023621
1,0.5,1.0,1.0,5,1.294988,1.190368,1.361737,1.18725,0.993675,1.240627,0.18725,0.294988,0.190368,0.361737,0.240627
2,0.5,1.0,10.0,5,3.753425,2.661201,4.314199,2.795113,0.957088,3.11005,1.795113,2.753425,1.661201,3.314199,2.11005


In [26]:
output.loc[(output['beta1']==1)&(output['p']==5)&(output['covariance'] == 0.5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,0.1,5,,,,,,,,,,,
1,0.5,1.0,1.0,5,,,,,,,,,,,
2,0.5,1.0,10.0,5,,,,,,,,,,,


In [50]:
import regex as re

In [63]:
test_df = (output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['p'] == 5)&(output['exp_of_var']=='yes')]
                 .groupby(['covariance','beta1','beta2','p'])
                 .mean()
                 .reset_index())

coefs_table = (test_df.transpose()
                      .reset_index()
                      .query('index.str.contains("coef")')
                      .query('index != "true_val_coef"')
                      .query('not index.str.contains("ape")'))

#print(re.sub(r'\\hline', '', re.sub(r'\\begin.+', '', tabulate(coefs_table, tablefmt="latex_raw"))))
with open("test.tex", "w") as f:
    corrected_table = coefs_table.to_latex(header = False, index = False)
    corrected_table = re.sub(r'\\begin.+', '', corrected_table)
    corrected_table = re.sub(r'\\bottomrule', '', corrected_table)
    corrected_table = re.sub(r'\\toprule', '', corrected_table)
    corrected_table = re.sub(r'\\end.+', '', corrected_table)
    f.write(corrected_table.strip())

In [28]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['p'] == 5)&(output['exp_of_var']=='yes')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,-0.9,1.0,1.0,5,,,,,,,,,,,
1,-0.5,1.0,1.0,5,,,,,,,,,,,
2,0.0,1.0,1.0,5,,,,,,,,,,,
3,0.5,1.0,1.0,5,,,,,,,,,,,
4,0.9,1.0,1.0,5,,,,,,,,,,,


In [29]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p','exp_of_var']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,exp_of_var,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,1.0,5,no,1.2599,1.065215,1.065952,1.065936,0.977238,1.081315,0.065936,0.2599,0.065215,0.065952,0.081315
1,0.5,1.0,1.0,20,no,1.325881,1.050691,1.052427,1.052862,1.015886,1.057282,0.052862,0.325881,0.050691,0.052427,0.057282
2,0.5,1.0,1.0,50,no,1.275242,1.028534,1.02827,1.028349,1.017811,1.039462,0.028349,0.275242,0.028534,0.02827,0.039462


In [30]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p','exp_of_var']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,exp_of_var,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,1.0,5,no,,,,,,,,,,,
1,0.5,1.0,1.0,20,no,,,,,,,,,,,
2,0.5,1.0,1.0,50,no,,,,,,,,,,,


In [31]:
output.loc[(output['p']==5)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,0.1,1.0,5,0.416596,0.240255,0.237699,0.238001,0.110964,0.259016,1.380015,3.165958,1.402554,1.376989,1.59016
1,0.5,1.0,1.0,5,1.2599,1.065215,1.065952,1.065936,0.977238,1.081315,0.065936,0.2599,0.065215,0.065952,0.081315
2,0.5,10.0,1.0,5,10.255386,10.084698,10.084437,10.084169,9.982149,10.104177,0.008417,0.025539,0.00847,0.008444,0.010418


In [32]:
output.loc[(output['p']==5)&(output['beta2']==1)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,0.1,1.0,5,,,,,,,,,,,
1,0.5,1.0,1.0,5,,,,,,,,,,,
2,0.5,10.0,1.0,5,,,,,,,,,,,


In [33]:
output.loc[(output['beta1']==1)&(output['p']==5)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,0.1,5,1.031123,1.014726,1.016047,1.015768,0.998597,1.019786,0.015768,0.031123,0.014726,0.016047,0.019786
1,0.5,1.0,1.0,5,1.2599,1.065215,1.065952,1.065936,0.977238,1.081315,0.065936,0.2599,0.065215,0.065952,0.081315
2,0.5,1.0,10.0,5,3.810465,1.987292,1.988081,1.986771,1.009551,2.223894,0.986771,2.810465,0.987292,0.988081,1.223894


In [34]:
output.loc[(output['beta1']==1)&(output['p']==5)&(output['covariance'] == 0.5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,0.5,1.0,0.1,5,,,,,,,,,,,
1,0.5,1.0,1.0,5,,,,,,,,,,,
2,0.5,1.0,10.0,5,,,,,,,,,,,


In [35]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['p'] == 5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).mean().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,-0.9,1.0,1.0,5,0.253317,0.557337,0.555925,0.556324,1.034771,0.521769,0.443676,0.746683,0.442663,0.444075,0.478231
1,-0.5,1.0,1.0,5,0.70326,0.889607,0.891001,0.889483,1.01172,0.875617,0.110517,0.29674,0.110393,0.108999,0.124383
2,0.0,1.0,1.0,5,0.99332,1.001567,1.001578,1.001842,0.993013,1.000098,0.001842,0.00668,0.001567,0.001578,9.8e-05
3,0.5,1.0,1.0,5,1.2599,1.065215,1.065952,1.065936,0.977238,1.081315,0.065936,0.2599,0.065215,0.065952,0.081315
4,0.9,1.0,1.0,5,1.776549,1.48989,1.488711,1.488764,1.062791,1.543923,0.488764,0.776549,0.48989,0.488711,0.543923


In [36]:
output.loc[(output['beta1']==1)&(output['beta2']==1)&(output['p'] == 5)&(output['exp_of_var']=='no')].groupby(['covariance','beta1','beta2','p']).std().reset_index()

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape,iv_coef_ape
0,-0.9,1.0,1.0,5,,,,,,,,,,,
1,-0.5,1.0,1.0,5,,,,,,,,,,,
2,0.0,1.0,1.0,5,,,,,,,,,,,
3,0.5,1.0,1.0,5,,,,,,,,,,,
4,0.9,1.0,1.0,5,,,,,,,,,,,
