In [None]:
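# Dependency: the third-party `pca` package. Uncomment to install it into the kernel's environment:
# %pip install pca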

In [50]:
import numpy as np
import pandas as pd
import seaborn as sns
from pca import pca
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FactorAnalysis
import math
from tqdm import tqdm

In [56]:
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    """Silence ``sys.stdout`` for the duration of a ``with`` block.

    While the block runs, ``sys.stdout`` points at a handle opened on
    ``os.devnull``. On exit, whether normal or via an exception, the previous
    ``sys.stdout`` object is put back and the handle is closed.
    """
    sink = open(os.devnull, "w")
    try:
        # Remember the current stream, then swap the null sink in.
        saved_stream, sys.stdout = sys.stdout, sink
        try:
            yield
        finally:
            sys.stdout = saved_stream
    finally:
        sink.close()

In [None]:
# Monte Carlo study: how well do different treatments of a mismeasured
# regressor recover beta1 in   y = beta1*x + beta2*true_z + u ?
#
# Each replication draws (x, true_z, u) jointly normal with zero means, unit
# variances, cov(x, true_z) = `covariance` and u uncorrelated with both. It
# then builds p noisy measurements z_i = true_z + e_i with independent
# standard-normal errors e_i. The coefficient on x is recorded for:
#   mismeasured_coef         y on x and z1 only
#   mismeasured_allvar_coef  y on x and all of z1..zp
#   mismeasured_avg_coef     y on x and the row mean of the (unscaled) z's
#   pca_coef                 y on x and PC1 of the standardized z's
#   iv_coef                  y on x and z1 as predicted from z2..zp
#   true_val_coef            y on x and true_z (benchmark)
#
# NOTE(review): no random seed is set, so every run gives different numbers.

N = 2000  # observations per simulated data set

# One dict per replication, turned into a DataFrame once at the end.
# (DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.)
records = []

for beta1 in [0.1, 1, 10]:
    for beta2 in [0.1, 1, 10]:
        for covariance in [-0.9, -0.5, 0, 0.5, 0.9]:
            for p in [5, 20, 50]:
                # Only run settings where at least three of the four
                # parameters are at beta1 = 1, beta2 = 1, covariance = 0.5,
                # p = 5, i.e. at most one parameter is varied at a time.
                counter = (beta1 == 1) + (beta2 == 1) + (covariance == 0.5) + (p == 5)
                if counter < 3:
                    continue

                # Everything below depends only on the setting, not on the draw.
                z_vars = ['z' + str(i + 1) for i in range(p)]
                tot_vars = ['x'] + z_vars
                z_string = ' + '.join(z_vars[1:])  # z2 + ... + zp, the regressors used to predict z1
                vars_mean = [0, 0, 0]
                vars_cov = np.array([[1, covariance, 0],
                                     [covariance, 1, 0],
                                     [0, 0, 1]])

                for exp_of_var in ['yes', 'no']:
                    for k in tqdm(range(50)):
                        # Create variables
                        vars_ = pd.DataFrame(
                            np.random.multivariate_normal(vars_mean, vars_cov, N),
                            columns=['x', 'true_z', 'u'],
                        )
                        vars_['y'] = beta1 * vars_['x'] + beta2 * vars_['true_z'] + vars_['u']

                        # Create the p mismeasured copies of true_z (identity error covariance)
                        errors = np.random.multivariate_normal(np.zeros(p), np.eye(p), N)
                        mismeasured_z = pd.DataFrame(errors, columns=z_vars).add(vars_['true_z'], axis=0)

                        # If exp_of_var is 'yes', replace the measurements from
                        # column position p // 2 onward by e raised to their value
                        if exp_of_var == 'yes':
                            half = p // 2
                            mismeasured_z.iloc[:, half:] = np.exp(mismeasured_z.iloc[:, half:])

                        # Feature scaling: each column to mean 0 and (sample) standard deviation 1
                        scaled_mismeasured_z = (mismeasured_z - mismeasured_z.mean()) / mismeasured_z.std()

                        # First principal component of the scaled measurements
                        # (stdout is silenced while the pca package runs)
                        with suppress_stdout():
                            pca_model = pca()
                            pca_results = pca_model.fit_transform(scaled_mismeasured_z)
                            pca_z = pca_results['PC']['PC1']

                        # Row-wise average of the unscaled measurements
                        vars_['avg_mismeasured_z'] = mismeasured_z[z_vars].mean(axis=1)

                        # Add relevant variables to the vars_ dataframe
                        vars_[mismeasured_z.columns] = mismeasured_z
                        vars_['pca_z'] = pca_z

                        # Regressions of y on x plus one treatment of z each.
                        # The coefficient on x is read by label, not position.
                        results_mismeasured = sm.OLS(vars_['y'], vars_[['x', 'z1']]).fit()
                        results_mismeasured_allvar = sm.OLS(vars_['y'], vars_[tot_vars]).fit()
                        results_mismeasured_avg = sm.OLS(vars_['y'], vars_[['x', 'avg_mismeasured_z']]).fit()
                        results_pca = sm.OLS(vars_['y'], vars_[['x', 'pca_z']]).fit()

                        # Instrumental variables, done as two manual OLS stages:
                        # predict z1 from z2..zp, then use the prediction in place of z1.
                        # NOTE(review): the first stage does not include x; textbook 2SLS
                        # puts every exogenous regressor in the first stage - confirm
                        # this omission is intended.
                        vars_['pred_z1'] = smf.ols("z1 ~ " + z_string, data=mismeasured_z).fit().predict()
                        iv_results = smf.ols("y ~ x + pred_z1 -1", data=vars_).fit()

                        # Benchmark using the true z
                        results_true = sm.OLS(vars_['y'], vars_[['x', 'true_z']]).fit()

                        # Output findings for this replication
                        records.append({
                            'mismeasured_coef': results_mismeasured.params['x'],
                            'mismeasured_allvar_coef': results_mismeasured_allvar.params['x'],
                            'mismeasured_avg_coef': results_mismeasured_avg.params['x'],
                            'pca_coef': results_pca.params['x'],
                            'true_val_coef': results_true.params['x'],
                            'iv_coef': iv_results.params['x'],
                            'covariance': float(vars_cov[0, 1]),
                            'beta1': float(beta1),
                            'beta2': float(beta2),
                            'p': p,
                            'exp_of_var': exp_of_var,
                        })

# One row per replication. Rows get a unique 0..n-1 index.
output = pd.DataFrame(records)

output

 74%|████████████████████████████████████████████████████████████▋                     | 37/50 [00:26<00:08,  1.51it/s]

In [54]:
# Display the results table built by the simulation loop.
output

Unnamed: 0,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,covariance,beta1,beta2,p,exp_of_var
0,0.359006,0.254973,0.461922,0.314337,0.100295,0.32464,0.5,0.1,1.0,5,yes
0,0.352554,0.204798,0.202172,0.202516,0.093895,0.224741,0.5,0.1,1.0,5,no
0,1.032953,1.036328,1.042672,1.032967,1.012558,1.040077,0.5,1.0,0.1,5,yes
0,1.098744,1.102875,1.104789,1.104469,1.104131,1.110936,0.5,1.0,0.1,5,no
0,0.226803,0.367762,0.167495,0.315336,1.015245,0.301445,-0.9,1.0,1.0,5,yes
0,0.299714,0.564389,0.560785,0.560543,1.004874,0.495016,-0.9,1.0,1.0,5,no
0,0.734387,0.86321,0.684694,0.856965,1.020789,0.830668,-0.5,1.0,1.0,5,yes
0,0.682936,0.88447,0.884072,0.884457,0.980014,0.863093,-0.5,1.0,1.0,5,no
0,0.982017,0.973798,0.979104,0.970243,0.976085,0.967649,0.0,1.0,1.0,5,yes
0,0.975853,0.972519,0.972316,0.97259,0.978884,0.969462,0.0,1.0,1.0,5,no


In [47]:
# Scratch cell: one replication of the simulation run outside the loop, with
# the exp() transform always applied and the pca package's log output visible.
# It does not set N, beta1, beta2, covariance or p itself, so they must already
# exist in the kernel (the simulation loop cell is the only place in this
# notebook that assigns them).

# Create variables
vars_mean = [0, 0, 0]
vars_cov = np.array([[1, covariance, 0],
                     [covariance, 1, 0],
                     [0, 0, 1]])
vars_ = pd.DataFrame(
    np.random.multivariate_normal(vars_mean, vars_cov, N),
    columns=['x', 'true_z', 'u'],
)
vars_['y'] = beta1 * vars_['x'] + beta2 * vars_['true_z'] + vars_['u']

# Create the p mismeasured copies of true_z (identity error covariance)
z_vars = ['z' + str(i + 1) for i in range(p)]
errors = np.random.multivariate_normal(np.zeros(p), np.eye(p), N)
mismeasured_z = pd.DataFrame(errors, columns=z_vars).add(vars_['true_z'], axis=0)

# Exponentiate the measurements from column position p // 2 onward
half = len(mismeasured_z.columns) // 2
mismeasured_z.iloc[:, half:] = np.exp(mismeasured_z.iloc[:, half:])

# Do feature scaling (normalize to mean 0 and variance 1)
scaled_mismeasured_z = (mismeasured_z - mismeasured_z.mean()) / mismeasured_z.std()

# Use PCA on the mismeasured values
pca_model = pca()
pca_results = pca_model.fit_transform(scaled_mismeasured_z)
pca_z = pca_results['PC']['PC1']

# Average mismeasured variables (unscaled):
vars_['avg_mismeasured_z'] = mismeasured_z[z_vars].mean(axis=1)

# Add relevant variables to vars_ dataframe
vars_[mismeasured_z.columns] = mismeasured_z
vars_['pca_z'] = pca_z

# Regressions of y on x plus one treatment of z each.
# The coefficient on x is read by label, not position.
tot_vars = ['x'] + z_vars
results_mismeasured = sm.OLS(vars_['y'], vars_[['x', 'z1']]).fit()
results_mismeasured_allvar = sm.OLS(vars_['y'], vars_[tot_vars]).fit()
results_mismeasured_avg = sm.OLS(vars_['y'], vars_[['x', 'avg_mismeasured_z']]).fit()
results_pca = sm.OLS(vars_['y'], vars_[['x', 'pca_z']]).fit()

# Instrumental variables as two manual OLS stages: predict z1 from z2..zp,
# then regress y on x and the predicted z1 (no intercept in the second stage).
z_string = ' + '.join(z_vars[1:])
vars_['pred_z1'] = smf.ols("z1 ~ " + z_string, data=mismeasured_z).fit().predict()
iv_results = smf.ols("y ~ x + pred_z1 -1", data=vars_).fit()

# True Results
results_true = sm.OLS(vars_['y'], vars_[['x', 'true_z']]).fit()

# Output Findings (single row, index 0)
new_output = pd.DataFrame([{
    'mismeasured_coef': results_mismeasured.params['x'],
    'mismeasured_allvar_coef': results_mismeasured_allvar.params['x'],
    'mismeasured_avg_coef': results_mismeasured_avg.params['x'],
    'pca_coef': results_pca.params['x'],
    'true_val_coef': results_true.params['x'],
    'iv_coef': iv_results.params['x'],
    'covariance': vars_cov[0][1],
    'beta1': beta1,
    'beta2': beta2,
    'p': p,
}])

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): this adds the scratch row to the shared `output` table, and the
# row carries no exp_of_var value - confirm that is intended.
output = pd.concat([output, new_output])

new_output

[pca] >Processing dataframe..
[pca] >The PCA reduction is performed to capture [95.0%] explained variance using the [50] columns of the input data.
[pca] >Fitting using PCA..
[pca] >Computing loadings and PCs..
[pca] >Computing explained variance..
[pca] >Number of components is [44] that covers the [95.00%] explained variance.
[pca] >Outlier detection using Hotelling T2 test with alpha=[0.05] and n_components=[5]
[pca] >Outlier detection using SPE/DmodX with n_std=[2]


Unnamed: 0,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,iv_coef,covariance,beta1,beta2,p
0,17.564238,11.555221,17.527687,12.829206,10.021183,12.277741,0.9,10,10,50


In [45]:
# Preview e**value for the measurement columns from the midpoint column onward.
# Display only: mismeasured_z itself is not modified by this cell.
np.exp(mismeasured_z.iloc[:, mismeasured_z.shape[1] // 2:])

Unnamed: 0,z26,z27,z28,z29,z30,z31,z32,z33,z34,z35,...,z41,z42,z43,z44,z45,z46,z47,z48,z49,z50
0,0.298431,1.575551,0.334491,0.848803,1.730671,2.797961,0.355284,1.403701,0.061266,0.899059,...,0.272675,0.155353,0.671366,0.732897,5.479501,2.357370,2.467747,1.596819,0.817203,0.936529
1,3.721548,0.572476,4.231351,20.735490,0.522629,0.216295,0.336313,1.288644,0.938554,0.641830,...,9.115864,1.152239,0.989162,2.145199,0.761490,1.123665,0.738967,1.443002,7.121029,3.806385
2,0.931946,0.623445,0.526749,2.100512,3.098231,0.912152,1.311980,2.810424,0.794012,0.325323,...,0.178850,1.234117,0.423721,0.759814,1.719093,2.116701,1.321168,0.512055,3.248123,1.202766
3,1.519173,1.346042,0.076208,0.163278,0.352734,0.293031,0.414741,0.649074,0.701087,0.309854,...,0.415287,0.759342,0.344691,0.144702,0.382718,0.121771,0.139808,0.289200,0.628790,0.799331
4,1.467304,1.042534,0.518951,10.518426,0.093846,0.603981,0.353013,1.119737,1.321163,2.237955,...,0.463804,1.545646,1.472396,0.896431,0.513702,0.195902,0.820146,4.603684,0.594662,8.365477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.029560,0.014904,0.135512,0.016430,0.110326,0.645040,0.089742,0.180925,0.627829,0.511855,...,0.040107,0.081054,0.128136,0.068308,0.171004,0.097980,0.075788,0.030121,0.077720,0.135522
1996,0.389362,0.215854,0.091573,0.577933,1.154483,6.531631,0.125024,0.207522,0.500301,0.033707,...,1.821243,0.211754,0.297657,0.202635,0.795872,0.099394,0.370973,0.215426,0.415937,1.275888
1997,0.312428,0.250798,0.198383,1.627318,0.349519,0.525198,0.358730,0.062925,0.047516,0.699385,...,0.073775,0.164790,0.356689,0.220363,0.288886,0.248002,0.631637,0.148287,0.317767,0.094200
1998,1.837706,7.292223,0.515530,0.399024,0.174395,0.434833,0.824082,1.563473,0.980072,0.272010,...,0.951327,0.697621,0.672708,1.784873,1.071517,0.887853,4.317440,5.000274,0.288429,1.694916


In [8]:
# Write the results table to CSV.
# NOTE(review): absolute, machine-specific path - it must be edited before this
# cell can run on another machine.
output.to_csv('C://Users//paulo//Documents//Econometrics and ML//Final Project//n_2000_results.csv')

In [28]:
# Cap DataFrame display at 10 rows so large tables are truncated when shown.
pd.set_option('display.max_rows', 10)

In [9]:
# Add, for four of the estimators, the absolute error of the estimated
# coefficient relative to the true beta1:  |estimate - beta1| / beta1.
for ape_col, coef_col in [
    ('pca_ape', 'pca_coef'),
    ('mismeasured_coef_ape', 'mismeasured_coef'),
    ('mismeasured_allvar_coef_ape', 'mismeasured_allvar_coef'),
    ('mismeasured_avg_coef_ape', 'mismeasured_avg_coef'),
]:
    deviation = (output[coef_col] - output['beta1']).abs()
    output[ape_col] = deviation / output['beta1']

In [10]:
# Mean of every numeric column for each (covariance, beta1, beta2, p) setting.
# Non-numeric columns (such as the 'yes'/'no' exp_of_var flag) are dropped
# before grouping: pandas >= 2.0 raises on them instead of skipping them.
# NOTE(review): rows for both exp_of_var values are pooled within a group -
# confirm that is intended.
(
    output
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .mean()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,-0.9,1.0,1.0,5,0.244402,0.539647,0.539679,0.539015,1.001093,0.460985,0.755598,0.460353,0.460321
1,-0.5,1.0,1.0,5,0.714149,0.89403,0.89398,0.893722,0.999705,0.106278,0.285851,0.10597,0.10602
2,0.0,1.0,1.0,5,0.998864,0.999293,0.999322,0.999327,0.999421,0.019596,0.021828,0.019635,0.019612
3,0.5,0.1,1.0,5,0.385756,0.206114,0.206042,0.206258,0.100858,1.062582,2.857558,1.06114,1.060423
4,0.5,1.0,0.1,5,1.028871,1.010995,1.010988,1.011023,1.000367,0.021316,0.03099,0.021309,0.021293
5,0.5,1.0,1.0,5,1.284736,1.103504,1.103538,1.103772,0.998664,0.103772,0.284736,0.103504,0.103538
6,0.5,1.0,1.0,10,1.285694,1.058323,1.058351,1.058652,0.999323,0.058835,0.285694,0.058523,0.058553
7,0.5,1.0,1.0,20,1.28382,1.029584,1.029578,1.029873,0.998295,0.032897,0.28382,0.032671,0.032673
8,0.5,1.0,1.0,50,1.284698,1.01116,1.01105,1.011357,0.998213,0.022726,0.284698,0.022883,0.022609
9,0.5,1.0,10.0,5,3.863553,2.054715,2.054825,2.057738,0.999296,1.057738,2.863553,1.054715,1.054825


In [11]:
# Standard deviation of every numeric column for each
# (covariance, beta1, beta2, p) setting.
# Non-numeric columns (such as the 'yes'/'no' exp_of_var flag) are dropped
# before grouping: pandas >= 2.0 raises on them instead of skipping them.
# NOTE(review): rows for both exp_of_var values are pooled within a group -
# confirm that is intended.
(
    output
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .std()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,-0.9,1.0,1.0,5,0.031691,0.041496,0.04142,0.041432,0.050989,0.041432,0.031691,0.041496,0.04142
1,-0.5,1.0,1.0,5,0.028706,0.027214,0.027115,0.027131,0.026416,0.027131,0.028706,0.027214,0.027115
2,0.0,1.0,1.0,5,0.026979,0.024444,0.024389,0.024388,0.022925,0.01452,0.015882,0.014563,0.0145
3,0.5,0.1,1.0,5,0.029326,0.027558,0.027524,0.027523,0.026162,0.275232,0.29326,0.275582,0.275237
4,0.5,1.0,0.1,5,0.023741,0.02482,0.024815,0.024836,0.025131,0.016842,0.020898,0.016807,0.016817
5,0.5,1.0,1.0,5,0.02827,0.027473,0.02744,0.027478,0.026511,0.027478,0.02827,0.027473,0.02744
6,0.5,1.0,1.0,10,0.028983,0.027226,0.027152,0.027186,0.026378,0.026786,0.028983,0.026792,0.026714
7,0.5,1.0,1.0,20,0.029216,0.026468,0.02632,0.026353,0.025664,0.022461,0.029216,0.022542,0.022358
8,0.5,1.0,1.0,50,0.027874,0.026194,0.025895,0.025911,0.025711,0.016837,0.027874,0.016931,0.016766
9,0.5,1.0,10.0,5,0.165269,0.101216,0.100996,0.1016,0.025588,0.1016,0.165269,0.101216,0.100996


In [12]:
# Effect of the number of measurements p: group means for the rows with
# beta1 = 1, beta2 = 1 and covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['beta2'] == 1) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .mean()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,1.0,1.0,5,1.284736,1.103504,1.103538,1.103772,0.998664,0.103772,0.284736,0.103504,0.103538
1,0.5,1.0,1.0,10,1.285694,1.058323,1.058351,1.058652,0.999323,0.058835,0.285694,0.058523,0.058553
2,0.5,1.0,1.0,20,1.28382,1.029584,1.029578,1.029873,0.998295,0.032897,0.28382,0.032671,0.032673
3,0.5,1.0,1.0,50,1.284698,1.01116,1.01105,1.011357,0.998213,0.022726,0.284698,0.022883,0.022609


In [16]:
# Effect of the number of measurements p: group standard deviations for the
# rows with beta1 = 1, beta2 = 1 and covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['beta2'] == 1) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .std()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,1.0,1.0,5,0.02827,0.027473,0.02744,0.027478,0.026511,0.027478,0.02827,0.027473,0.02744
1,0.5,1.0,1.0,10,0.028983,0.027226,0.027152,0.027186,0.026378,0.026786,0.028983,0.026792,0.026714
2,0.5,1.0,1.0,20,0.029216,0.026468,0.02632,0.026353,0.025664,0.022461,0.029216,0.022542,0.022358
3,0.5,1.0,1.0,50,0.027874,0.026194,0.025895,0.025911,0.025711,0.016837,0.027874,0.016931,0.016766


In [13]:
# Effect of beta1: group means for the rows with p = 5, beta2 = 1 and
# covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['p'] == 5) & (output['beta2'] == 1) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .mean()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,0.1,1.0,5,0.385756,0.206114,0.206042,0.206258,0.100858,1.062582,2.857558,1.06114,1.060423
1,0.5,1.0,1.0,5,1.284736,1.103504,1.103538,1.103772,0.998664,0.103772,0.284736,0.103504,0.103538
2,0.5,10.0,1.0,5,10.28618,10.105493,10.105437,10.105679,9.999465,0.010568,0.028618,0.010549,0.010544
3,0.5,100.0,1.0,5,100.286184,100.105593,100.10558,100.105846,100.000166,0.001058,0.002862,0.001056,0.001056


In [17]:
# Effect of beta1: group standard deviations for the rows with p = 5,
# beta2 = 1 and covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['p'] == 5) & (output['beta2'] == 1) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .std()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,0.1,1.0,5,0.029326,0.027558,0.027524,0.027523,0.026162,0.275232,0.29326,0.275582,0.275237
1,0.5,1.0,1.0,5,0.02827,0.027473,0.02744,0.027478,0.026511,0.027478,0.02827,0.027473,0.02744
2,0.5,10.0,1.0,5,0.028685,0.026982,0.027032,0.027022,0.025972,0.002702,0.002869,0.002698,0.002703
3,0.5,100.0,1.0,5,0.028351,0.027634,0.027657,0.027696,0.026388,0.000277,0.000284,0.000276,0.000277


In [14]:
# Effect of beta2: group means for the rows with beta1 = 1, p = 5 and
# covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['p'] == 5) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .mean()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,1.0,0.1,5,1.028871,1.010995,1.010988,1.011023,1.000367,0.021316,0.03099,0.021309,0.021293
1,0.5,1.0,1.0,5,1.284736,1.103504,1.103538,1.103772,0.998664,0.103772,0.284736,0.103504,0.103538
2,0.5,1.0,10.0,5,3.863553,2.054715,2.054825,2.057738,0.999296,1.057738,2.863553,1.054715,1.054825
3,0.5,1.0,100.0,5,29.554625,11.519244,11.519468,11.546792,0.999807,10.546792,28.554625,10.519244,10.519468


In [18]:
# Effect of beta2: group standard deviations for the rows with beta1 = 1,
# p = 5 and covariance = 0.5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['p'] == 5) & (output['covariance'] == 0.5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .std()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,0.5,1.0,0.1,5,0.023741,0.02482,0.024815,0.024836,0.025131,0.016842,0.020898,0.016807,0.016817
1,0.5,1.0,1.0,5,0.02827,0.027473,0.02744,0.027478,0.026511,0.027478,0.02827,0.027473,0.02744
2,0.5,1.0,10.0,5,0.165269,0.101216,0.100996,0.1016,0.025588,0.1016,0.165269,0.101216,0.100996
3,0.5,1.0,100.0,5,1.516332,0.974031,0.971576,0.972906,0.025473,0.972906,1.516332,0.974031,0.971576


In [15]:
# Effect of cov(x, true_z): group means for the rows with beta1 = 1, beta2 = 1
# and p = 5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['beta2'] == 1) & (output['p'] == 5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .mean()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,-0.9,1.0,1.0,5,0.244402,0.539647,0.539679,0.539015,1.001093,0.460985,0.755598,0.460353,0.460321
1,-0.5,1.0,1.0,5,0.714149,0.89403,0.89398,0.893722,0.999705,0.106278,0.285851,0.10597,0.10602
2,0.0,1.0,1.0,5,0.998864,0.999293,0.999322,0.999327,0.999421,0.019596,0.021828,0.019635,0.019612
3,0.5,1.0,1.0,5,1.284736,1.103504,1.103538,1.103772,0.998664,0.103772,0.284736,0.103504,0.103538
4,0.9,1.0,1.0,5,1.755877,1.462156,1.462116,1.462879,1.001033,0.462879,0.755877,0.462156,0.462116


In [19]:
# Effect of cov(x, true_z): group standard deviations for the rows with
# beta1 = 1, beta2 = 1 and p = 5.
# Non-numeric columns (such as exp_of_var) are dropped before grouping because
# pandas >= 2.0 raises on them instead of skipping them.
(
    output
    .loc[(output['beta1'] == 1) & (output['beta2'] == 1) & (output['p'] == 5)]
    .select_dtypes(include='number')
    .groupby(['covariance', 'beta1', 'beta2', 'p'])
    .std()
    .reset_index()
)

Unnamed: 0,covariance,beta1,beta2,p,mismeasured_coef,mismeasured_allvar_coef,mismeasured_avg_coef,pca_coef,true_val_coef,pca_ape,mismeasured_coef_ape,mismeasured_allvar_coef_ape,mismeasured_avg_coef_ape
0,-0.9,1.0,1.0,5,0.031691,0.041496,0.04142,0.041432,0.050989,0.041432,0.031691,0.041496,0.04142
1,-0.5,1.0,1.0,5,0.028706,0.027214,0.027115,0.027131,0.026416,0.027131,0.028706,0.027214,0.027115
2,0.0,1.0,1.0,5,0.026979,0.024444,0.024389,0.024388,0.022925,0.01452,0.015882,0.014563,0.0145
3,0.5,1.0,1.0,5,0.02827,0.027473,0.02744,0.027478,0.026511,0.027478,0.02827,0.027473,0.02744
4,0.9,1.0,1.0,5,0.031621,0.040444,0.040354,0.040268,0.050982,0.040268,0.031621,0.040444,0.040354
