In [65]:
#!/usr/bin/env python
# coding: utf-8

# # Implementation of Theorem 2: unordered family panel
# _Jiacheng Zou, Aug 23 2022_
# We implement Theorem 2 where 
# * input: a sparse 'matrix' of $p$-values, family-wise error rate target $\gamma$
# * output: selected factors.

# In[4]:


import numpy as np
import numpy.random as rnd
from numpy.linalg import pinv
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.linear_model import LassoCV,Lasso,LassoLarsCV
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
from scipy import stats
from scipy.stats import truncnorm,norm
from scipy.linalg import toeplitz
from sklearn.metrics import r2_score


# In[5]:


np.seterr(divide = 'ignore') 
rnd.seed(1)


# Without loss of generality, we can assume the non-null $\beta$'s to be negative, so their $p=\Phi(\beta)$.
# 
# * w.p. $1-s/d$, we observe $p$-values for true factors;
# * w.p. $1-exp(-1/d)$, we observe $p$-values for null factors;

# In[130]:




# In[93]:

In [66]:
def simulate(NumSimulation,gamma_vec,d_vec,N_vec,T_obs,s_vec,s_weak,noise_stds,factor_strengths,xsec_covs):
    
    simulation_result = []
    for N in N_vec:
        for s in s_vec:
            for factor_strength in factor_strengths:
                for d in d_vec:
                    for noise_std in noise_stds:
                        for xsec_cov in xsec_covs:
                            for i_sim in range(NumSimulation):

                                all_response,all_covariates=generate_sample(d,N,T_obs,s,
                                                                            noise_std,factor_strength,xsec_cov)

                                omega_inv_vec = np.ones(d)

                                posi_log_pval_matrix = np.nan*np.ones((d,N))
                                t_log_pval_matrix = np.nan*np.ones((d,N))
                                OLS_log_pval_matrix=np.nan*np.ones((d,N))

                                T_obs_train_=int(T_obs*.5)
                                covariates = all_covariates[:T_obs_train_,:]
                                response = all_response[:T_obs_train_,:]


                                ####################
                                # Unknown variance case
                                selection_result_dict,  curr_rho = selection_unk_noise_methods(covariates,response,
                                                                                    omega_inv_vec,gamma_vec)

                                names =[]
                                ooR2=[]
                                ooRMS=[]
                                selections=[]
                                false_selections=[]
                                strong_selects = []
                                weak_selects = []
                                for gamma, curr_selection_result in selection_result_dict.items():
                                    for key, item in curr_selection_result.items():
                                        ooR2_,ooRMS_ = performance(all_response,all_covariates[:,item],N,T_obs_train_)
                                        names.append(key)
                                        ooR2.append(ooR2_)
                                        ooRMS.append(ooRMS_)
                                        selections.append(len(item))
                                        false_vars = [i_var for i_var in item if i_var>=(s+s_weak)]
                                        false_selections.append(len(false_vars))
                                        strong_selects.append( len([i_var for i_var in item if i_var<s]))
                                        weak_selects.append( len([i_var for i_var in item if (i_var>=s) & (i_var<(s+s_weak))]))

                                    curr_sim=pd.DataFrame({'names':names,'OOS R2':ooR2,'OOS RMS':ooRMS,
                                                          'N Selections':selections, 'N False Selections':false_selections,
                                                          'N Strong':strong_selects, 'N Weak':weak_selects})
                                    curr_sim['sim']=i_sim
                                    curr_sim['N']=N
                                    curr_sim['d']=d
                                    curr_sim['T']=T_obs
                                    curr_sim['noise_unk_std']='Unknown'
                                    curr_sim['noise_std']=noise_std
                                    curr_sim['gamma'] = gamma
                                    curr_sim['factor_strength']=factor_strength
                                    curr_sim['xsec_cov']=xsec_cov
                                    curr_sim['s']=s
                                    curr_sim['rho']=curr_rho

                                    simulation_result.append(curr_sim)


                            print(i_sim,'done')
    simulation_result_joint_df=pd.concat(simulation_result,axis=0)
    
    return simulation_result_joint_df


# In[127]:


def trunc_t(studentized_posi_vec,trunc_a_vec,trunc_b_vec,df,min_B):
    B = 10000
    u_vec=rnd.uniform(low=0,high=1,size=B).reshape((B,1))


    G_B=stats.t.cdf(trunc_b_vec,df).reshape((1,len(trunc_b_vec)))
    G_A=stats.t.cdf(trunc_a_vec,df).reshape((1,len(trunc_b_vec)))

    sample_matrix = np.dot(u_vec,(G_B-G_A))+G_A

    trunc_quantiles = stats.t.ppf(sample_matrix,df)

    count_pos=B-np.sum(studentized_posi_vec[studentized_posi_vec>0]>trunc_quantiles[:,studentized_posi_vec>0],axis=0)+\
    np.sum(-studentized_posi_vec[studentized_posi_vec>0]>trunc_quantiles[:,studentized_posi_vec>0],axis=0)

    count_neg=B-np.sum(-studentized_posi_vec[studentized_posi_vec<0]>trunc_quantiles[:,studentized_posi_vec<0],axis=0)+\
    np.sum(studentized_posi_vec[studentized_posi_vec<0]>trunc_quantiles[:,studentized_posi_vec<0],axis=0)

    count_agg = np.ones_like(studentized_posi_vec)*np.nan
    count_agg[studentized_posi_vec>0]=count_pos
    count_agg[studentized_posi_vec<0]=count_neg
    raw_return = np.log(count_agg)-np.log(B)
    # print('Very extreme t vals',studentized_posi_vec[raw_return<=-np.log(B)])
    raw_return[raw_return<=-np.log(B)]=-np.log(min_B)
    return raw_return


def generate_sample(d,N,T_obs,s,noise_std,strong_factor_uniform_bound,xsec_cov):
    full_beta=np.zeros((d,N))
    
    # nonnull_beta = rnd.normal(loc=0,size=(s,N))
    
    
    splitted=np.split(np.arange(N),s)
    # splitted = np.split(np.arange(int(N*s/(s+1))),s)
    for i_factor in range(s):
        
        curr_units=np.concatenate(splitted[-(i_factor):])
        
        # active_units = curr_units[rnd.uniform(0,1,len(curr_units))>0.5]
        full_beta[i_factor,curr_units]=rnd.uniform(low=-strong_factor_uniform_bound,high=strong_factor_uniform_bound,size=len(curr_units))
      
    
    noise_cov = np.eye(N)*noise_std

    for i_unit in range(N):
        other_factor = np.delete(np.arange(N),i_unit)
        for j_factor in other_factor:
            noise_cov[i_unit,j_factor] = xsec_cov
    noises = rnd.multivariate_normal(mean=np.zeros(N),cov=noise_cov,size = T_obs)
    # all_covariates = rnd.uniform(low=0,high=strong_factor_uniform_bound,size=(T_obs,d))
    all_covariates = rnd.normal(size=(T_obs,d))
   
    
    all_response=np.matmul(all_covariates,full_beta)+noises    
    return all_response,all_covariates

def panel_posi_unordered(log_pval_matrix,gamma):
    log_pval_matrix = log_pval_matrix.copy()
    M_set = (~np.isnan(log_pval_matrix)).sum(axis=0)
    K_set = (~np.isnan(log_pval_matrix)).sum(axis=1)
    simultaneity_count_array = np.zeros(shape=log_pval_matrix.shape[0])
    for i in range(log_pval_matrix.shape[0]):
        simultaneity_count_array[i] = np.sum(M_set[np.where(~np.isnan(log_pval_matrix)[i,:])[0]])
        
    log_pval_matrix[np.isnan(log_pval_matrix)] = np.inf
    smallest_log_pval_array=np.nanmin(log_pval_matrix,axis=1) 
    rho_inv = np.sum(K_set[simultaneity_count_array>0]/simultaneity_count_array[simultaneity_count_array>0])
    rho = 1/rho_inv

    thresholds = np.log(gamma)-np.log(simultaneity_count_array)+np.log(rho)
    bonf_thresholds = np.log(gamma)-np.log(log_pval_matrix.shape[0])-np.log(log_pval_matrix.shape[1])

    selection_result = np.where((smallest_log_pval_array<=thresholds) & (simultaneity_count_array>0))[0]
    bonf_selection_result = np.where((smallest_log_pval_array<=bonf_thresholds) & (simultaneity_count_array>0))[0]
    # print('p-val array:',np.exp(smallest_log_pval_array)[(simultaneity_count_array>0)])
    # print('rho:',rho)
    # print('Localized:',rho*simultaneity_count_array[(simultaneity_count_array>0)])
    # print('Bonf:',log_pval_matrix.shape[0]*log_pval_matrix.shape[1])
    return selection_result, rho, bonf_selection_result

def selection_unk_noise_methods(covariates,response,omega_inv_vec,gamma_vec):
    d=covariates.shape[1]
    N=response.shape[1]
    T_obs_train_ = covariates.shape[0]
    
    posi_log_pval_matrix=np.nan*np.ones((d,N))
    t_log_pval_matrix=np.nan*np.ones((d,N))
    
    for i_unit in range(N):

        lars = LassoLarsCV(cv=5,fit_intercept=False,normalize=False,max_n_alphas=d)

        lars_fitted=lars.fit(X=covariates,y=response[:,i_unit])

        mse_CVed=lars_fitted.mse_path_.mean(axis=1)

        # picked_alpha = lars_fitted.cv_alphas_[np.argmin(mse_CVed)]
        picked_alpha = lars_fitted.cv_alphas_[np.max(np.where(mse_CVed<=2*min(mse_CVed)))]
        lasso = Lasso(alpha=picked_alpha)

        lasso_fitted = lasso.fit(X=covariates,y=response[:,i_unit],)

        lasso_lambda = picked_alpha
        y=response[:,i_unit]

        active_set = lasso_fitted.coef_!=0

        X_M = covariates[:,active_set]
        omega_inv_M  = omega_inv_vec[active_set]
        omega_inv_notM  = omega_inv_vec[~active_set]

        X_notM = covariates[:,~active_set]

        X_M_card=X_M.shape[1]
        if X_M_card==0:
            continue
        ols_post_lasso=OLS(endog=y,exog=X_M)

        ols_post_lasso_fitted=ols_post_lasso.fit()

        beta_bar = ols_post_lasso_fitted.params
        
        X_M_gram = np.matmul(X_M.transpose(),X_M)

        X_M_gram_inv=pinv(X_M_gram)

        X_M_pseudo_inv=np.matmul(X_M_gram_inv,X_M.transpose())
        this_df = max(1,T_obs_train_-X_M_card)
        estimated_var=np.sum(np.power(y-lasso_fitted.predict(covariates),2))/this_df        
        Sigma = np.eye(T_obs_train_)*estimated_var
        p_raw_vec = np.zeros(X_M_card)
        studentized_posi_vec,trunc_a_vec,trunc_b_vec= np.zeros(X_M_card), np.zeros(X_M_card), np.zeros(X_M_card)
        
        # print('beta bar',beta_bar)
        # print("(X'X)^-1",X_M_gram_inv.diagonal())
        # print('s',np.sqrt(estimated_var*(X_M_gram_inv.diagonal())))

        for i_covariate in range(X_M_card):

            eta=np.reshape(X_M_pseudo_inv[i_covariate,:],newshape=(T_obs_train_,1))
            
            # var_beta_bar=np.matmul(np.matmul(eta.transpose(),Sigma),eta)
            # std_beta_bar = np.sqrt(var_beta_bar)
            var_beta_bar = estimated_var*(X_M_gram_inv.diagonal())[i_covariate]
            std_beta_bar = np.sqrt(var_beta_bar)
            
            # print('Estimated sigma(beta)',std_beta_bar)
            xi=np.reshape(np.matmul(Sigma,eta)/var_beta_bar,newshape=(T_obs_train_,1))

            z =np.matmul( np.eye(T_obs_train_)-np.matmul(xi,eta.transpose()), y)

            s_vec=np.sign(lasso_fitted.coef_[active_set])

            P_M = np.matmul(X_M,X_M_pseudo_inv)

            reuseable_part1=np.matmul(X_notM.transpose(),np.eye(T_obs_train_)-P_M)

            A_matrix = np.concatenate([
                        reuseable_part1/lasso_lambda,
                        -reuseable_part1/lasso_lambda,
                        -np.matmul(np.diag(s_vec),X_M_pseudo_inv)],axis=0)



            reuseable_part2=np.matmul(np.matmul(X_notM.transpose(),X_M_pseudo_inv.transpose()),s_vec/omega_inv_M)

            b_vec =np.concatenate([
                        omega_inv_notM-reuseable_part2,
                        omega_inv_notM+reuseable_part2,
                        -np.matmul(np.matmul(np.diag(s_vec),X_M_gram_inv),s_vec/omega_inv_M)*lasso_lambda],axis=0)

            numerator=b_vec-np.matmul(A_matrix,z)

            denominator = np.reshape(np.matmul(A_matrix,xi),numerator.shape[0])


            V_minus_bool=(b_vec-np.matmul(A_matrix,y)>1e-16)&(denominator<0)
            V_plus_bool=(b_vec-np.matmul(A_matrix,y)>1e-16)&(denominator>0)
            if (len(numerator[V_minus_bool])==0) | (len(denominator[V_minus_bool])==0):
                V_minus = -np.inf
            else:
                V_minus=np.max(numerator[V_minus_bool]/denominator[V_minus_bool])

            if (len(numerator[V_plus_bool])==0) | (len(denominator[V_plus_bool])==0):
                V_plus = np.inf
            else:
                V_plus=np.max(numerator[V_plus_bool]/denominator[V_plus_bool])



            a,b=V_minus/std_beta_bar,V_plus/std_beta_bar
            studentized_posi=beta_bar[i_covariate]/std_beta_bar
  
            studentized_posi_vec[i_covariate]=studentized_posi
            trunc_a_vec[i_covariate]=a
            trunc_b_vec[i_covariate]=b
            # p_raw_vec[i_covariate]=p_raw
            if (beta_bar[i_covariate]>0):
                right_tail = truncnorm.logsf(studentized_posi, a=a, b=b)
                left_tail = truncnorm.logcdf(-studentized_posi, a=a, b=b)
            else:
                right_tail = truncnorm.logsf(-studentized_posi, a=a, b=b)
                left_tail = truncnorm.logcdf(studentized_posi, a=a, b=b)
            
            if (np.isnan(right_tail)) | (np.isnan(left_tail)):
                p_raw = np.nan
                continue


            if (np.isinf(-right_tail)) & (np.isinf(-left_tail)):
                p_raw = -np.inf
            elif np.abs(right_tail-left_tail)>16:
                p_raw = np.max([right_tail,left_tail])
            else:
                p_raw = np.log(np.exp(right_tail)+np.exp(left_tail))
            if p_raw < np.log(1e-16):
                p_raw = np.log(1e-16)
            p_raw_vec[i_covariate] = p_raw
        # p^{PoSI}    
        B = 2*int(d*N/min(gamma_vec))
 

        # posi_log_pval_matrix[active_set,i_unit]=trunc_t(studentized_posi_vec,trunc_a_vec,trunc_b_vec,T_obs_train_-X_M_card,B)

        posi_log_pval_matrix[active_set,i_unit]=p_raw_vec
        ##############################
        # p^{LASSO}

        var_b = estimated_var*(X_M_gram_inv.diagonal())
        sd_b = np.sqrt(var_b)
        studentized=-np.abs(lasso_fitted.coef_[active_set])/sd_b
        # print('LASSO t ',studentized)
        # studentized=-np.abs(beta_bar)/sd_b
        
        studentized_logpval=stats.t.logcdf(studentized,this_df)+np.log(2)
        t_log_pval_matrix[active_set,i_unit]=studentized_logpval


        
    #############################
    # p^{OLS}
    denom_vec=np.diag(np.matmul(covariates.transpose(),covariates))
    denom_vec=denom_vec.reshape((d,1))
    numerator_ols=np.matmul(covariates.transpose(),response)

    beta_matrix_ols=numerator_ols/denom_vec

    residuals_ols=np.sum(np.square(response-np.matmul(covariates,beta_matrix_ols)),axis=0)/(T_obs_train_-1)

    OLS_t_stat=beta_matrix_ols/np.reshape(residuals_ols,(1,N))*np.sqrt(denom_vec)

    OLS_t_stat_vec=stats.t.logsf(np.max(np.abs(OLS_t_stat),axis=1),T_obs_train_-1)
    selection_results = dict()
    for gamma in gamma_vec:
        selection_N_OLS=np.where(OLS_t_stat_vec<(np.log(gamma)))[0]
        selection_B_OLS=np.where(OLS_t_stat_vec<(np.log(gamma)-np.log(d)-np.log(N)))[0]

        t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
        selection_B_LASSO=np.where(t_log_pval_vec<(np.log(gamma)-np.log(d)-np.log(N)))[0]
        selection_N_LASSO=np.where(t_log_pval_vec<(np.log(gamma)))[0]

        selection_P_POSI, rho, selection_B_POSI =panel_posi_unordered(posi_log_pval_matrix,gamma)
        #selection_P_LASSO, _ =panel_posi_unordered(t_log_pval_matrix,gamma)


        selection_results[gamma] = {'N_OLS':selection_N_OLS,'B_OLS':selection_B_OLS,
                             'N_LASSO':selection_N_LASSO,'B_LASSO':selection_B_LASSO,#'P_LASSO':selection_P_LASSO,
                             'P_POSI':selection_P_POSI,'B_POSI':selection_B_POSI}
    return selection_results,  rho

In [67]:
def performance(all_response,selected_covariates,N,T_obs_train_):
    all_response_vec = all_response[T_obs_train_:,].flatten('F')

    if selected_covariates.shape[1]==0:
        return 0, np.sqrt(np.sum(np.square(all_response_vec))/len(all_response_vec))
    Xtrain = selected_covariates[:T_obs_train_,:]
    Xnew = selected_covariates[T_obs_train_:,:]

    # Xtrain = sm.add_constant(selected_covariates[:T_obs_train_,:])
    # Xnew = sm.add_constant(selected_covariates[T_obs_train_:,:])
    predicted_mat = np.zeros_like(all_response[T_obs_train_:,])
    for i_unit in range(N):    
        unit_OLS = OLS(all_response[:T_obs_train_,i_unit],Xtrain)
        curr_fitted_ = unit_OLS.fit()

        ynewpred = curr_fitted_.predict(Xnew)  # predict out of sample
        predicted_mat[:,i_unit]=ynewpred
        # raw_ooR2 = r2_score(y_true=all_response[T_obs_train_:,i_unit],y_pred = ynewpred)
        # curr_ooR2 = 1-(1-raw_ooR2)*(T_obs_train_-1)/(T_obs_train_-len(selection_result_panelPoSI)-1)
        # curr_ooRMSResid=np.sum(np.abs(all_response[T_obs_train_:,i_unit]-ynewpred))
        # ooR2_vec[i_unit]=curr_ooR2
        # ooRMSResid_vec[i_unit]=curr_ooRMSResid    

    predicted_vec = predicted_mat.flatten('F')

    ooR2 = r2_score(y_true=all_response_vec,y_pred = predicted_vec)

    ooRMS = np.sqrt(np.sum(np.square(all_response_vec-predicted_vec))/len(all_response_vec))
    return ooR2, ooRMS

In [73]:
# whuniform we do not change
N_vec = [120,200]
T_obs=300

d_vec = [100]
gamma_vec=[0.01,0.05]
noise_stds = [1.0,2.0]
factor_strengths = [0.5,1.0]
xsec_covs = [0,1.0]
gamma = 0.05
s_vec = [5,10]
s_weak=0
NumSimulation=8

# In[131]:
simu_detailed=simulate(NumSimulation,gamma_vec,d_vec,N_vec,T_obs,s_vec,s_weak,noise_stds,factor_strengths,xsec_covs)

  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)
  t_log_pval_vec=np.nanmin(t_log_pval_matrix,axis=1)


7 done


In [5]:
grid_search_summary[(grid_search_summary['gamma']==0.05)  ].to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/new_search.csv')

NameError: name 'grid_search_summary' is not defined

In [97]:
grid_search_summary[(grid_search_summary['names']=='B_LASSO') & (grid_search_summary['N Selections']>0)& (grid_search_summary['noise_std']==1)]

Unnamed: 0,gamma,names,xsec_cov,d,N,noise_std,factor_strength,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS,rho
1,0.01,B_LASSO,0.0,100,120,1.0,1.0,6.9375,0.0,6.9375,0.0,0.546287,1.037348,0.381687
5,0.01,B_LASSO,0.0,100,200,1.0,1.0,7.1875,0.0,7.1875,0.0,0.544347,1.035388,0.277276
9,0.01,B_LASSO,1.0,100,120,1.0,1.0,6.875,0.0,6.875,0.0,0.541053,1.033734,0.45149
13,0.01,B_LASSO,1.0,100,200,1.0,1.0,7.1875,0.0,7.1875,0.0,0.550296,1.014388,0.443198
97,0.05,B_LASSO,0.0,100,120,1.0,1.0,6.96875,0.0,6.96875,0.0,0.546532,1.037027,0.381687
101,0.05,B_LASSO,0.0,100,200,1.0,1.0,7.25,0.0,7.25,0.0,0.545981,1.033339,0.277276
105,0.05,B_LASSO,1.0,100,120,1.0,1.0,6.90625,0.0,6.90625,0.0,0.542648,1.031679,0.45149
109,0.05,B_LASSO,1.0,100,200,1.0,1.0,7.1875,0.0,7.1875,0.0,0.550296,1.014388,0.443198


In [123]:
grid_search_summary=simu_detailed.groupby(['gamma','names','xsec_cov','d','N','noise_std','s','factor_strength']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean','rho':'mean'
                                                                                        })
grid_search_summary.reset_index(inplace=True)
#(grid_search_summary['gamma']==0.05) & (grid_search_summary['xsec_cov']==0.0)
grid_search_summary[(grid_search_summary['d']==100)& \
                    (grid_search_summary['N']==120)&(grid_search_summary['noise_std']==2.0) &(grid_search_summary['factor_strength']==0.5)&(grid_search_summary['s']==10.0) ]

Unnamed: 0,gamma,names,xsec_cov,d,N,noise_std,s,factor_strength,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS,rho
6,0.01,B_LASSO,0.0,100,120,2.0,10,0.5,0.0,0.0,0.0,0.0,0.0,1.561781,0.093954
22,0.01,B_LASSO,1.0,100,120,2.0,10,0.5,0.0,0.0,0.0,0.0,0.0,1.560234,0.106523
38,0.01,B_OLS,0.0,100,120,2.0,10,0.5,0.25,0.0,0.25,0.0,0.005904,1.557068,0.093954
54,0.01,B_OLS,1.0,100,120,2.0,10,0.5,0.75,0.0,0.75,0.0,0.017918,1.545246,0.106523
70,0.01,B_POSI,0.0,100,120,2.0,10,0.5,2.25,0.0,2.25,0.0,0.047609,1.523911,0.093954
86,0.01,B_POSI,1.0,100,120,2.0,10,0.5,2.875,0.0,2.875,0.0,0.042484,1.525208,0.106523
102,0.01,N_LASSO,0.0,100,120,2.0,10,0.5,0.125,0.0,0.125,0.0,0.003835,1.55875,0.093954
118,0.01,N_LASSO,1.0,100,120,2.0,10,0.5,0.125,0.0,0.125,0.0,0.002852,1.558016,0.106523
134,0.01,N_OLS,0.0,100,120,2.0,10,0.5,49.125,39.25,9.875,0.0,-0.235362,1.735565,0.093954
150,0.01,N_OLS,1.0,100,120,2.0,10,0.5,36.75,26.75,10.0,0.0,-0.128919,1.65277,0.106523


In [11]:
grid_search_summary=simu_detailed.groupby(['gamma','names','xsec_cov','d','N','noise_std','factor_strength']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

# grid_search_summary.to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/grid_search_focus_on_d100N120.csv')

In [108]:
# grid_search_summary.reset_index(inplace=True)

settings = grid_search_summary.groupby(['gamma','d','N','xsec_cov','noise_std','s','factor_strength'])
for _, setting in settings:
    a = setting.loc[setting['names']=='P_POSI','OOS R2'].tolist()[0]
    b = setting.loc[setting['names']=='B_POSI','OOS R2'].tolist()[0]
    c = setting.loc[setting['names']=='N_LASSO','OOS R2'].tolist()[0]
    non_tri= (setting.loc[setting['names']=='N_LASSO','N Selections'].tolist()[0]>0) &(setting.loc[setting['names']=='B_LASSO','N Selections'].tolist()[0]>0) #
    if a>b and a>c :
        print(_,a-b,a-c)

(0.01, 100, 120, 0.0, 1.0, 5, 0.5) 0.0026607788606507754 0.13925768218998302
(0.01, 100, 120, 0.0, 1.0, 10, 0.5) 0.027460620568600436 0.23945403366313955
(0.01, 100, 120, 0.0, 2.0, 5, 0.5) 0.025242894422206466 0.06241061297742684
(0.01, 100, 120, 0.0, 2.0, 10, 0.5) 0.06004062427952543 0.1038153083827883
(0.01, 100, 120, 0.0, 2.0, 10, 1.0) 0.0016543102815307908 0.06385198018289578
(0.01, 100, 120, 1.0, 2.0, 5, 0.5) 0.021271127577374885 0.055587507165674496
(0.01, 100, 120, 1.0, 2.0, 10, 0.5) 0.049986630377588506 0.08961862810491437
(0.01, 100, 200, 0.0, 1.0, 10, 0.5) 0.009657039699561443 0.25118206210462135
(0.01, 100, 200, 0.0, 2.0, 5, 0.5) 0.008406422239513364 0.06023589908390814
(0.01, 100, 200, 0.0, 2.0, 10, 0.5) 0.04529280132844164 0.11241752637280875
(0.01, 100, 200, 1.0, 1.0, 5, 0.5) 0.00575478568340676 0.16008365901503396
(0.01, 100, 200, 1.0, 1.0, 10, 0.5) 0.06630190964267196 0.24234087740531196
(0.01, 100, 200, 1.0, 2.0, 10, 0.5) 0.0714470193258225 0.12474183110739527
(0.01, 1

In [22]:
grid_search_summary=simu_detailed.groupby(['gamma','names','d','N','noise_std','factor_strength']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })
grid_search_summary.to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/grid_search_summary_correlated_gaussian_sigma2_strengthHalf.csv')

In [18]:
grid_search_summary.reset_index(inplace=True)
settings = grid_search_summary.groupby(['gamma','d','N','noise_std','factor_strength'])
for _, setting in settings:
    a = setting.loc[setting['names']=='P_POSI','OOS RMS'].tolist()[0]
    b = setting.loc[setting['names']=='B_POSI','OOS RMS'].tolist()[0]
    if a<b:
        print(_,b-a)

(0.005, 100, 60, 1.0, 0.5) 0.0023756211419729834
(0.005, 100, 60, 2.0, 0.5) 0.044546654940549546
(0.005, 100, 60, 2.0, 1.0) 0.00046723402685833193
(0.005, 100, 100, 2.0, 0.5) 0.02154299131794568
(0.01, 100, 60, 1.0, 0.5) 0.0006734479494145962
(0.01, 100, 60, 2.0, 0.5) 0.039172486177906585
(0.01, 100, 100, 2.0, 0.5) 0.019934913285930778
(0.05, 100, 60, 2.0, 0.5) 0.024959171774941336
(0.05, 100, 100, 2.0, 0.5) 0.011067801848036796


In [35]:
# For debuggging
d=125
N=100
s=10
noise_std=1.0
xsec_cov=1.0
strong_factor_uniform_bound=0.5
full_beta=np.zeros((d,N))

# nonnull_beta = rnd.normal(loc=0,size=(s,N))


splitted=np.split(np.arange(N),s)
# splitted = np.split(np.arange(int(N*s/(s+1))),s)
for i_factor in range(s):

    curr_units=np.concatenate(splitted[-(i_factor):])

    # active_units = curr_units[rnd.uniform(0,1,len(curr_units))>0.5]
    full_beta[i_factor,curr_units]=rnd.uniform(low=-strong_factor_uniform_bound,high=strong_factor_uniform_bound,size=len(curr_units))


noise_cov = np.eye(N)*noise_std

for i_unit in range(N):
    other_factor = np.delete(np.arange(N),i_unit)
    for j_factor in other_factor:
        noise_cov[i_unit,j_factor] = xsec_cov
noises = rnd.multivariate_normal(mean=np.zeros(N),cov=noise_cov,size = T_obs)
# all_covariates = rnd.uniform(low=0,high=strong_factor_uniform_bound,size=(T_obs,d))
all_covariates = rnd.normal(size=(T_obs,d))


all_response=np.matmul(all_covariates,full_beta)+noises    

covariates =  all_covariates[:(int(0.5*T_obs)),:]

response =  all_response[:(int(0.5*T_obs)),:]
omega_inv_vec = np.ones(d)

d=covariates.shape[1]
N=response.shape[1]
T_obs_train_ = covariates.shape[0]

posi_log_pval_matrix=np.nan*np.ones((d,N))
t_log_pval_matrix=np.nan*np.ones((d,N))

for i_unit in range(N):

    lars = LassoLarsCV(cv=5,fit_intercept=False,normalize=False,max_n_alphas=d)

    lars_fitted=lars.fit(X=covariates,y=response[:,i_unit])

    mse_CVed=lars_fitted.mse_path_.mean(axis=1)

    picked_alpha = lars_fitted.cv_alphas_[np.argmin(mse_CVed)]

    lasso = Lasso(alpha=picked_alpha)

    lasso_fitted = lasso.fit(X=covariates,y=response[:,i_unit],)

    lasso_lambda = picked_alpha
    y=response[:,i_unit]

    active_set = lasso_fitted.coef_!=0

    X_M = covariates[:,active_set]
    omega_inv_M  = omega_inv_vec[active_set]
    omega_inv_notM  = omega_inv_vec[~active_set]

    X_notM = covariates[:,~active_set]

    X_M_card=X_M.shape[1]
    if X_M_card==0:
        continue
    ols_post_lasso=OLS(endog=y,exog=X_M)

    ols_post_lasso_fitted=ols_post_lasso.fit()

    beta_bar = ols_post_lasso_fitted.params

    X_M_gram = np.matmul(X_M.transpose(),X_M)

    X_M_gram_inv=pinv(X_M_gram)

    X_M_pseudo_inv=np.matmul(X_M_gram_inv,X_M.transpose())

    estimated_var=np.sum(np.power(y-lasso_fitted.predict(covariates),2))/(T_obs_train_-X_M_card)        
    Sigma = np.eye(T_obs_train_)*estimated_var
    p_raw_vec = np.zeros(X_M_card)
    studentized_posi_vec,trunc_a_vec,trunc_b_vec= np.zeros(X_M_card), np.zeros(X_M_card), np.zeros(X_M_card)

    # print('beta bar',beta_bar)
    # print("(X'X)^-1",X_M_gram_inv.diagonal())
    # print('s',np.sqrt(estimated_var*(X_M_gram_inv.diagonal())))

    for i_covariate in range(X_M_card):

        eta=np.reshape(X_M_pseudo_inv[i_covariate,:],newshape=(T_obs_train_,1))

        # var_beta_bar=np.matmul(np.matmul(eta.transpose(),Sigma),eta)
        # std_beta_bar = np.sqrt(var_beta_bar)
        var_beta_bar = estimated_var*(X_M_gram_inv.diagonal())[i_covariate]
        std_beta_bar = np.sqrt(var_beta_bar)

        # print('Estimated sigma(beta)',std_beta_bar)
        xi=np.reshape(np.matmul(Sigma,eta)/var_beta_bar,newshape=(T_obs_train_,1))

        z =np.matmul( np.eye(T_obs_train_)-np.matmul(xi,eta.transpose()), y)

        s_vec=np.sign(lasso_fitted.coef_[active_set])

        P_M = np.matmul(X_M,X_M_pseudo_inv)

        reuseable_part1=np.matmul(X_notM.transpose(),np.eye(T_obs_train_)-P_M)

        A_matrix = np.concatenate([
                    # reuseable_part1/lasso_lambda,
                    # -reuseable_part1/lasso_lambda,
                    -np.matmul(np.diag(s_vec),X_M_pseudo_inv)],axis=0)



        reuseable_part2=np.matmul(np.matmul(X_notM.transpose(),X_M_pseudo_inv.transpose()),s_vec/omega_inv_M)

        b_vec =np.concatenate([
                    # omega_inv_notM-reuseable_part2,
                    # omega_inv_notM+reuseable_part2,
                    -np.matmul(np.matmul(np.diag(s_vec),X_M_gram_inv),s_vec/omega_inv_M)*lasso_lambda],axis=0)

        numerator=b_vec-np.matmul(A_matrix,z)

        denominator = np.reshape(np.matmul(A_matrix,xi),numerator.shape[0])


        V_minus_bool=(b_vec-np.matmul(A_matrix,y)>1e-16)&(denominator<0)
        V_plus_bool=(b_vec-np.matmul(A_matrix,y)>1e-16)&(denominator>0)
        if (len(numerator[V_minus_bool])==0) | (len(denominator[V_minus_bool])==0):
            V_minus = -np.inf
        else:
            V_minus=np.max(numerator[V_minus_bool]/denominator[V_minus_bool])

        if (len(numerator[V_plus_bool])==0) | (len(denominator[V_plus_bool])==0):
            V_plus = np.inf
        else:
            V_plus=np.max(numerator[V_plus_bool]/denominator[V_plus_bool])



        a,b=V_minus/std_beta_bar,V_plus/std_beta_bar
        studentized_posi=beta_bar[i_covariate]/std_beta_bar

        studentized_posi_vec[i_covariate]=studentized_posi
        trunc_a_vec[i_covariate]=a
        trunc_b_vec[i_covariate]=b
        # p_raw_vec[i_covariate]=p_raw
        if (beta_bar[i_covariate]>0):
            right_tail = truncnorm.logsf(studentized_posi, a=a, b=b)
            left_tail = truncnorm.logcdf(-studentized_posi, a=a, b=b)
        else:
            right_tail = truncnorm.logsf(-studentized_posi, a=a, b=b)
            left_tail = truncnorm.logcdf(studentized_posi, a=a, b=b)

        if (np.isnan(right_tail)) | (np.isnan(left_tail)):
            p_raw = np.nan
            continue


        if (np.isinf(-right_tail)) & (np.isinf(-left_tail)):
            p_raw = -np.inf
        elif np.abs(right_tail-left_tail)>16:
            p_raw = np.max([right_tail,left_tail])
        else:
            p_raw = np.log(np.exp(right_tail)+np.exp(left_tail))
        if p_raw < np.log(1e-16):
            p_raw = np.log(1e-16)
        p_raw_vec[i_covariate] = p_raw
    # p^{PoSI}    
    B = 2*int(d*N/min(gamma_vec))


    # posi_log_pval_matrix[active_set,i_unit]=trunc_t(studentized_posi_vec,trunc_a_vec,trunc_b_vec,T_obs_train_-X_M_card,B)

    posi_log_pval_matrix[active_set,i_unit]=p_raw_vec
    ##############################
    # p^{LASSO}

    var_b = estimated_var*(X_M_gram_inv.diagonal())
    sd_b = np.sqrt(var_b)
    studentized=-np.abs(lasso_fitted.coef_[active_set])/sd_b
    # print('LASSO t ',studentized)
    # studentized=-np.abs(beta_bar)/sd_b

    studentized_logpval=stats.t.logcdf(studentized,T_obs_train_-X_M_card)+np.log(2)
    t_log_pval_matrix[active_set,i_unit]=studentized_logpval




posi_log_pval_matrix.shape
log_pval_matrix = posi_log_pval_matrix.copy()
M_set = (~np.isnan(log_pval_matrix)).sum(axis=0)
K_set = (~np.isnan(log_pval_matrix)).sum(axis=1)
simultaneity_count_array = np.zeros(shape=log_pval_matrix.shape[0])
for i in range(log_pval_matrix.shape[0]):
    simultaneity_count_array[i] = np.sum(M_set[np.where(~np.isnan(log_pval_matrix)[i,:])[0]])

log_pval_matrix[np.isnan(log_pval_matrix)] = np.inf
smallest_log_pval_array=np.nanmin(log_pval_matrix,axis=1) 
rho = np.sum(K_set[simultaneity_count_array>0]/simultaneity_count_array[simultaneity_count_array>0])

thresholds = np.log(gamma)-np.log(simultaneity_count_array)-np.log(rho)
bonf_thresholds = np.log(gamma)-np.log(log_pval_matrix.shape[0])-np.log(log_pval_matrix.shape[1])


In [274]:
# N = 60
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# gamma = 0.05
# s = 6
# xcov = 0.1
# s_weak=0
# NumSimulation=5

grid_search_summary=simu_detailed.groupby(['gamma','names','d','noise_std','factor_strength']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })
grid_search_summary.to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/grid_search_summary.csv')

In [192]:
# N = 60
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# gamma = 0.05
# s = 6
# xcov = 0.1
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.041465,1.133393
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.159712
0.005,B_POSI,100,1.0,3.8,0.2,3.6,0.0,0.210079,1.028735
0.005,N_LASSO,100,1.0,4.2,0.0,4.2,0.0,0.207113,1.030891
0.005,N_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.159712
0.005,P_POSI,100,1.0,10.2,5.4,4.8,0.0,0.184238,1.045384
0.01,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.041465,1.133393
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.159712
0.01,B_POSI,100,1.0,4.2,0.5,3.7,0.0,0.208472,1.029775
0.01,N_LASSO,100,1.0,4.5,0.0,4.5,0.0,0.210062,1.028945


In [190]:
# N = 60
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# gamma = 0.05
# s = 4
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.029634,1.122901
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.140745
0.005,B_POSI,100,1.0,4.2,1.6,2.6,0.0,0.183156,1.029584
0.005,N_LASSO,100,1.0,2.2,0.2,2.0,0.0,0.182401,1.030011
0.005,N_OLS,100,1.0,0.2,0.0,0.2,0.0,0.019643,1.129249
0.005,P_POSI,100,1.0,10.4,6.6,3.8,0.0,0.160903,1.04337
0.01,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.029634,1.122901
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.140745
0.01,B_POSI,100,1.0,4.4,1.6,2.8,0.0,0.183655,1.029285
0.01,N_LASSO,100,1.0,2.7,0.4,2.3,0.0,0.185113,1.028347


In [187]:
# N = 120
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 6
# s_weak=0
# NumSimulation=5

temp=simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })
temp.to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/summarized_09092022.csv')

In [184]:
# N = 120
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 6
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.039,1.152355
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.177015
0.005,B_POSI,100,1.0,5.4,1.2,4.2,0.0,0.237405,1.026198
0.005,N_LASSO,100,1.0,4.2,0.2,4.0,0.0,0.24024,1.024373
0.005,N_OLS,100,1.0,0.2,0.0,0.2,0.0,0.037387,1.153429
0.005,P_POSI,100,1.0,11.4,5.6,5.8,0.0,0.217673,1.039471
0.01,B_LASSO,100,1.0,0.3,0.0,0.3,0.0,0.043119,1.149612
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.177015
0.01,B_POSI,100,1.0,5.5,1.3,4.2,0.0,0.236755,1.026629
0.01,N_LASSO,100,1.0,4.9,0.5,4.4,0.0,0.242165,1.023062


In [182]:
# N = 120
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 15
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.0489,1.324157
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.358759
0.005,B_POSI,100,1.0,9.0,1.0,8.0,0.0,0.373661,1.073879
0.005,N_LASSO,100,1.0,12.2,0.8,11.4,0.0,0.393976,1.056528
0.005,N_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.358759
0.005,P_POSI,100,1.0,14.2,3.0,11.2,0.0,0.382716,1.066061
0.01,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.0489,1.324157
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.358759
0.01,B_POSI,100,1.0,9.5,1.2,8.3,0.0,0.37469,1.072937
0.01,N_LASSO,100,1.0,13.2,1.3,11.9,0.0,0.391988,1.058218


In [180]:
# N = 100
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 10
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.045912,1.234755
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.265403
0.005,B_POSI,100,1.0,7.0,1.8,5.2,0.0,0.297605,1.059901
0.005,N_LASSO,100,1.0,7.0,0.0,7.0,0.0,0.320422,1.042488
0.005,N_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.265403
0.005,P_POSI,100,1.0,13.8,6.4,7.4,0.0,0.285368,1.069126
0.01,B_LASSO,100,1.0,0.3,0.0,0.3,0.0,0.069056,1.219015
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.265403
0.01,B_POSI,100,1.0,7.2,1.8,5.4,0.0,0.298774,1.059039
0.01,N_LASSO,100,1.0,7.7,0.4,7.3,0.0,0.319366,1.043251


In [178]:
# N = 200
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 10
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.0,0.0,0.0,0.0,0.0,1.261846
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.261846
0.005,B_POSI,100,1.0,8.6,1.0,7.6,0.0,0.317518,1.040858
0.005,N_LASSO,100,1.0,8.8,0.4,8.4,0.0,0.323845,1.036061
0.005,N_OLS,100,1.0,0.4,0.0,0.4,0.0,0.088658,1.202442
0.005,P_POSI,100,1.0,14.8,5.6,9.2,0.0,0.29424,1.058358
0.01,B_LASSO,100,1.0,0.0,0.0,0.0,0.0,0.0,1.261846
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.261846
0.01,B_POSI,100,1.0,8.9,1.1,7.8,0.0,0.319622,1.039245
0.01,N_LASSO,100,1.0,9.9,1.3,8.6,0.0,0.31855,1.040105


In [175]:
# N = 200
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.5

# gamma = 0.05
# s = 10
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.041461,1.272739
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.300132
0.005,B_POSI,100,1.0,9.6,3.2,6.4,0.0,0.288685,1.094619
0.005,N_LASSO,100,1.0,9.4,1.2,8.2,0.0,0.316508,1.073256
0.005,N_OLS,100,1.0,0.4,0.0,0.4,0.0,0.091909,1.235915
0.005,P_POSI,100,1.0,15.0,6.8,8.2,0.0,0.280573,1.100299
0.01,B_LASSO,100,1.0,0.2,0.0,0.2,0.0,0.041461,1.272739
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.300132
0.01,B_POSI,100,1.0,10.0,3.5,6.5,0.0,0.287068,1.095788
0.01,N_LASSO,100,1.0,10.6,2.1,8.5,0.0,0.31081,1.077827


In [173]:
# N = 30
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.5

# gamma = 0.05
# s = 15
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.4,0.0,0.4,0.0,0.065118,1.358987
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.407122
0.005,B_POSI,100,1.0,4.6,0.6,4.0,0.0,0.366036,1.115232
0.005,N_LASSO,100,1.0,6.4,0.0,6.4,0.0,0.378639,1.102074
0.005,N_OLS,100,1.0,0.2,0.0,0.2,0.0,0.063736,1.360109
0.005,P_POSI,100,1.0,8.4,0.6,7.8,0.0,0.400767,1.083639
0.01,B_LASSO,100,1.0,0.4,0.0,0.4,0.0,0.065118,1.358987
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.407122
0.01,B_POSI,100,1.0,4.9,0.6,4.3,0.0,0.371874,1.109897
0.01,N_LASSO,100,1.0,7.9,0.1,7.8,0.0,0.391075,1.091165


In [170]:
# N = 30
# T_obs=300

# d_vec = [100]
# gamma_vec=[0.005,0.01,0.05]
# noise_stds = [1.0]

# cov = 0.1

# gamma = 0.05
# s = 15
# s_weak=0
# NumSimulation=5

simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.005,B_LASSO,100,1.0,0.0,0.0,0.0,0.0,0.0,1.404
0.005,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.404
0.005,B_POSI,100,1.0,4.4,0.4,4.0,0.0,0.370628,1.102728
0.005,N_LASSO,100,1.0,4.4,0.4,4.0,0.0,0.366866,1.10443
0.005,N_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.404
0.005,P_POSI,100,1.0,8.6,1.6,7.0,0.0,0.392346,1.082435
0.01,B_LASSO,100,1.0,0.0,0.0,0.0,0.0,0.0,1.404
0.01,B_OLS,100,1.0,0.0,0.0,0.0,0.0,0.0,1.404
0.01,B_POSI,100,1.0,4.5,0.4,4.1,0.0,0.371013,1.102456
0.01,N_LASSO,100,1.0,5.7,0.4,5.3,0.0,0.381857,1.091907


In [189]:


# strong factor beta~ unif[-2];
# weak factor beta~ unif[-.1];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,300,1.0,11.0,0.0,2.0,9.0,0.596787,1.017215
0.01,B_LASSO,300,2.0,3.75,0.0,2.0,1.75,0.246402,2.062
0.01,B_OLS,300,1.0,5.5,0.0,1.75,3.75,0.463476,1.171099
0.01,B_OLS,300,2.0,0.0,0.0,0.0,0.0,0.0,2.378236
0.01,B_POSI,300,1.0,12.75,1.25,2.0,9.5,0.597462,1.016256
0.01,B_POSI,300,2.0,10.0,0.5,2.0,7.5,0.264821,2.036406
0.01,N_LASSO,300,1.0,12.0,0.25,2.0,9.75,0.599948,1.013163
0.01,N_LASSO,300,2.0,8.75,0.25,2.0,6.5,0.262776,2.03899
0.01,N_OLS,300,1.0,15.75,6.75,2.0,7.0,0.568114,1.051926
0.01,N_OLS,300,2.0,3.75,0.0,2.0,1.75,0.241684,2.068698


In [187]:


# strong factor beta~ unif[-2];
# weak factor beta~ unif[-.1];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,300,1.0,10.75,0.0,2.0,8.75,0.607042,1.015642
0.01,B_OLS,300,1.0,6.0,0.0,2.0,4.0,0.545405,1.093053
0.01,B_POSI,300,1.0,12.5,0.5,2.0,10.0,0.61002,1.011845
0.01,N_LASSO,300,1.0,11.75,0.0,2.0,9.75,0.610728,1.010907
0.01,N_OLS,300,1.0,15.0,5.75,2.0,7.25,0.580898,1.048741
0.01,P_POSI,300,1.0,15.25,3.25,2.0,10.0,0.60442,1.01889
0.05,B_LASSO,300,1.0,10.75,0.0,2.0,8.75,0.607042,1.015642
0.05,B_OLS,300,1.0,6.25,0.0,2.0,4.25,0.548821,1.088823
0.05,B_POSI,300,1.0,12.5,0.5,2.0,10.0,0.61002,1.011845
0.05,N_LASSO,300,1.0,12.125,0.25,2.0,9.875,0.61026,1.011483


In [183]:


# strong factor beta~ unif[-2];
# weak factor beta~ unif[-.5];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,300,1.0,4.75,0.0,2.0,2.75,0.552042,1.024514
0.01,B_OLS,300,1.0,3.25,0.0,2.0,1.25,0.545704,1.03179
0.01,B_POSI,300,1.0,10.25,0.5,2.0,7.75,0.5617,1.01335
0.01,N_LASSO,300,1.0,10.0,0.0,2.0,8.0,0.563471,1.011285
0.01,N_OLS,300,1.0,19.25,11.5,2.0,5.75,0.53512,1.043151
0.01,P_POSI,300,1.0,13.75,3.5,2.0,8.25,0.556089,1.019686
0.05,B_LASSO,300,1.0,4.75,0.0,2.0,2.75,0.552042,1.024514
0.05,B_OLS,300,1.0,3.25,0.0,2.0,1.25,0.545704,1.03179
0.05,B_POSI,300,1.0,10.25,0.5,2.0,7.75,0.5617,1.01335
0.05,N_LASSO,300,1.0,10.375,0.25,2.0,8.125,0.563075,1.011712


In [181]:


# strong factor beta~ unif[-1];
# weak factor beta~ unif[-.5];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,300,1.0,5.0,0.0,2.0,3.0,0.258005,1.028523
0.01,B_OLS,300,1.0,4.2,0.0,2.0,2.2,0.246362,1.036584
0.01,B_POSI,300,1.0,10.0,0.6,2.0,7.4,0.269547,1.020367
0.01,N_LASSO,300,1.0,8.4,0.0,2.0,6.4,0.27294,1.017985
0.01,N_OLS,300,1.0,32.2,22.4,2.0,7.8,0.197727,1.068793
0.01,P_POSI,300,1.0,15.8,5.4,2.0,8.4,0.255928,1.029719
0.05,B_LASSO,300,1.0,5.2,0.0,2.0,3.2,0.258697,1.028058
0.05,B_OLS,300,1.0,4.6,0.0,2.0,2.6,0.24909,1.034743
0.05,B_POSI,300,1.0,10.0,0.6,2.0,7.4,0.269547,1.020367
0.05,N_LASSO,300,1.0,9.3,0.3,2.0,7.0,0.271937,1.018692


In [144]:

# N = 200
# T = 400
# d = 150
# s = 2
# s_weak = 5
# strong factor beta~ unif[-1.1];
# weak factor beta~ unif[-.2];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,100,1.0,10.05,0.0,3.0,7.05,0.482479,1.037155
0.01,B_LASSO,100,2.0,3.1,0.0,3.0,0.1,0.182405,2.040082
0.01,B_OLS,100,1.0,11.05,0.15,3.0,7.9,0.483153,1.036475
0.01,B_OLS,100,2.0,0.3,0.0,0.3,0.0,0.019666,2.233645
0.01,B_POSI,100,1.0,15.3,2.3,3.0,10.0,0.47852,1.041069
0.01,B_POSI,100,2.0,10.4,1.45,3.0,5.95,0.161641,2.065746
0.01,N_LASSO,100,1.0,14.8,1.8,3.0,10.0,0.479974,1.039628
0.01,N_LASSO,100,2.0,9.35,0.85,3.0,5.5,0.165162,2.061442
0.01,N_OLS,100,1.0,91.55,78.55,3.0,10.0,0.107754,1.361428
0.01,N_OLS,100,2.0,12.1,3.1,3.0,6.0,0.153712,2.075491


In [132]:

# N = 200
# T = 400
# d = 150
# s = 2
# s_weak = 5
# strong factor beta~ unif[-1.1];
# weak factor beta~ unif[-.2];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,50,1.0,5.0,0.0,5.0,0.0,0.627921,1.008736
0.01,B_LASSO,50,2.0,4.8,0.0,4.8,0.0,0.287942,2.048048
0.01,B_LASSO,50,4.0,0.4,0.0,0.4,0.0,0.006306,4.242427
0.01,B_OLS,50,1.0,6.0,1.0,5.0,0.0,0.624708,1.013016
0.01,B_OLS,50,2.0,1.6,0.0,1.6,0.0,0.092159,2.311041
0.01,B_OLS,50,4.0,0.0,0.0,0.0,0.0,0.0,4.256238
0.01,B_POSI,50,1.0,5.0,0.0,5.0,0.0,0.627921,1.008736
0.01,B_POSI,50,2.0,5.0,0.0,5.0,0.0,0.297024,2.034655
0.01,B_POSI,50,4.0,3.8,0.2,3.6,0.0,0.0584,4.129241
0.01,N_LASSO,50,1.0,5.0,0.0,5.0,0.0,0.627921,1.008736


In [95]:

# N = 200
# T = 400
# d = 150
# s = 2
# s_weak = 5
# strong factor beta~ unif[-1.1];
# weak factor beta~ unif[-.2];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,0.5,2.8,0.0,2.0,0.8,0.709669,0.514344
0.01,B_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.369087,1.014163
0.01,B_LASSO,150,2.0,2.0,0.0,2.0,0.0,0.132435,2.016522
0.01,B_OLS,150,0.5,33.6,27.0,2.0,4.6,0.663983,0.55258
0.01,B_OLS,150,1.0,2.0,0.0,2.0,0.0,0.369087,1.014163
0.01,B_OLS,150,2.0,0.0,0.0,0.0,0.0,0.0,2.16514
0.01,B_POSI,150,0.5,10.0,3.0,2.0,5.0,0.711557,0.512607
0.01,B_POSI,150,1.0,6.2,2.2,2.0,2.0,0.35834,1.022694
0.01,B_POSI,150,2.0,5.8,3.8,2.0,0.0,0.114996,2.036665
0.01,N_LASSO,150,0.5,8.8,1.8,2.0,5.0,0.713534,0.510894


In [90]:

# N = 200
# T = 400
# d = 150
# strong factor beta~ unif[-0.1];
# weak factor beta~ unif[-1.0];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,0.5,2.0,0.0,0.0,2.0,0.545085,0.515694
0.01,B_LASSO,150,1.0,2.0,0.0,0.0,2.0,0.237736,1.009495
0.01,B_LASSO,150,2.0,2.0,0.0,0.0,2.0,0.069932,2.009483
0.01,B_OLS,150,0.5,112.0,106.0,4.0,2.0,0.108494,0.74919
0.01,B_OLS,150,1.0,2.0,0.0,0.0,2.0,0.230501,1.009495
0.01,B_OLS,150,2.0,0.0,0.0,0.0,0.0,0.0,2.081933
0.01,B_POSI,150,0.5,8.0,2.0,4.0,2.0,0.552445,0.512657
0.01,B_POSI,150,1.0,4.0,2.0,1.0,2.0,0.224842,1.01555
0.01,B_POSI,150,2.0,4.0,2.0,0.0,2.0,0.056256,2.020112
0.01,N_LASSO,150,0.5,6.0,0.0,3.0,2.0,0.557946,0.50956


In [87]:


# strong factor beta~ unif[-0.1];
# weak factor beta~ unif[-1.0];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,1.0,4.0,0.0,0.0,4.0,0.230626,1.016958
0.01,B_LASSO,150,2.0,3.0,0.0,0.0,3.0,0.051529,2.022544
0.01,B_OLS,150,1.0,4.0,0.0,0.0,4.0,0.230626,1.016958
0.01,B_OLS,150,2.0,0.0,0.0,0.0,0.0,0.0,2.085878
0.01,B_POSI,150,1.0,8.0,3.0,0.0,4.0,0.22057,1.022222
0.01,B_POSI,150,2.0,6.0,2.0,0.0,4.0,0.045967,2.027239
0.01,N_LASSO,150,1.0,5.0,1.0,0.0,4.0,0.227615,1.019547
0.01,N_LASSO,150,2.0,5.0,1.0,0.0,4.0,0.05806,2.025647
0.01,N_OLS,150,1.0,150.0,144.0,2.0,4.0,-1.886446,1.979619
0.01,N_OLS,150,2.0,7.0,3.0,0.0,4.0,0.043334,2.040137


In [84]:


# strong factor beta~ unif[-1.0];
# weak factor beta~ unif[-0.2];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.403191,1.010387
0.01,B_OLS,150,1.0,2.0,0.0,2.0,0.0,0.403191,1.011921
0.01,B_POSI,150,1.0,7.0,3.0,2.0,2.0,0.399075,1.02008
0.01,N_LASSO,150,1.0,4.0,2.0,2.0,1.0,0.401815,1.018558
0.01,N_OLS,150,1.0,132.0,126.0,2.0,4.0,-0.739783,1.740358
0.01,P_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.403191,1.010387
0.01,P_POSI,150,1.0,7.0,3.0,2.0,2.0,0.399075,1.02008
0.05,B_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.403191,1.010387
0.05,B_OLS,150,1.0,2.0,0.0,2.0,0.0,0.403191,1.012206
0.05,B_POSI,150,1.0,7.0,3.0,2.0,2.0,0.399075,1.02008


In [79]:


# strong factor beta~ unif[-0.5,0.5];
# weak factor beta~ unif[-0.25,0.25];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.133139,1.013749
0.01,B_OLS,150,1.0,4.0,0.0,2.0,2.0,0.132979,1.013749
0.01,B_POSI,150,1.0,8.0,2.0,2.0,4.0,0.116214,1.021731
0.01,N_LASSO,150,1.0,6.0,0.0,2.0,3.0,0.131702,1.013061
0.01,N_OLS,150,1.0,150.0,144.0,2.0,4.0,-2.400791,2.004254
0.01,P_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.133139,1.013749
0.01,P_POSI,150,1.0,8.0,2.0,2.0,4.0,0.116214,1.021731
0.05,B_LASSO,150,1.0,2.0,0.0,2.0,0.0,0.133139,1.013749
0.05,B_OLS,150,1.0,4.5,0.0,2.0,2.0,0.132979,1.014983
0.05,B_POSI,150,1.0,8.0,2.0,2.0,4.0,0.116214,1.021731


In [74]:


# strong factor beta~ unif[-0.5,0.5];
# weak factor beta~ unif[-0.25,0.25];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,150,1.0,3.0,0.0,3.0,0.0,0.177164,1.013807
0.01,B_OLS,150,1.0,4.0,0.0,3.0,1.0,0.177164,1.013807
0.01,B_POSI,150,1.0,8.0,2.0,3.0,2.0,0.16322,1.022201
0.01,N_LASSO,150,1.0,6.0,1.0,3.0,3.0,0.169058,1.016675
0.01,N_OLS,150,1.0,148.0,142.0,3.0,3.0,-2.22069,1.993099
0.01,P_LASSO,150,1.0,3.0,0.0,3.0,0.0,0.177164,1.013807
0.01,P_POSI,150,1.0,8.0,2.0,3.0,2.0,0.16322,1.022201
0.05,B_LASSO,150,1.0,3.0,0.0,3.0,0.0,0.177164,1.013807
0.05,B_OLS,150,1.0,4.0,0.0,3.0,1.0,0.175498,1.013998
0.05,B_POSI,150,1.0,8.0,2.0,3.0,2.0,0.16322,1.022201


In [71]:

# strong factor beta~ unif[-0.5,0.5];
# weak factor beta~ unif[-0.25,0.25];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,60,1.0,3.0,0.0,3.0,0.0,0.187725,1.017464
0.01,B_OLS,60,1.0,6.0,0.5,3.0,3.0,0.191236,1.017078
0.01,B_POSI,60,1.0,6.0,0.0,3.0,2.5,0.191709,1.015959
0.01,N_LASSO,60,1.0,6.0,0.0,3.0,2.0,0.19103,1.017498
0.01,N_OLS,60,1.0,58.0,52.0,3.0,3.0,-0.023552,1.143202
0.01,P_LASSO,60,1.0,3.0,0.0,3.0,0.0,0.187725,1.017464
0.01,P_POSI,60,1.0,6.0,0.0,3.0,2.5,0.191709,1.016574


In [68]:

# strong factor beta~ unif[-0.5,0.5];
# weak factor beta~ unif[-0.25,0.25];
simu_detailed.groupby(['gamma','names','d','noise_std']).agg({'N Selections':'median','N False Selections':'median',
                                                                                         'N Strong':'median','N Weak':'median',
                                                                                         'OOS R2':'median','OOS RMS':'median',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.01,B_LASSO,60,0.5,6.0,0.0,3.0,3.0,0.500239,0.505476
0.01,B_LASSO,60,1.0,3.0,0.0,3.0,0.0,0.196221,1.010552
0.01,B_OLS,60,0.5,56.0,50.0,3.0,3.0,0.370927,0.567896
0.01,B_OLS,60,1.0,7.0,1.0,3.0,3.0,0.199837,1.008275
0.01,B_POSI,60,0.5,6.0,0.0,3.0,3.0,0.495222,0.504605
0.01,B_POSI,60,1.0,5.0,0.0,3.0,2.0,0.201723,1.006847
0.01,N_LASSO,60,0.5,6.0,0.0,3.0,3.0,0.497406,0.504605
0.01,N_LASSO,60,1.0,5.0,0.0,3.0,2.0,0.202103,1.006847
0.01,N_OLS,60,0.5,60.0,54.0,3.0,3.0,0.35699,0.574152
0.01,N_OLS,60,1.0,58.0,52.0,3.0,3.0,-0.025017,1.139766


In [45]:

# uniform
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,0.5,6.0,0.0,3.0,3.0,0.592886,0.504998
0.01,B_LASSO,100,Unknown,1.0,6.0,0.0,3.0,3.0,0.261817,1.016268
0.01,B_LASSO,100,Unknown,2.0,0.0,0.0,0.0,0.0,0.0,2.098007
0.01,B_LASSO,100,Unknown,4.0,0.0,0.0,0.0,0.0,0.0,4.035215
0.01,B_OLS,100,Unknown,0.5,71.666667,65.666667,3.0,3.0,0.43933,0.592553
0.01,B_OLS,100,Unknown,1.0,7.666667,1.666667,3.0,3.0,0.256514,1.019908
0.01,B_OLS,100,Unknown,2.0,0.0,0.0,0.0,0.0,0.0,2.098007
0.01,B_OLS,100,Unknown,4.0,0.0,0.0,0.0,0.0,0.0,4.035215
0.01,B_POSI,100,Unknown,0.5,7.0,1.0,3.0,3.0,0.591605,0.505797
0.01,B_POSI,100,Unknown,1.0,6.333333,0.333333,3.0,3.0,0.260722,1.017026


In [42]:

# s = 0.5
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,0.5,8.0,0.0,5.0,3.0,0.891356,0.508296
0.01,B_LASSO,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.667394,1.023138
0.01,B_LASSO,100,Unknown,2.0,8.0,0.0,5.0,3.0,0.334231,2.028859
0.01,B_LASSO,100,Unknown,4.0,0.333333,0.0,0.0,0.333333,0.002242,4.257232
0.01,B_OLS,100,Unknown,0.5,13.0,5.0,5.0,3.0,0.889035,0.513566
0.01,B_OLS,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.667394,1.023138
0.01,B_OLS,100,Unknown,2.0,0.666667,0.0,0.666667,0.0,0.032333,2.446648
0.01,B_OLS,100,Unknown,4.0,0.0,0.0,0.0,0.0,0.0,4.262036
0.01,B_POSI,100,Unknown,0.5,8.666667,0.666667,5.0,3.0,0.890937,0.509285
0.01,B_POSI,100,Unknown,1.0,9.0,1.0,5.0,3.0,0.665875,1.02547


In [40]:

# s = 0.1
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,0.5,8.0,0.0,5.0,3.0,0.8705,0.50894
0.01,B_LASSO,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.627686,1.018101
0.01,B_LASSO,100,Unknown,2.0,8.0,0.0,5.0,3.0,0.286548,2.027217
0.01,B_LASSO,100,Unknown,4.0,0.333333,0.0,0.333333,0.0,0.006667,4.208035
0.01,B_OLS,100,Unknown,0.5,16.666667,8.666667,5.0,3.0,0.865483,0.518622
0.01,B_OLS,100,Unknown,1.0,8.333333,0.333333,5.0,3.0,0.627089,1.018927
0.01,B_OLS,100,Unknown,2.0,3.0,0.0,3.0,0.0,0.163224,2.194416
0.01,B_OLS,100,Unknown,4.0,0.0,0.0,0.0,0.0,0.0,4.222194
0.01,B_POSI,100,Unknown,0.5,8.666667,0.666667,5.0,3.0,0.870238,0.509444
0.01,B_POSI,100,Unknown,1.0,8.666667,0.666667,5.0,3.0,0.626857,1.019222


In [37]:
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.821758,1.014965
0.01,B_LASSO,100,Unknown,1.5,8.0,0.0,5.0,3.0,0.702313,1.524606
0.01,B_LASSO,100,Unknown,2.0,8.0,0.0,5.0,3.0,0.56827,2.026741
0.01,B_OLS,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.821758,1.014965
0.01,B_OLS,100,Unknown,1.5,7.666667,0.0,5.0,2.666667,0.699674,1.531804
0.01,B_OLS,100,Unknown,2.0,5.666667,0.0,5.0,0.666667,0.549099,2.071054
0.01,B_POSI,100,Unknown,1.0,9.333333,1.333333,5.0,3.0,0.820836,1.017679
0.01,B_POSI,100,Unknown,1.5,9.666667,1.666667,5.0,3.0,0.700051,1.530272
0.01,B_POSI,100,Unknown,2.0,9.0,1.0,5.0,3.0,0.565982,2.032085
0.01,N_LASSO,100,Unknown,1.0,8.666667,0.666667,5.0,3.0,0.821345,1.016154


In [36]:
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.821758,1.014965
0.01,B_LASSO,100,Unknown,1.5,8.0,0.0,5.0,3.0,0.702313,1.524606
0.01,B_LASSO,100,Unknown,2.0,8.0,0.0,5.0,3.0,0.56827,2.026741
0.01,B_OLS,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.821758,1.014965
0.01,B_OLS,100,Unknown,1.5,7.666667,0.0,5.0,2.666667,0.699674,1.531804
0.01,B_OLS,100,Unknown,2.0,5.666667,0.0,5.0,0.666667,0.549099,2.071054
0.01,B_POSI,100,Unknown,1.0,9.333333,1.333333,5.0,3.0,0.820836,1.017679
0.01,B_POSI,100,Unknown,1.5,9.666667,1.666667,5.0,3.0,0.700051,1.530272
0.01,B_POSI,100,Unknown,2.0,9.0,1.0,5.0,3.0,0.565982,2.032085
0.01,N_LASSO,100,Unknown,1.0,8.666667,0.666667,5.0,3.0,0.821345,1.016154


In [31]:
simu_detailed.groupby(['gamma','names','d','noise_unk_std','noise_std']).agg({'N Selections':'mean','N False Selections':'mean',
                                                                                         'N Strong':'mean','N Weak':'mean',
                                                                                         'OOS R2':'mean','OOS RMS':'mean',
                                                                                        })

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,N Selections,N False Selections,N Strong,N Weak,OOS R2,OOS RMS
gamma,names,d,noise_unk_std,noise_std,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.01,B_LASSO,100,Unknown,1.0,8.0,0.0,5.0,3.0,0.627127,1.011029
0.01,B_LASSO,100,Unknown,1.5,8.0,0.0,5.0,3.0,0.429602,1.523926
0.01,B_LASSO,100,Unknown,2.0,7.666667,0.0,5.0,2.666667,0.280486,2.034112
0.01,B_OLS,100,Unknown,1.0,8.333333,0.333333,5.0,3.0,0.626797,1.011478
0.01,B_OLS,100,Unknown,1.5,7.0,0.0,5.0,2.0,0.427173,1.527212
0.01,B_OLS,100,Unknown,2.0,1.0,0.0,0.666667,0.333333,0.035702,2.354679
0.01,B_POSI,100,Unknown,1.0,9.333333,1.333333,5.0,3.0,0.624645,1.014409
0.01,B_POSI,100,Unknown,1.5,10.0,2.0,5.0,3.0,0.424699,1.530482
0.01,B_POSI,100,Unknown,2.0,8.666667,0.666667,5.0,3.0,0.278742,2.036549
0.01,N_LASSO,100,Unknown,1.0,9.0,1.0,5.0,3.0,0.625167,1.013702


In [None]:
from time import gmtime, strftime
affix = strftime("%m%d_%H%M%S", gmtime())
simu_detailed.to_csv('/home/jasonzou/PanelPoSI_Sept2022/csv_outputs/simulation'+affix+'.csv')