In [1]:
import yaml
from yaml.loader import SafeLoader
from socket import gethostname
import numpy as np
import pandas as pd
from sklearn.base import clone
from dev_interaction_util import generate_synthetic_dev_outcomes, generate_synthetic_dev_data, set_up_interactions
from dev_interaction_util import do_scoring_loop, get_best_model, summarize_overall_df_results, do_final_fit, present_model_results, present_results_vs_ground_truth_cors
from dev_interaction_util import load_and_preprocess_data, impute_data, run_full_limited_predictor_analysis
from ml_util import *
# Imputing with MICE
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer
from sklearn import linear_model
from ml_util import get_data_for_imputation
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import permutation_importance
import numpy as np
from IPython.display import display, HTML
from sklearn.base import clone
from sklearn.inspection import permutation_importance
import seaborn as sns
from sklearn.feature_selection import SelectKBest, f_regression, RFE


In [2]:


print(gethostname())
# Open the file and load the file
with open('config.yml') as f:
    all_yaml = yaml.load(f, Loader=SafeLoader)
    if gethostname() in all_yaml.keys():
        config = all_yaml[gethostname()]
    else:
        config = all_yaml['default']
        
print(config)



Benjamins-MacBook-Pro-2.local
{'dropbox_data_dir': '/Users/benjaminsmith/Dropbox (University of Oregon)/UO-SAN Lab/Berkman Lab/Devaluation/analysis_files/data/'}


This notebook is derived from `test_feature_selection.ipynb`.

In [3]:
dropbox_data_dir = config['dropbox_data_dir']


In [4]:
analysis_data, outcome_measures = load_and_preprocess_data(dropbox_data_dir)

In [5]:
analysis_data_imputed = impute_data(analysis_data)



In [6]:


def do_hyperparameter_selection_loop_r2(X,y,cv):
    return(do_hyperparameter_selection_loop_w_metric(X,y,cv,'r2'))

#loops through the different estimators and feature selection methods and does a grid search over all to find the best hyperparameters
def do_hyperparameter_selection_loop(X, y,cv):
    return(do_hyperparameter_selection_loop_w_metric(X,y,cv,'neg_mean_absolute_error'))

#loops through the different estimators and feature selection methods and does a grid search over all to find the best hyperparameters
def do_hyperparameter_selection_loop_w_metric(X, y,cv,metric):
    #alpha parameters for Ridge and Lasso
    alpha_10pow_lower = 1
    alpha_10pow_upper = 0
    alpha_increments=1
    alpha_range = np.concatenate([np.power(10,np.linspace(-alpha_10pow_lower,alpha_10pow_upper,(alpha_10pow_lower+alpha_10pow_upper)*alpha_increments+1)),
        [0.2,0.3,0.4,0.6,0.8,1.0]])
    
    all_cv_results = []

    pipeline_estimator_name = 'estimator'
    feature_selection_name = 'feature_selection'


    #define the param_grid for the estimators
    estimators_to_run = {
        'Ridge':{
            'estimator':linear_model.Ridge,
            'parameters':{'alpha':alpha_range}
        },
        'Lasso':{
            'estimator':linear_model.Lasso,
            'parameters':{'alpha':alpha_range}
        },
        'DecisionTreeRegressor':{
            'estimator':DecisionTreeRegressor,
            'parameters':{
                'max_depth':[2, 4],
                'min_samples_split':[20,50],
                'min_samples_leaf':[20,50]
            }
        }             
    }

    k_max_val = np.min([50,X.shape[1]])

    for estimator_name,estimator_dict in estimators_to_run.items():
        #param grid for the feature seelction
        #this is here because we need to know the estimator to pass to the feature selector
        feature_selectors_to_run = {
            'None':None,
            'KBest':{
                'selector':SelectKBest(),
                'parameters':{
                    'score_func' : [f_regression], 
                    'k' : [10,25,k_max_val]
                    }
            },
            'RFE':{
                'selector':RFE(linear_model.LinearRegression()),
                'parameters':{
                    'n_features_to_select' : [10,25],
                    #'verbose':[1],
                    'step':[5]
                }
            }
        }
        for selector_name, selector_dict in feature_selectors_to_run.items():
        #create the estimator
            if selector_name == 'None':
                pipeline = Pipeline([('scaler',StandardScaler()),
                                     (pipeline_estimator_name,estimator_dict['estimator']())])
                selector_params = {}
            else:
                pipeline = Pipeline([('scaler',StandardScaler()),
                                     (feature_selection_name,selector_dict['selector']), 
                                     (pipeline_estimator_name,estimator_dict['estimator']())])
                selector_params = selector_dict['parameters']

            estimator_param_grid = {(pipeline_estimator_name + '__'+k):v for k,v in estimator_dict['parameters'].items()}
            selector_param_grid = {(feature_selection_name + '__'+k):v for k,v in selector_params.items()}
            #combine the two param grid dictionaries
            full_param_grid = {**selector_param_grid, **estimator_param_grid}
            print(pipeline)
            print(full_param_grid)

            
        
            gs_1 = GridSearchCV(estimator=pipeline, 
                                param_grid = full_param_grid, 
                                cv=cv,scoring=metric,verbose=1)
            gs_1.fit(X,y)
            all_cv_results.append(gs_1)

    #create a dataframe with the best parameters, best mean_test_score, and name of the model

    best_params_df = pd.DataFrame({
        'model': [cv_result.estimator for cv_result in all_cv_results],
        'model_name': [cv_result.estimator.__class__.__name__ for cv_result in all_cv_results],
        'best_params': [extract_estimator_params_from_gridsearch(cv_result.best_params_) for cv_result in all_cv_results],
        'best_score': [cv_result.best_score_ for cv_result in all_cv_results],
        'best_raw_params' : [cv_result.best_params_ for cv_result in all_cv_results]
        })
    
    best_params_df = best_params_df.sort_values('best_score',ascending=False).reset_index(drop=True)

    best_model = clone(best_params_df['model'][0])
    best_model_params = best_params_df['best_raw_params'][0]
    best_model.set_params(**best_model_params)

    return {
        'best_model': best_model,
        'best_params_df':best_params_df,
        'raw_cv_results':all_cv_results
    }


# Improving fit with manual theory-driven feature

My past analysis showed that by manually removing some features before the analysis starts, we can improve performance beyond the chance performance otherwise seen.

So, it might be useful to understand how much we can improve our performance by manual feature selection before the automatic feature selection applies.

This was previously done in `test_limited_predictors.ipynb`. We tested as few as 2 distractor features. In that test, predictor features generally had correlations in the range of |r|=0.06 to 0.53, with most around 0.4 (we should confirm that because it seems fishy that PCS was detegted as an effect, but didn't model as a large predictor). With most `|r|=0.4`, this seems unrealistically high to expect, and we should aim to build a pipeline capable of detecting more subtle effects than that. An approximate `|r|=0.3` can be achieved by mixing in a predictor scaled to 8% of normal scale.

I can imagine it is plausible to cut down to as few as two self-report, one behavioral, and one neural measure per intervention, plus sex and age. That would yield 10 different variables. At the other end, we might want 10 self-report, two behavioral, and five neural measures per intervention tested, plus 6 different demographic variables--a total of 40 variables. Let's see how these would perform, as well as mid-range of 20 predictor variables. In each case we'll restrict to three valid predictors per intervention.

In [7]:


def run_2_group_predictor_analysis(total_predictor_count, outcome_measures, analysis_data_imputed, effect_size, hyperparameter_optimizer,
                                        custom_interaction_effects=None
                                        ):

    #set np random seed
    np.random.seed(3161527)

    group_names = ['ichi','ni','san']
    #assign each row randomly to a group
    group_assignments_3 = np.random.choice(group_names,analysis_data_imputed.shape[0])
    
    #two-group analysis, comparing ichi vs ni and san
    group_assignments_2 = group_assignments_3.copy()
    group_assignments_2[group_assignments_2=='ni'] = 'nisan'
    group_assignments_2[group_assignments_2=='san'] = 'nisan'


    #synthetic outcomes
    outcome_measures = generate_synthetic_dev_outcomes(outcome_measures)

    #create a limited set of predictors
    analysis_data_smol = analysis_data_imputed.iloc[:,0:total_predictor_count]

    # add synthetic primary and interaction effects

    if custom_interaction_effects is None:
        #set up the interaction effects
        #0.08 will give us correlations around 0.3 between the interaction effects and the outcome
        custom_interaction_effects_g1 = [0]*analysis_data_smol.shape[1]
        custom_interaction_effects_g1[0] = effect_size
        custom_interaction_effects_g1[1] = effect_size
        custom_interaction_effects_g1[2] = -effect_size


        custom_interaction_effects = {'nisan':custom_interaction_effects_g1}

    


    synthetic_data = generate_synthetic_dev_data(analysis_data_smol, group_assignments_2,outcome_measures, group_interaction_effects = custom_interaction_effects)
    interaction_effect_df = synthetic_data['X_weights']
    outcome_measures = synthetic_data['y']

    # Set up outcome measures and group assignment one-hot

    outcome_measures = calculate_outcome_changes(outcome_measures)
    

    group_assignment_onehots = pd.get_dummies(group_assignments_2).loc[:,['nisan']]

    predictor_data = set_up_interactions(analysis_data_smol, group_assignment_onehots)


    #remove any NA values for this outcome measure in both the predictor data and the outcome data
    outcome_nas = outcome_measures['d_bf'].isna()

    outcome_measures_nona = outcome_measures.loc[~outcome_nas,:]
    predictor_data_nona = predictor_data.loc[~outcome_nas,:]
    group_assignment_onehots_nonan = group_assignment_onehots.loc[~outcome_nas,:]
    group_assignments_nona = group_assignments_2[~outcome_nas]

    ### Try out CV with simple gridsearch

    scoring_data = do_scoring_loop(X=predictor_data_nona, y= outcome_measures_nona['d_bf'], 
                    groups = group_assignments_nona, 
                    hyperparameter_selection_on_fold=hyperparameter_optimizer,
                    outer_folds=5)

    scores = scoring_data['scores']
    best_models = scoring_data['best_models']
    best_params_df_list = scoring_data['best_params_df_list']
    raw_cv_results_list = scoring_data['raw_cv_results_list']

    print("scores:")
    print(scores)
    overall_score = np.mean(scores)
    print("overall_score:")
    print(overall_score)



    best_model = get_best_model(summarize_overall_df_results(raw_cv_results_list))
    final_fit = do_final_fit(X=predictor_data_nona, y= outcome_measures_nona['d_bf'], final_model=best_model)
    final_results = present_model_results(X=predictor_data_nona, final_fit=final_fit, y=outcome_measures_nona['d_bf'])

    #print rows of final_results where feature_name is the list of features to check
    base_regressors = interaction_effect_df.predictor[interaction_effect_df.interaction_effect!=0]
    regressors_to_check = [x+y for y in ['','*nisan'] for x in base_regressors]
    final_results['planned_regression'] = final_results['predictor'].isin(regressors_to_check)

    present_results_vs_ground_truth_cors(predictor_data_nona,outcome_measures_nona,group_assignments_nona,final_results,base_regressors)

    return(overall_score)




In [8]:
overall_scores = pd.DataFrame(columns=['n_features','effect_size', 'overall_score'])
        
        
# hypers = {
#     'r2':do_hyperparameter_selection_loop_r2,
#     'mae':do_hyperparameter_selection_loop
# }


for pcount in [15,20,25,30]:
    for effect_size in [0.08,0.10,0.12,0.14, 0.16]:
        custom_interaction_effects_g1    =  [0]*pcount
        custom_interaction_effects_g1[0] =  effect_size
        custom_interaction_effects_g1[1] =  effect_size
        custom_interaction_effects_g1[2] = -effect_size

        custom_interaction_effects_g2 = custom_interaction_effects_g1

        custom_interaction_effects = {'nisan':custom_interaction_effects_g1}


        overall_score = run_full_limited_predictor_analysis(
            pcount,
            outcome_measures,
            analysis_data_imputed,
            effect_size= effect_size,
            hyperparameter_optimizer = do_hyperparameter_selection_loop
            )

        #run the analysis with a limited number of predictors
        overall_scores = overall_scores.append(
            {'n_features':pcount,
            'effect_size':effect_size,
            'overall_score':overall_score
            #,'hyper_target':'mae'
            },
            ignore_index=True)

['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.250385,0.053035,0.298879,0.081511
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.253106,0.047541,0.289953,0.035348
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.253106,0.047541,0.289953,0.035348
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.264384,0.061781,0.327677,0.069726
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.267684,0.049697,0.329519,0.044714
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.267684,0.049697,0.329519,0.044714
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.279107,0.06821,0.339821,0.050708
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.279376,0.066624,0.342303,0.043856
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.279376,0.066624,0.342303,0.043856
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.281872,0.081962,0.25787,0.092071


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
17,BSCS*ni,1.724228,0.382029,0.382029
45,BFI_extraversion*san,0.792246,0.096467,0.096467
19,BIS_11*ni,-0.611822,0.05063,0.05063
38,ACES_neglectful_parenting*san,-0.425717,0.028504,0.028504
27,ACES_household_dysfunction*ni,-0.234528,0.009089,0.009089
41,ACES_divorced_separated*san,-0.16172,0.00441,0.00441
7,ACES_abuse,0.133042,0.00352,0.00352
24,ACES_abuse*ni,-0.127256,0.002904,0.002904
10,ACES_household_dysfunction,0.027042,4.9e-05,4.9e-05
37,TRSQ*san,0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,1.724,-0.0,,0.382,0.0,-0.137,0.35,0.008,0.382
BFI_extraversion,,,0.792,,,0.096,,,,0.096
BIS_11,,-0.612,,,0.051,,0.047,-0.383,-0.043,0.051
ACES_neglectful_parenting,-0.0,0.0,-0.426,0.0,0.0,0.029,-0.046,-0.017,-0.218,0.029
ACES_household_dysfunction,0.027,-0.235,-0.0,0.0,0.009,0.0,,,,0.009
ACES_abuse,0.133,-0.127,0.0,0.004,0.003,0.0,,,,0.006
ACES_divorced_separated,-0.0,-0.0,-0.162,0.0,0.0,0.004,,,,0.004


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 47], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.285414,0.048881,0.298713,0.035208
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.285414,0.048881,0.298713,0.035208
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.300585,0.056362,0.319323,0.067024
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.304495,0.120749,0.276454,0.067585
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.311944,0.131912,0.276239,0.07076
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.321352,0.136164,0.276253,0.070526
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.32423,0.046617,0.339216,0.047183
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.32423,0.046617,0.339216,0.047183
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.327913,0.067239,0.341078,0.061043
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.334452,0.140091,0.276935,0.070247


doing permutation test on importance; this may take time.
Number of selected features: 19


Unnamed: 0,predictor,coef,feature_importance,fa_abs
17,BSCS*ni,1.520835,0.280856,0.280856
36,RS*san,0.60134,0.053674,0.053674
18,EDM*ni,0.521283,0.037669,0.037669
38,ACES_neglectful_parenting*san,-0.454181,0.030541,0.030541
2,BIS_11,-0.443326,0.030219,0.030219
19,BIS_11*ni,-0.452073,0.028686,0.028686
31,BFI_neuroticism*ni,-0.448212,0.028388,0.028388
13,BFI_extraversion,0.331615,0.022058,0.022058
45,BFI_extraversion*san,0.301765,0.017198,0.017198
11,BFI_agreeableness,-0.325563,0.014493,0.014493


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,-0.0,1.521,-0.0,0.0,0.281,0.0,-0.137,0.415,-0.011,0.281
BIS_11,-0.443,-0.452,-0.0,0.03,0.029,0.0,0.047,-0.44,-0.033,0.059
RS,0.108,-0.0,0.601,0.004,0.0,0.054,0.039,-0.177,0.304,0.058
EDM,0.119,0.521,-0.0,0.003,0.038,0.0,0.053,0.287,-0.065,0.04
BFI_extraversion,0.332,0.0,0.302,0.022,0.0,0.017,,,,0.039
ACES_neglectful_parenting,-0.072,0.0,-0.454,0.002,0.0,0.031,-0.046,-0.014,-0.262,0.032
BFI_neuroticism,-0.0,-0.448,0.0,0.0,0.028,0.0,,,,0.028
BFI_agreeableness,-0.326,0.0,-0.0,0.014,0.0,0.0,,,,0.014
ACES_divorced_separated,-0.0,-0.0,-0.263,0.0,0.0,0.01,,,,0.01
ACES_household_dysfunction,0.037,-0.19,-0.0,0.0,0.006,0.0,,,,0.006


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 47], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.292519,0.099375,0.274949,0.104974
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.298002,0.110155,0.276507,0.108841
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.30585,0.115535,0.278478,0.107369
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.315633,0.048974,0.30389,0.040323
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.315633,0.048974,0.30389,0.040323
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.316877,0.120681,0.280725,0.105875
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.324399,0.12296,0.282225,0.104459
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.334027,0.125584,0.285072,0.103018
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.342245,0.058045,0.321634,0.095602
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.346213,0.128463,0.289095,0.10094


doing permutation test on importance; this may take time.
Number of selected features: 25


Unnamed: 0,predictor,coef,feature_importance,fa_abs
17,BSCS*ni,4.647353,2.375551,2.375551
19,BIS_11*ni,-4.085276,1.798008,1.798008
16,san,-3.757856,1.515308,1.515308
37,TRSQ*san,2.459315,0.678605,0.678605
36,RS*san,2.159988,0.541177,0.541177
45,BFI_extraversion*san,2.129124,0.520743,0.520743
15,ni,1.865242,0.383996,0.383996
43,BFI_agreeableness*san,-1.704666,0.306805,0.306805
27,ACES_household_dysfunction*ni,-1.053132,0.115208,0.115208
23,ACES_neglectful_parenting*ni,0.921492,0.100176,0.100176


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,4.647,0.221,,2.376,0.007,-0.137,0.472,-0.028,2.382
BIS_11,,-4.085,,,1.798,,0.047,-0.489,-0.023,1.798
san,-3.758,,,1.515,,,,,,1.515
TRSQ,,,2.459,,,0.679,0.091,-0.266,0.368,0.679
RS,,,2.16,,,0.541,0.039,-0.198,0.345,0.541
BFI_extraversion,,,2.129,,,0.521,,,,0.521
ni,1.865,,,0.384,,,,,,0.384
BFI_agreeableness,,,-1.705,,,0.307,,,,0.307
ACES_household_dysfunction,0.855,-1.053,-0.415,0.092,0.115,0.016,,,,0.223
ACES_neglectful_parenting,-0.69,0.921,-0.218,0.048,0.1,0.004,-0.046,-0.01,-0.303,0.152


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 47], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.297242,0.113614,0.265083,0.10087
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.302255,0.123156,0.264094,0.105871
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.308595,0.126664,0.263906,0.106069
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.317413,0.130538,0.264239,0.107501
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.324057,0.131626,0.264591,0.108446
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.332499,0.13219,0.26564,0.109458
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.338442,0.051539,0.308855,0.039318
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.338442,0.051539,0.308855,0.039318
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.343705,0.133011,0.268242,0.111221
"dict_values([StandardScaler(), Ridge()])",{'estimator__alpha': 1.0},-3.370348,0.080335,0.231593,0.047919


doing permutation test on importance; this may take time.
Number of selected features: 25


Unnamed: 0,predictor,coef,feature_importance,fa_abs
17,BSCS*ni,5.240274,2.78422,2.78422
19,BIS_11*ni,-4.66262,2.220577,2.220577
16,san,-4.284614,1.834663,1.834663
37,TRSQ*san,2.894945,0.857064,0.857064
36,RS*san,2.483538,0.62898,0.62898
45,BFI_extraversion*san,2.119595,0.459059,0.459059
15,ni,1.805867,0.338728,0.338728
43,BFI_agreeableness*san,-1.765087,0.310993,0.310993
27,ACES_household_dysfunction*ni,-1.073831,0.111741,0.111741
23,ACES_neglectful_parenting*ni,0.94803,0.094241,0.094241


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,5.24,0.148,,2.784,0.002,-0.137,0.521,-0.044,2.787
BIS_11,,-4.663,,,2.221,,0.047,-0.531,-0.013,2.221
san,-4.285,,,1.835,,,,,,1.835
TRSQ,,,2.895,,,0.857,0.091,-0.283,0.411,0.857
RS,,,2.484,,,0.629,0.039,-0.215,0.381,0.629
BFI_extraversion,,,2.12,,,0.459,,,,0.459
ni,1.806,,,0.339,,,,,,0.339
BFI_agreeableness,,,-1.765,,,0.311,,,,0.311
ACES_household_dysfunction,0.858,-1.074,-0.411,0.076,0.112,0.017,,,,0.205
ACES_neglectful_parenting,-0.69,0.948,-0.348,0.051,0.094,0.012,-0.046,-0.007,-0.338,0.157


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 47], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.320681,0.120835,0.271586,0.110299
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.324916,0.132795,0.270193,0.117291
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.330933,0.137671,0.270299,0.119577
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.339597,0.141897,0.271209,0.124281
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.345851,0.143454,0.272546,0.127838
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.354518,0.143893,0.274842,0.132536
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.357496,0.056374,0.312419,0.039459
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 47, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.357496,0.056374,0.312419,0.039459
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.366299,0.143883,0.279977,0.138598
"dict_values([StandardScaler(), Ridge()])",{'estimator__alpha': 1.0},-3.373992,0.081563,0.236647,0.047876


doing permutation test on importance; this may take time.
Number of selected features: 25


Unnamed: 0,predictor,coef,feature_importance,fa_abs
19,BIS_11*ni,-4.994835,2.377183,2.377183
16,san,-4.811965,2.158199,2.158199
17,BSCS*ni,4.16675,1.642677,1.642677
37,TRSQ*san,3.338437,1.060573,1.060573
36,RS*san,2.812751,0.751929,0.751929
18,EDM*ni,2.371279,0.523716,0.523716
45,BFI_extraversion*san,2.110092,0.425058,0.425058
43,BFI_agreeableness*san,-1.825398,0.309397,0.309397
27,ACES_household_dysfunction*ni,-0.971782,0.084796,0.084796
10,ACES_household_dysfunction,0.853285,0.070007,0.070007


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BIS_11,,-4.995,,,2.377,,0.047,-0.567,-0.004,2.377
san,-4.812,,,2.158,,,,,,2.158
BSCS,,4.167,0.074,,1.643,0.001,-0.137,0.564,-0.058,1.643
TRSQ,,,3.338,,,1.061,0.091,-0.297,0.45,1.061
RS,,,2.813,,,0.752,0.039,-0.23,0.412,0.752
EDM,,2.371,,,0.524,,0.053,0.412,-0.087,0.524
BFI_extraversion,,,2.11,,,0.425,,,,0.425
BFI_agreeableness,,,-1.825,,,0.309,,,,0.309
ACES_household_dysfunction,0.853,-0.972,-0.411,0.07,0.085,0.016,,,,0.171
ACES_neglectful_parenting,-0.699,0.783,-0.478,0.049,0.06,0.021,-0.046,-0.005,-0.37,0.131


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.268368,0.047258,0.3092,0.096481
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.269053,0.072854,0.333186,0.078243
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.280129,0.053543,0.33451,0.041095
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.282265,0.075672,0.338148,0.053572
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.28558,0.077469,0.341224,0.051844
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.285924,0.054509,0.336622,0.03436
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.290811,0.068708,0.346953,0.023353
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.290827,0.068625,0.347245,0.02355
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 20, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.292086,0.054613,0.33136,0.084547
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.292451,0.077121,0.352444,0.044752


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
22,BSCS*ni,1.724228,0.385828,0.385828
55,BFI_extraversion*san,0.792246,0.092698,0.092698
24,BIS_11*ni,-0.611822,0.056835,0.056835
48,ACES_neglectful_parenting*san,-0.425717,0.01945,0.01945
32,ACES_household_dysfunction*ni,-0.234528,0.010201,0.010201
7,ACES_abuse,0.133042,0.006173,0.006173
51,ACES_divorced_separated*san,-0.16172,0.002168,0.002168
29,ACES_abuse*ni,-0.127256,0.00118,0.00118
10,ACES_household_dysfunction,0.027042,0.00071,0.00071
47,TRSQ*san,0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,1.724,-0.0,,0.386,0.0,-0.137,0.35,0.008,0.386
BFI_extraversion,,,0.792,,,0.093,,,,0.093
BIS_11,,-0.612,,,0.057,,0.047,-0.383,-0.043,0.057
ACES_neglectful_parenting,-0.0,0.0,-0.426,0.0,0.0,0.019,-0.046,-0.017,-0.218,0.019
ACES_household_dysfunction,0.027,-0.235,-0.0,0.001,0.01,0.0,,,,0.011
ACES_abuse,0.133,-0.127,0.0,0.006,0.001,0.0,,,,0.007
ACES_divorced_separated,-0.0,-0.0,-0.162,0.0,0.0,0.002,,,,0.002


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.304918,0.06344,0.327492,0.108845
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.327857,0.05111,0.296022,0.041828
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.33402,0.06504,0.340143,0.075081
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.337082,0.041067,0.301169,0.054855
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.337902,0.055844,0.341664,0.040095
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.347777,0.059058,0.344863,0.033602
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.348666,0.117381,0.353344,0.102345
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.348666,0.117381,0.353344,0.102345
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.348666,0.117381,0.353344,0.102345
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.351489,0.111644,0.349906,0.099458


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
22,BSCS*ni,2.341333,0.670922,0.670922
24,BIS_11*ni,-1.260258,0.208286,0.208286
55,BFI_extraversion*san,0.86193,0.102452,0.102452
48,ACES_neglectful_parenting*san,-0.54451,0.03121,0.03121
32,ACES_household_dysfunction*ni,-0.232784,0.009447,0.009447
7,ACES_abuse,0.122362,0.00521,0.00521
51,ACES_divorced_separated*san,-0.182961,0.002939,0.002939
29,ACES_abuse*ni,-0.085507,0.000302,0.000302
10,ACES_household_dysfunction,0.009696,0.000223,0.000223
47,TRSQ*san,0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.341,-0.0,,0.671,0.0,-0.137,0.415,-0.011,0.671
BIS_11,,-1.26,,,0.208,,0.047,-0.44,-0.033,0.208
BFI_extraversion,,,0.862,,,0.102,,,,0.102
ACES_neglectful_parenting,-0.0,0.0,-0.545,0.0,0.0,0.031,-0.046,-0.014,-0.262,0.031
ACES_household_dysfunction,0.01,-0.233,-0.0,0.0,0.009,0.0,,,,0.01
ACES_abuse,0.122,-0.086,-0.0,0.005,0.0,0.0,,,,0.006
ACES_divorced_separated,-0.0,-0.0,-0.183,0.0,0.0,0.003,,,,0.003


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.343991,0.067163,0.336979,0.107993
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.358898,0.054693,0.302829,0.046525
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.368629,0.040869,0.313858,0.062584
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.379095,0.114465,0.266752,0.112837
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.389212,0.123884,0.266222,0.116949
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.401335,0.126595,0.265415,0.11472
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.402237,0.056786,0.350545,0.040683
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.405025,0.068458,0.346732,0.07352
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.413015,0.064153,0.355426,0.034867
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.417969,0.129339,0.264802,0.113479


doing permutation test on importance; this may take time.
Number of selected features: 10


Unnamed: 0,predictor,coef,feature_importance,fa_abs
22,BSCS*ni,2.991018,1.026009,1.026009
24,BIS_11*ni,-1.882349,0.425273,0.425273
48,ACES_neglectful_parenting*san,-0.72314,0.053218,0.053218
46,RS*san,0.61181,0.049923,0.049923
55,BFI_extraversion*san,0.440505,0.028777,0.028777
32,ACES_household_dysfunction*ni,-0.236736,0.009196,0.009196
7,ACES_abuse,0.167047,0.007538,0.007538
51,ACES_divorced_separated*san,-0.258057,0.00604,0.00604
29,ACES_abuse*ni,-0.082954,0.000233,0.000233
10,ACES_household_dysfunction,0.000588,1.2e-05,1.2e-05


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.991,-0.0,,1.026,0.0,-0.137,0.472,-0.028,1.026
BIS_11,,-1.882,,,0.425,,0.047,-0.489,-0.023,0.425
ACES_neglectful_parenting,-0.0,0.0,-0.723,0.0,0.0,0.053,-0.046,-0.01,-0.303,0.053
RS,,,0.612,,,0.05,0.039,-0.198,0.345,0.05
BFI_extraversion,,,0.441,,,0.029,,,,0.029
ACES_household_dysfunction,0.001,-0.237,-0.0,0.0,0.009,0.0,,,,0.009
ACES_abuse,0.167,-0.083,-0.0,0.008,0.0,0.0,,,,0.008
ACES_divorced_separated,0.0,-0.0,-0.258,0.0,0.0,0.006,,,,0.006


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.37233,0.132148,0.266057,0.104696
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.380018,0.143044,0.266194,0.107959
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.384399,0.05974,0.307596,0.04938
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.388456,0.077113,0.342498,0.129107
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.389359,0.146334,0.266195,0.104915
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.401305,0.045319,0.328042,0.060187
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.402743,0.149005,0.266155,0.104132
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.412038,0.150532,0.266693,0.104996
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.424007,0.152456,0.267165,0.106479
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.439368,0.153732,0.266448,0.108386


doing permutation test on importance; this may take time.
Number of selected features: 25


Unnamed: 0,predictor,coef,feature_importance,fa_abs
22,BSCS*ni,5.240274,2.788697,2.788697
24,BIS_11*ni,-4.66262,2.184839,2.184839
21,san,-4.284614,1.849343,1.849343
47,TRSQ*san,2.894945,0.86366,0.86366
46,RS*san,2.483538,0.645281,0.645281
55,BFI_extraversion*san,2.119595,0.476628,0.476628
20,ni,1.805867,0.331842,0.331842
53,BFI_agreeableness*san,-1.765087,0.309773,0.309773
32,ACES_household_dysfunction*ni,-1.073831,0.111653,0.111653
28,ACES_neglectful_parenting*ni,0.94803,0.094194,0.094194


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,5.24,0.148,,2.789,0.003,-0.137,0.521,-0.044,2.792
BIS_11,,-4.663,,,2.185,,0.047,-0.531,-0.013,2.185
san,-4.285,,,1.849,,,,,,1.849
TRSQ,,,2.895,,,0.864,0.091,-0.283,0.411,0.864
RS,,,2.484,,,0.645,0.039,-0.215,0.381,0.645
BFI_extraversion,,,2.12,,,0.477,,,,0.477
ni,1.806,,,0.332,,,,,,0.332
BFI_agreeableness,,,-1.765,,,0.31,,,,0.31
ACES_household_dysfunction,0.858,-1.074,-0.411,0.065,0.112,0.022,,,,0.199
ACES_neglectful_parenting,-0.69,0.948,-0.348,0.053,0.094,0.014,-0.046,-0.007,-0.338,0.161


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Lasso())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.356747,0.128225,0.291423,0.075531
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.362828,0.135299,0.291287,0.076494
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.371313,0.134219,0.291837,0.073208
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.384897,0.13237,0.292289,0.074006
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.394117,0.13184,0.293012,0.076542
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.405611,0.064766,0.309535,0.048344
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.406003,0.131648,0.294088,0.081165
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.421842,0.066927,0.327621,0.124104
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.422014,0.131296,0.294047,0.085608
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.428075,0.047556,0.332779,0.073471


doing permutation test on importance; this may take time.
Number of selected features: 25


Unnamed: 0,predictor,coef,feature_importance,fa_abs
24,BIS_11*ni,-4.994835,2.398333,2.398333
21,san,-4.811965,2.251003,2.251003
22,BSCS*ni,4.16675,1.704088,1.704088
47,TRSQ*san,3.338437,1.076983,1.076983
46,RS*san,2.812751,0.755284,0.755284
23,EDM*ni,2.371279,0.556376,0.556376
55,BFI_extraversion*san,2.110092,0.417158,0.417158
53,BFI_agreeableness*san,-1.825398,0.32565,0.32565
32,ACES_household_dysfunction*ni,-0.971782,0.094565,0.094565
10,ACES_household_dysfunction,0.853285,0.062435,0.062435


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BIS_11,,-4.995,,,2.398,,0.047,-0.567,-0.004,2.398
san,-4.812,,,2.251,,,,,,2.251
BSCS,,4.167,0.074,,1.704,0.0,-0.137,0.564,-0.058,1.704
TRSQ,,,3.338,,,1.077,0.091,-0.297,0.45,1.077
RS,,,2.813,,,0.755,0.039,-0.23,0.412,0.755
EDM,,2.371,,,0.556,,0.053,0.412,-0.087,0.556
BFI_extraversion,,,2.11,,,0.417,,,,0.417
BFI_agreeableness,,,-1.825,,,0.326,,,,0.326
ACES_household_dysfunction,0.853,-0.972,-0.411,0.062,0.095,0.018,,,,0.175
ACES_neglectful_parenting,-0.699,0.783,-0.478,0.05,0.057,0.022,-0.046,-0.005,-0.37,0.129


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.274434,0.070966,0.332318,0.074649
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.275745,0.060991,0.30821,0.076795
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.282612,0.072588,0.338813,0.053075
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.285153,0.115366,0.335123,0.064818
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.285153,0.115366,0.335123,0.064818
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.285153,0.115366,0.335123,0.064818
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.285153,0.115366,0.335123,0.064818
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.286944,0.064742,0.3641,0.047237
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.287004,0.077225,0.340647,0.051352
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.288484,0.070706,0.362991,0.039062


doing permutation test on importance; this may take time.
Number of selected features: 4


Unnamed: 0,predictor,coef,feature_importance,fa_abs
27,BSCS*ni,0.745645,0.086381,0.086381
65,BFI_extraversion*san,0.403295,0.031686,0.031686
58,ACES_neglectful_parenting*san,-0.194827,0.010759,0.010759
6,ACES_neglectful_parenting,-0.059078,0.002682,0.002682
39,BFI_conscientiousness*ni,0.0,0.0,0.0
69,IMI_effort_importance*san,0.0,0.0,0.0
64,BFI_conscientiousness*san,0.0,0.0,0.0
63,BFI_agreeableness*san,0.0,0.0,0.0
62,ACES_household_dysfunction*san,-0.0,0.0,0.0
61,ACES_divorced_separated*san,-0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,0.746,,,0.086,,-0.137,0.35,0.008,0.086
BFI_extraversion,,,0.403,,,0.032,,,,0.032
ACES_neglectful_parenting,-0.059,0.0,-0.195,0.003,0.0,0.011,-0.046,-0.017,-0.218,0.013


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.340116,0.063683,0.326978,0.062987
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.348463,0.077774,0.347851,0.068625
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.358002,0.078881,0.353763,0.052688
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.362821,0.080222,0.376775,0.040931
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.364119,0.07608,0.377657,0.050867
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.365052,0.074682,0.373224,0.040124
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.365061,0.082236,0.354207,0.048839
"dict_values([StandardScaler(), SelectKBest(), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 20, 'estimator__min_samples_split': 50, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.36577,0.095508,0.374643,0.059375
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.365782,0.077736,0.373964,0.040642
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.365782,0.077736,0.373964,0.040642


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
27,BSCS*ni,2.341349,0.657891,0.657891
29,BIS_11*ni,-1.260271,0.191827,0.191827
65,BFI_extraversion*san,0.861931,0.107155,0.107155
58,ACES_neglectful_parenting*san,-0.544511,0.041783,0.041783
37,ACES_household_dysfunction*ni,-0.232786,0.008618,0.008618
61,ACES_divorced_separated*san,-0.182961,0.005119,0.005119
7,ACES_abuse,0.122362,0.002992,0.002992
34,ACES_abuse*ni,-0.085508,0.001458,0.001458
10,ACES_household_dysfunction,0.009697,2e-06,2e-06
57,TRSQ*san,0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.341,,,0.658,,-0.137,0.415,-0.011,0.658
BIS_11,,-1.26,-0.0,,0.192,0.0,0.047,-0.44,-0.033,0.192
BFI_extraversion,,,0.862,,,0.107,,,,0.107
ACES_neglectful_parenting,-0.0,0.0,-0.545,0.0,0.0,0.042,-0.046,-0.014,-0.262,0.042
ACES_household_dysfunction,0.01,-0.233,-0.0,0.0,0.009,0.0,,,,0.009
ACES_divorced_separated,-0.0,-0.0,-0.183,0.0,0.0,0.005,,,,0.005
ACES_abuse,0.122,-0.086,-0.0,0.003,0.001,0.0,,,,0.004


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.390133,0.094766,0.30774,0.097622
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.415409,0.08388,0.34158,0.082041
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.436241,0.04553,0.337219,0.057969
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.437159,0.084356,0.357706,0.060856
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.437992,0.092472,0.384179,0.049647
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.440444,0.080662,0.36464,0.058563
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.445314,0.072052,0.333376,0.052638
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.4},-3.448447,0.093727,0.392567,0.041973
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.449391,0.093443,0.392633,0.04193
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.45034,0.08484,0.387925,0.055295


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
27,BSCS*ni,2.963519,1.009153,1.009153
29,BIS_11*ni,-1.910246,0.436911,0.436911
65,BFI_extraversion*san,0.926245,0.109972,0.109972
58,ACES_neglectful_parenting*san,-0.666139,0.044971,0.044971
37,ACES_household_dysfunction*ni,-0.237334,0.009086,0.009086
7,ACES_abuse,0.109589,0.004214,0.004214
61,ACES_divorced_separated*san,-0.205338,0.003785,0.003785
57,TRSQ*san,0.007753,0.000102,0.000102
34,ACES_abuse*ni,-0.04253,-7.6e-05,7.6e-05
69,IMI_effort_importance*san,0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.964,,,1.009,,-0.137,0.472,-0.028,1.009
BIS_11,,-1.91,-0.0,,0.437,0.0,0.047,-0.489,-0.023,0.437
BFI_extraversion,,,0.926,,,0.11,,,,0.11
ACES_neglectful_parenting,-0.0,0.0,-0.666,0.0,0.0,0.045,-0.046,-0.01,-0.303,0.045
ACES_household_dysfunction,0.0,-0.237,-0.0,0.0,0.009,0.0,,,,0.009
ACES_abuse,0.11,-0.043,-0.0,0.004,-0.0,0.0,,,,0.004
ACES_divorced_separated,-0.0,-0.0,-0.205,0.0,0.0,0.004,,,,0.004


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.449656,0.101287,0.318538,0.090222
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.457602,0.046165,0.340766,0.05943
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.467001,0.057481,0.348051,0.048804
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.486851,0.093757,0.350438,0.083999
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.510742,0.089821,0.377779,0.066679
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.516749,0.152337,0.278062,0.038589
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.518775,0.107028,0.391315,0.056906
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.526652,0.089158,0.417237,0.041488
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.531238,0.166112,0.278001,0.03688
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.531247,0.098566,0.364315,0.063708


doing permutation test on importance; this may take time.
Number of selected features: 12


Unnamed: 0,predictor,coef,feature_importance,fa_abs
27,BSCS*ni,3.723237,1.48341,1.48341
29,BIS_11*ni,-2.227152,0.552557,0.552557
56,RS*san,0.948716,0.105262,0.105262
58,ACES_neglectful_parenting*san,-0.873699,0.073795,0.073795
41,BFI_neuroticism*ni,-0.408866,0.022619,0.022619
65,BFI_extraversion*san,0.284311,0.012693,0.012693
37,ACES_household_dysfunction*ni,-0.261748,0.010188,0.010188
61,ACES_divorced_separated*san,-0.309666,0.008593,0.008593
7,ACES_abuse,0.177284,0.007712,0.007712
33,ACES_neglectful_parenting*ni,0.050809,0.001234,0.001234


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,3.723,,,1.483,,-0.137,0.521,-0.044,1.483
BIS_11,,-2.227,-0.0,,0.553,0.0,0.047,-0.531,-0.013,0.553
RS,,,0.949,,,0.105,0.039,-0.215,0.381,0.105
ACES_neglectful_parenting,-0.0,0.051,-0.874,0.0,0.001,0.074,-0.046,-0.007,-0.338,0.075
BFI_neuroticism,,-0.409,,,0.023,,,,,0.023
BFI_extraversion,,,0.284,,,0.013,,,,0.013
ACES_household_dysfunction,0.0,-0.262,-0.0,0.0,0.01,0.0,,,,0.01
ACES_divorced_separated,0.0,-0.0,-0.31,0.0,0.0,0.009,,,,0.009
ACES_abuse,0.177,-0.074,-0.0,0.008,0.0,0.0,,,,0.008


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.46024,0.12991,0.326718,0.110011
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.475448,0.049498,0.34345,0.064093
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.503705,0.061328,0.331569,0.04199
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.526217,0.16946,0.308823,0.082503
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.534894,0.110267,0.354301,0.099688
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.541888,0.182716,0.310287,0.083807
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.560504,0.185758,0.31168,0.08009
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.571159,0.085722,0.391035,0.074366
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.583642,0.188797,0.31183,0.0754
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.598497,0.190539,0.312207,0.07318


doing permutation test on importance; this may take time.
Number of selected features: 12


Unnamed: 0,predictor,coef,feature_importance,fa_abs
27,BSCS*ni,4.270023,1.816356,1.816356
29,BIS_11*ni,-3.162491,1.021735,1.021735
56,RS*san,1.302038,0.178412,0.178412
58,ACES_neglectful_parenting*san,-1.024869,0.095663,0.095663
61,ACES_divorced_separated*san,-0.364256,0.011471,0.011471
37,ACES_household_dysfunction*ni,-0.277059,0.010393,0.010393
7,ACES_abuse,0.192148,0.008006,0.008006
65,BFI_extraversion*san,0.124334,0.003205,0.003205
73,IMI_perceived_competence*san,-0.100548,0.001728,0.001728
33,ACES_neglectful_parenting*ni,0.048663,0.001109,0.001109


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,4.27,,,1.816,,-0.137,0.564,-0.058,1.816
BIS_11,,-3.162,-0.008,,1.022,0.0,0.047,-0.567,-0.004,1.022
RS,,,1.302,,,0.178,0.039,-0.23,0.412,0.178
ACES_neglectful_parenting,-0.0,0.049,-1.025,0.0,0.001,0.096,-0.046,-0.005,-0.37,0.097
ACES_divorced_separated,0.0,0.0,-0.364,0.0,0.0,0.011,,,,0.011
ACES_household_dysfunction,0.0,-0.277,-0.0,0.0,0.01,0.0,,,,0.01
ACES_abuse,0.192,-0.051,-0.0,0.008,-0.0,0.0,,,,0.008
BFI_extraversion,,,0.124,,,0.003,,,,0.003
IMI_perceived_competence,,,-0.101,,,0.002,,,,0.002


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.278751,0.067542,0.321748,0.06564
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.28178,0.060949,0.281906,0.092456
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.285367,0.073604,0.333488,0.048831
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.288392,0.076775,0.338952,0.048485
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.288882,0.072497,0.353523,0.029416
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.288882,0.072497,0.353523,0.029416
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.6},-3.288882,0.072497,0.353523,0.029416
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.288989,0.072798,0.352432,0.028336
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.289093,0.075263,0.347401,0.030674
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.4},-3.290096,0.072905,0.351934,0.027489


doing permutation test on importance; this may take time.
Number of selected features: 4


Unnamed: 0,predictor,coef,feature_importance,fa_abs
32,BSCS*ni,1.008567,0.149602,0.149602
75,BFI_extraversion*san,0.410348,0.032607,0.032607
58,NCS_thinking_not_exciting*ni,-0.311953,0.018373,0.018373
68,ACES_neglectful_parenting*san,-0.237632,0.014364,0.014364
30,ni,0.0,0.0,0.0
66,RS*san,0.0,0.0,0.0
74,BFI_conscientiousness*san,0.0,0.0,0.0
73,BFI_agreeableness*san,0.0,0.0,0.0
72,ACES_household_dysfunction*san,-0.0,0.0,0.0
71,ACES_divorced_separated*san,-0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,1.009,0.0,,0.15,0.0,-0.137,0.35,0.008,0.15
BFI_extraversion,,,0.41,,,0.033,,,,0.033
NCS_thinking_not_exciting,,-0.312,,,0.018,,,,,0.018
ACES_neglectful_parenting,,-0.0,-0.238,,0.0,0.014,-0.046,-0.017,-0.218,0.014


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.345067,0.055138,0.267919,0.103437
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.357484,0.124513,0.33895,0.067131
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 4, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.357484,0.124513,0.33895,0.067131
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 20, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.357484,0.124513,0.33895,0.067131
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), DecisionTreeRegressor()])","{'estimator__max_depth': 2, 'estimator__min_samples_leaf': 50, 'estimator__min_samples_split': 50, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.357484,0.124513,0.33895,0.067131
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.359964,0.071889,0.321283,0.069907
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.360088,0.079753,0.33916,0.042838
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.363396,0.077149,0.367191,0.030518
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.6},-3.363396,0.077149,0.367191,0.030518
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.363416,0.077165,0.367186,0.030515


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
32,BSCS*ni,2.564326,0.799749,0.799749
58,NCS_thinking_not_exciting*ni,-1.044852,0.141007,0.141007
75,BFI_extraversion*san,0.64522,0.060394,0.060394
34,BIS_11*ni,-0.599573,0.052256,0.052256
68,ACES_neglectful_parenting*san,-0.51944,0.027989,0.027989
66,RS*san,0.222472,0.009338,0.009338
42,ACES_household_dysfunction*ni,-0.158232,0.005242,0.005242
71,ACES_divorced_separated*san,-0.193473,0.003157,0.003157
39,ACES_abuse*ni,-0.075831,0.000254,0.000254
37,TRSQ*ni,-0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.564,-0.0,,0.8,0.0,-0.137,0.415,-0.011,0.8
NCS_thinking_not_exciting,,-1.045,,,0.141,,,,,0.141
BFI_extraversion,,,0.645,,,0.06,,,,0.06
BIS_11,,-0.6,-0.0,,0.052,0.0,0.047,-0.44,-0.033,0.052
ACES_neglectful_parenting,,0.0,-0.519,,0.0,0.028,-0.046,-0.014,-0.262,0.028
RS,,,0.222,,,0.009,0.039,-0.177,0.304,0.009
ACES_household_dysfunction,,-0.158,-0.0,,0.005,0.0,,,,0.005
ACES_divorced_separated,,-0.0,-0.193,,0.0,0.003,,,,0.003


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.378012,0.043888,0.317027,0.116114
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.416709,0.064195,0.351041,0.087232
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.433182,0.073417,0.360779,0.063187
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.440148,0.102306,0.368202,0.044371
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.4},-3.445901,0.100932,0.3825,0.03804
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.448646,0.101041,0.380086,0.039904
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.4, 'feature_selection__k': 10, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.449344,0.090335,0.374547,0.044644
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 25, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.449466,0.085184,0.37543,0.031712
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.6},-3.449639,0.085264,0.375395,0.0317
"dict_values([StandardScaler(), SelectKBest(), Lasso()])","{'estimator__alpha': 0.6, 'feature_selection__k': 50, 'feature_selection__score_func': <function f_regression at 0x17e5b7eb0>}",-3.449639,0.085264,0.375395,0.0317


doing permutation test on importance; this may take time.
Number of selected features: 9


Unnamed: 0,predictor,coef,feature_importance,fa_abs
32,BSCS*ni,3.20613,1.174457,1.174457
34,BIS_11*ni,-1.217887,0.184635,0.184635
58,NCS_thinking_not_exciting*ni,-1.075569,0.140549,0.140549
68,ACES_neglectful_parenting*san,-0.665682,0.044539,0.044539
66,RS*san,0.522922,0.037803,0.037803
75,BFI_extraversion*san,0.472511,0.032509,0.032509
42,ACES_household_dysfunction*ni,-0.168079,0.005409,0.005409
71,ACES_divorced_separated*san,-0.242887,0.005161,0.005161
39,ACES_abuse*ni,-0.044481,-2.7e-05,2.7e-05
40,ACES_sum*ni,-0.0,0.0,0.0


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,3.206,-0.0,,1.174,0.0,-0.137,0.472,-0.028,1.174
BIS_11,,-1.218,-0.0,,0.185,0.0,0.047,-0.489,-0.023,0.185
NCS_thinking_not_exciting,,-1.076,,,0.141,,,,,0.141
ACES_neglectful_parenting,,0.0,-0.666,,0.0,0.045,-0.046,-0.01,-0.303,0.045
RS,,,0.523,,,0.038,0.039,-0.198,0.345,0.038
BFI_extraversion,,,0.473,,,0.033,,,,0.033
ACES_household_dysfunction,,-0.168,-0.0,,0.005,0.0,,,,0.005
ACES_divorced_separated,,-0.0,-0.243,,0.0,0.005,,,,0.005


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.429569,0.055016,0.315018,0.096599
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.469909,0.116103,0.274563,0.090642
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.479009,0.077045,0.346521,0.092337
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.480859,0.131133,0.280967,0.098037
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.496868,0.138584,0.289778,0.103582
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.502371,0.05875,0.317726,0.081735
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.517531,0.092582,0.341329,0.058186
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.518789,0.089888,0.359279,0.073587
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.519029,0.146869,0.299731,0.112253
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.3},-3.519248,0.117119,0.368084,0.054499


doing permutation test on importance; this may take time.
Number of selected features: 11


Unnamed: 0,predictor,coef,feature_importance,fa_abs
32,BSCS*ni,2.57896,0.700753,0.700753
34,BIS_11*ni,-2.491251,0.645394,0.645394
33,EDM*ni,1.415214,0.220275,0.220275
66,RS*san,0.879578,0.096221,0.096221
68,ACES_neglectful_parenting*san,-0.811326,0.076213,0.076213
46,BFI_neuroticism*ni,-0.434982,0.023154,0.023154
75,BFI_extraversion*san,0.344945,0.019924,0.019924
71,ACES_divorced_separated*san,-0.294496,0.010628,0.010628
42,ACES_household_dysfunction*ni,-0.215662,0.006699,0.006699
73,BFI_agreeableness*san,-0.095991,0.00124,0.00124


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BSCS,,2.579,-0.0,,0.701,0.0,-0.137,0.521,-0.044,0.701
BIS_11,,-2.491,-0.0,,0.645,0.0,0.047,-0.531,-0.013,0.645
EDM,,1.415,,,0.22,,0.053,0.376,-0.08,0.22
RS,,,0.88,,,0.096,0.039,-0.215,0.381,0.096
ACES_neglectful_parenting,,0.0,-0.811,,0.0,0.076,-0.046,-0.007,-0.338,0.076
BFI_neuroticism,,-0.435,,,0.023,,,,,0.023
BFI_extraversion,,,0.345,,,0.02,,,,0.02
ACES_divorced_separated,,-0.0,-0.294,,0.0,0.011,,,,0.011
ACES_household_dysfunction,,-0.216,-0.0,,0.007,0.0,,,,0.007
BFI_agreeableness,,,-0.096,,,0.001,,,,0.001


['ni' 'san']
[1.28335298 0.42953651]
['san' 'san' 'ni' 'ichi' 'san' 'san' 'ichi' 'san' 'san' 'san' 'ni' 'ichi'
 'ichi' 'ichi' 'ichi' 'san' 'san' 'san' 'ichi' 'ichi' 'san' 'san' 'ni'
 'ni' 'ni' 'ni' 'ni' 'ni' 'ni' 'san' 'ni' 'san' 'ni' 'ichi' 'ni' 'san'
 'ni' 'ichi' 'san' 'ni' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ni' 'ni'
 'san' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ichi' 'ni' 'ni' 'ni' 'ichi' 'san'
 'ni' 'ni' 'ichi' 'ni' 'ichi' 'san' 'ni' 'ni' 'ni' 'san' 'ichi' 'ni' 'san'
 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'ichi' 'san' 'ichi' 'san' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'san' 'ni' 'san' 'ni' 'ichi' 'san' 'san' 'san' 'ichi'
 'ni' 'san' 'ichi' 'ichi' 'san' 'ni' 'ichi' 'san' 'ni' 'ni' 'san' 'ni'
 'ichi' 'ni' 'ichi' 'ichi' 'ni' 'ichi' 'ichi' 'ichi' 'san' 'san' 'ichi'
 'ni' 'ni' 'ichi' 'ni' 'ni' 'ichi' 'ichi' 'san' 'san' 'ni' 'ichi' 'ni'
 'ichi' 'ichi' 'san' 'ichi' 'ni' 'san' 'san' 'ni' 'ni' 'san' 'san' 'san'
 'ichi' 'san' 'ni' 'san' 'ichi' 'ichi' 'ichi' 'ni' 'san' 'ni' 'ni' 'ni'
 'ichi' 'ni' 'ichi' '

  overall_scores = overall_scores.append(


Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', SelectKBest()), ('estimator', Ridge())])
{'feature_selection__score_func': [<function f_regression at 0x17e5b7eb0>], 'feature_selection__k': [10, 25, 50], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 24 candidates, totalling 96 fits
Pipeline(steps=[('scaler', StandardScaler()),
                ('feature_selection', RFE(estimator=LinearRegression())),
                ('estimator', Ridge())])
{'feature_selection__n_features_to_select': [10, 25], 'feature_selection__step': [5], 'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 16 candidates, totalling 64 fits
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', Lasso())])
{'estimator__alpha': array([0.1, 1. , 0.2, 0.3, 0.4, 0.6, 0.8, 1. ])}
Fitting 4 folds for each of 8 candidates, totalling 32 fits
Pipeline(steps=[('scaler', StandardScaler()),
       

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_test_score,mean_test_score,std_test_score,std_test_score
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
model_description,params_str,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.1, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.459017,0.0702,0.289509,0.103003
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 1.0, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.476296,0.102076,0.282846,0.130985
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.8, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.488054,0.111889,0.286784,0.144838
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.6, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.503647,0.116068,0.293361,0.15126
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.4, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.526594,0.121226,0.301758,0.159521
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.1},-3.526663,0.052526,0.321074,0.089009
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Lasso()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.541873,0.086286,0.333931,0.103197
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.3, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.542045,0.12407,0.307359,0.165589
"dict_values([StandardScaler(), RFE(estimator=LinearRegression()), Ridge()])","{'estimator__alpha': 0.2, 'feature_selection__n_features_to_select': 25, 'feature_selection__step': 5}",-3.560783,0.127718,0.313939,0.172162
"dict_values([StandardScaler(), Lasso()])",{'estimator__alpha': 0.2},-3.582406,0.094581,0.34917,0.071168


doing permutation test on importance; this may take time.
Number of selected features: 12


Unnamed: 0,predictor,coef,feature_importance,fa_abs
34,BIS_11*ni,-3.184171,0.989051,0.989051
32,BSCS*ni,2.963403,0.895696,0.895696
33,EDM*ni,1.739765,0.32179,0.32179
66,RS*san,1.311721,0.172764,0.172764
68,ACES_neglectful_parenting*san,-0.95712,0.095619,0.095619
46,BFI_neuroticism*ni,-0.44609,0.02162,0.02162
71,ACES_divorced_separated*san,-0.340147,0.017789,0.017789
73,BFI_agreeableness*san,-0.265576,0.010182,0.010182
75,BFI_extraversion*san,0.298508,0.008965,0.008965
42,ACES_household_dysfunction*ni,-0.220918,0.007568,0.007568


  results_vs_cors = final_results_wide.merge(group_correlations, left_index=True, right_index=True, how='outer')


Unnamed: 0,"(coef, base)","(coef, ni)","(coef, san)","(feature_importance, base)","(feature_importance, ni)","(feature_importance, san)",ichi_cor,ni_cor,san_cor,abs_effect_sum
BIS_11,,-3.184,-0.0,,0.989,0.0,0.047,-0.567,-0.004,0.989
BSCS,,2.963,-0.099,,0.896,0.002,-0.137,0.564,-0.058,0.898
EDM,,1.74,,,0.322,,0.053,0.412,-0.087,0.322
RS,,,1.312,,,0.173,0.039,-0.23,0.412,0.173
ACES_neglectful_parenting,,0.0,-0.957,,0.0,0.096,-0.046,-0.005,-0.37,0.096
BFI_neuroticism,,-0.446,,,0.022,,,,,0.022
ACES_divorced_separated,,-0.0,-0.34,,0.0,0.018,,,,0.018
BFI_agreeableness,,,-0.266,,,0.01,,,,0.01
BFI_extraversion,,,0.299,,,0.009,,,,0.009
ACES_household_dysfunction,,-0.221,-0.0,,0.008,0.0,,,,0.008


  overall_scores = overall_scores.append(


In [9]:
overall_scores

Unnamed: 0,n_features,effect_size,overall_score
0,15.0,0.08,-0.078301
1,15.0,0.1,-0.121002
2,15.0,0.12,0.004276
3,15.0,0.14,0.054448
4,15.0,0.16,0.101453
5,20.0,0.08,-0.090946
6,20.0,0.1,-0.045115
7,20.0,0.12,-0.033768
8,20.0,0.14,0.035614
9,20.0,0.16,0.119037
