In [1]:
import os
import json
import pickle
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold, cross_val_score
from tqdm.notebook import tqdm
from rca import make_binary_scoring, make_multiclass_scoring, process_categorical, best_logistic_solver, checker

## Loading Data

We drop feature_overlap because it contains many NaNs and compo_attribs because it doesn't have a large enough vocabulary and is also a identical to a 65 of the norms in the psychNorms dataset.

In [2]:
with open('../../data/brain_behav_union.pkl', 'rb') as f:
    brain_behav_union = pickle.load(f)

# Loading dictionary of dtype to embed
with open('../../data/dtype_to_embed.json', 'r') as f:
    dtype_to_embed = json.load(f)
    
brain_behav_names = dtype_to_embed['brain'] + dtype_to_embed['behavior']

# Pulling and standardising embeddings
embeds = {}
embeds_path = '../../data/embeds/'
for f_name in tqdm(os.listdir(embeds_path)):
    if f_name not in ['feature_overlap.csv', 'compo_attribs.csv']:  # dropping since contains many NaNs
        
        embed = pd.read_csv(embeds_path + f_name, index_col=0)
        embed_name = f_name.split('.')[0]
        
        # Subsetting to brain and behavior vocab
        embed = embed.loc[embed.index.intersection(brain_behav_union)]
        
        # Standardising
        embeds[embed_name] = (embed - embed.mean()) / embed.std()

{name: embed.shape for name, embed in embeds.items()}

  0%|          | 0/26 [00:00<?, ?it/s]

{'CBOW_GoogleNews': (42830, 300),
 'PPMI_SVD_SouthFlorida': (4959, 300),
 'SVD_sim_rel': (6002, 300),
 'spherical_text_Wikipedia': (35533, 300),
 'norms_sensorimotor': (36854, 11),
 'fastText_Wiki_News': (43143, 300),
 'PPMI_SVD_EAT': (7775, 300),
 'GloVe_Twitter': (32947, 200),
 'LexVec_CommonCrawl': (44082, 300),
 'fastTextSub_OpenSub': (40607, 300),
 'eye_tracking': (7486, 6),
 'SGSoftMaxOutput_SWOW': (25442, 300),
 'morphoNLM': (32769, 50),
 'SGSoftMaxInput_SWOW': (11783, 300),
 'fMRI_text_hyper_align': (1205, 1000),
 'GloVe_Wikipedia': (39421, 300),
 'EEG_text': (3355, 104),
 'fastText_CommonCrawl': (44443, 300),
 'fMRI_speech_hyper_align': (579, 6),
 'PPMI_SVD_SWOW': (11783, 300),
 'microarray': (626, 15),
 'EEG_speech': (1591, 130),
 'GloVe_CommonCrawl': (44278, 300),
 'THINGS': (1562, 49)}

In [3]:
with open('../../data/embed_to_dtype.json', 'r') as f:
    embed_to_type = json.load(f)
embed_to_type

{'CBOW_GoogleNews': 'text',
 'fastText_CommonCrawl': 'text',
 'fastText_Wiki_News': 'text',
 'fastTextSub_OpenSub': 'text',
 'GloVe_CommonCrawl': 'text',
 'GloVe_Twitter': 'text',
 'GloVe_Wikipedia': 'text',
 'LexVec_CommonCrawl': 'text',
 'morphoNLM': 'text',
 'spherical_text_Wikipedia': 'text',
 'eye_tracking': 'brain',
 'EEG_speech': 'brain',
 'EEG_text': 'brain',
 'fMRI_speech_hyper_align': 'brain',
 'fMRI_text_hyper_align': 'brain',
 'microarray': 'brain',
 'PPMI_SVD_SWOW': 'behavior',
 'SGSoftMaxInput_SWOW': 'behavior',
 'SGSoftMaxOutput_SWOW': 'behavior',
 'PPMI_SVD_SouthFlorida': 'behavior',
 'PPMI_SVD_EAT': 'behavior',
 'THINGS': 'behavior',
 'feature_overlap': 'behavior',
 'norms_sensorimotor': 'behavior',
 'compo_attribs': 'behavior',
 'SVD_sim_rel': 'behavior'}

In [4]:
# Loading norms
norms = pd.read_csv('../../data/psychNorms/psychNorms_processed.zip', index_col=0, compression='zip', low_memory=False)
norm_meta = pd.read_csv('../../data/psychNorms/psychNorms_metadata_processed.csv', index_col='norm')
norms

Unnamed: 0_level_0,frequency_lund,frequency_kucera,frequency_subtlexus,frequency_subtlexuk,frequency_blog_gimenes,frequency_twitter_gimenes,frequency_news_gimenes,frequency_written_cobuild,frequency_spoken_cobuild,context_diversity_subtlexus,...,person_vanarsdall,goals_vanarsdall,movement_vanarsdall,concreteness_vanarsdall,familiarity_vanarsdall,imageability_vanarsdall,familiarity_fear,aoa_fear,imageability_fear,sensory_experience_juhasz2013
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'em,0.0,,,,,,,1.3617,1.9138,,...,,,,,,,,,,
'neath,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
're,0.0,,,,,,,0.9031,1.6335,,...,,,,,,,,,,
'shun,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
'tis,0.0,,,,,,,0.4771,0.6021,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shrick,,,,,,,,,,,...,,,,,,,2.62,4.38,2.93,
post office,,,,,,,,,,,...,,,,,,,3.79,3.07,5.29,
fishing rod,,,,,,,,,,,...,,,,,,,2.29,3.38,5.64,
March,,,,,,,,,,,...,,,,,,,3.43,2.76,3.50,


## Cross Validation

In [5]:
# Ridge
min_ord, max_ord = -5, 5
alphas = np.logspace(
    min_ord, max_ord, max_ord - min_ord + 1
)
ridge = RidgeCV(alphas=alphas)

# Logistic hyperparameters
Cs = 1 / alphas
inner_cv = 5
penalty = 'l2'

# Scorers
binary_scoring = make_binary_scoring()
multiclass_scoring = make_multiclass_scoring()
continuous_scoring = 'r2'

# outer_cv setting 
outer_cv, n_jobs = 5, 10

# RCA
rca = []
for embed_name in tqdm(embeds.keys()):
    embed = embeds[embed_name]
    
    to_print = []
    for norm_name in tqdm(norms.columns, desc=embed_name):
        
        # Aligning data
        y = norms[norm_name].dropna()
        X, y = embed.align(y, axis=0, join='inner', copy=True) 
        
        # Checking norm dtype 
        norm_dtype = norm_meta.loc[norm_name, 'type']
        
        # Solvers, scoring, estimators
        if norm_dtype in ['binary', 'multiclass']:
            X, y = process_categorical(outer_cv, inner_cv, X, y)
            
            # may have switched form multi to bin after processing
            norm_dtype = 'binary' if len(y.unique()) == 2 else 'multiclass'
            
            # Cross validation settings for logistic regression
            solver = best_logistic_solver(X, norm_dtype)
            
            # Defining logistic regression 
            estimator = LogisticRegressionCV(
                Cs=Cs, penalty=penalty, cv=StratifiedKFold(inner_cv),
                solver=solver, n_jobs=8
            )
            scoring = binary_scoring if norm_dtype == 'binary' else multiclass_scoring
        else: # continuous
            estimator, scoring = ridge, continuous_scoring
  
        # Cross validation
        associated_embed = norm_meta.loc[norm_name, 'associated_embed']
        check = checker(embed_name, y, norm_dtype, associated_embed, outer_cv)
        if check == 'pass':
            r2s = cross_val_score(estimator, X, y, cv=outer_cv, scoring=scoring, n_jobs=n_jobs) # stratification is automatically used for classification
            r2_mean, r2_sd = r2s.mean(), r2s.std()
        else:
            r2_mean, r2_sd = np.nan, np.nan
            
        # Saving
        train_n = int(((outer_cv - 1) / outer_cv) * len(X))
        test_n = len(X) - train_n
        p = X.shape[1]
        embed_type = embed_to_type[embed_name]
        rca.append([
            embed_name, embed_type, norm_name, train_n, test_n, p, 
            r2_mean, r2_sd, check
        ])
        
        to_print.append([norm_name, train_n, r2_mean, r2_sd, check])
            
            
    to_print = pd.DataFrame(to_print, columns=['norm' , 'train_n', 'r2_mean', 'r2_sd', 'check'])
    print(to_print.sort_values('r2_mean', ascending=False).head(10))

rca = pd.DataFrame(
    rca, columns=[
        'embed', 'embed_type', 'norm', 'train_n', 'test_n', 'p', 
        'r2_mean', 'r2_sd', 'check'
    ]
)

rca.to_csv('../../data/results/rca.csv', index=False)
rca

  0%|          | 0/24 [00:00<?, ?it/s]

CBOW_GoogleNews:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
281        person_vanarsdall      960  0.817393  0.019020  pass
279       thought_vanarsdall      960  0.815265  0.013993  pass
216            valence_britz      375  0.809409  0.024288  pass
217         social_des_britz      375  0.805828  0.022057  pass
282         goals_vanarsdall      960  0.796573  0.020816  pass
161          goals_wilkowski      837  0.785734  0.012998  pass
176      concreteness_hollis      828  0.784008  0.018515  pass
24      concreteness_glasgow     3672  0.781250  0.009689  pass
267     likableness_anderson      386  0.780283  0.036849  pass
280  reproduction_vanarsdall      960  0.776169  0.009854  pass


PPMI_SVD_SouthFlorida:   0%|          | 0/291 [00:00<?, ?it/s]



                       norm  train_n   r2_mean     r2_sd check
35      gustatory_lancaster     3814  0.672401  0.016191  pass
23   concreteness_brysbaert     3815  0.655553  0.016042  pass
77       cue_setsize_nelson     3963  0.639973  0.016224  pass
24     concreteness_glasgow     2205  0.613463  0.014951  pass
198          fear_stevenson      595  0.601305  0.047646  pass
174          valence_hollis      596  0.601222  0.030612  pass
252      imagery_vanderveur      577  0.592696  0.032846  pass
52          valence_glasgow     2205  0.590960  0.022241  pass
25     imageability_glasgow     2205  0.589379  0.022460  pass
282        goals_vanarsdall      960  0.588655  0.031687  pass


SVD_sim_rel:   0%|          | 0/291 [00:00<?, ?it/s]



                            norm  train_n   r2_mean     r2_sd check
121                 music_binder      283  0.489491  0.123858  pass
247     visual_complexity_marrow      294  0.298847  0.058843  pass
35           gustatory_lancaster     4113  0.295551  0.121860  pass
278            living_vanarsdall      697  0.292883  0.079814  pass
283          movement_vanarsdall      697  0.285231  0.077082  pass
280      reproduction_vanarsdall      697  0.271412  0.084066  pass
24          concreteness_glasgow     1834  0.264933  0.104800  pass
25          imageability_glasgow     1834  0.251730  0.105068  pass
42   body_object_interact_pexman     2320  0.250690  0.109046  pass
118                   low_binder      283  0.226604  0.095796  pass


spherical_text_Wikipedia:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
279       thought_vanarsdall      960  0.733163  0.036479  pass
281        person_vanarsdall      960  0.729775  0.024890  pass
216            valence_britz      380  0.722395  0.092661  pass
24      concreteness_glasgow     3696  0.722372  0.087919  pass
217         social_des_britz      380  0.720185  0.089407  pass
236           thought_troche      600  0.717827  0.072344  pass
234            visual_troche      600  0.706366  0.110036  pass
282         goals_vanarsdall      960  0.696704  0.052504  pass
280  reproduction_vanarsdall      960  0.693522  0.039290  pass
176      concreteness_hollis      828  0.688856  0.090635  pass


norms_sensorimotor:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                         norm  train_n   r2_mean     r2_sd check
123              taste_binder      404  0.914986  0.025669  pass
124              smell_binder      404  0.854866  0.024746  pass
165           auditory_lynott      292  0.850399  0.031244  pass
166          olfactory_lynott      292  0.829914  0.030290  pass
167          gustatory_lynott      292  0.822489  0.049956  pass
164             haptic_lynott      292  0.773726  0.053972  pass
163             visual_lynott      292  0.756229  0.056084  pass
168  dominant_modality_lynott      275  0.753450  0.072064  pass
111              touch_binder      404  0.749546  0.022353  pass
125               head_binder      404  0.711896  0.067124  pass


fastText_Wiki_News:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
279    thought_vanarsdall      960  0.834966  0.026638  pass
216         valence_britz      384  0.834904  0.066899  pass
217      social_des_britz      384  0.834164  0.066075  pass
281     person_vanarsdall      960  0.809627  0.045865  pass
282      goals_vanarsdall      960  0.808161  0.028791  pass
267  likableness_anderson      404  0.787729  0.065806  pass
24   concreteness_glasgow     3705  0.774988  0.061083  pass
278     living_vanarsdall      960  0.765979  0.010340  pass
176   concreteness_hollis      829  0.764481  0.074537  pass
161       goals_wilkowski      842  0.759578  0.075304  pass


PPMI_SVD_EAT:   0%|          | 0/291 [00:00<?, ?it/s]



                           norm  train_n   r2_mean     r2_sd check
252          imagery_vanderveur      669  0.746894  0.033563  pass
23       concreteness_brysbaert     5052  0.662313  0.012862  pass
121                music_binder      322  0.649720  0.070117  pass
24         concreteness_glasgow     2346  0.642724  0.016109  pass
52              valence_glasgow     2346  0.624767  0.009774  pass
164               haptic_lynott      147  0.622733  0.066726  pass
55   valence_younger_kyrolainen     1580  0.613297  0.023370  pass
25         imageability_glasgow     2346  0.607133  0.012411  pass
185               fearful_zupan      143  0.605482  0.056784  pass
161             goals_wilkowski      296  0.600333  0.022023  pass


GloVe_Twitter:   0%|          | 0/291 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
217      social_des_britz      373  0.758568  0.054093  pass
216         valence_britz      373  0.751430  0.064310  pass
279    thought_vanarsdall      960  0.719691  0.028873  pass
282      goals_vanarsdall      960  0.713800  0.020285  pass
281     person_vanarsdall      960  0.711309  0.033332  pass
176   concreteness_hollis      828  0.700749  0.061538  pass
267  likableness_anderson      385  0.685802  0.115346  pass
186           happy_zupan      364  0.675214  0.143183  pass
190         valence_zupan      364  0.673466  0.150396  pass
24   concreteness_glasgow     3699  0.669733  0.056159  pass


LexVec_CommonCrawl:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
281      person_vanarsdall      960  0.792867  0.022710  pass
279     thought_vanarsdall      960  0.792429  0.026844  pass
282       goals_vanarsdall      960  0.783946  0.036015  pass
24    concreteness_glasgow     3702  0.772512  0.040854  pass
234          visual_troche      600  0.760880  0.062668  pass
211  tabooness_janschewitz      359  0.757716  0.057394  pass
216          valence_britz      375  0.747692  0.120313  pass
123           taste_binder      425  0.745493  0.124052  pass
217       social_des_britz      375  0.742210  0.113742  pass
161        goals_wilkowski      842  0.741943  0.090778  pass


fastTextSub_OpenSub:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                      norm  train_n   r2_mean     r2_sd check
279     thought_vanarsdall      959  0.828842  0.034322  pass
281      person_vanarsdall      959  0.809045  0.042116  pass
176    concreteness_hollis      828  0.807378  0.049770  pass
24    concreteness_glasgow     3701  0.803673  0.049403  pass
216          valence_britz      375  0.798553  0.099952  pass
217       social_des_britz      375  0.793407  0.105341  pass
211  tabooness_janschewitz      358  0.790804  0.029085  pass
282       goals_vanarsdall      959  0.787861  0.064859  pass
234          visual_troche      600  0.771801  0.060146  pass
278      living_vanarsdall      959  0.770237  0.042745  pass


eye_tracking:   0%|          | 0/291 [00:00<?, ?it/s]



                         norm  train_n   r2_mean     r2_sd check
173                aoa_hollis      428  0.132150  0.116342  pass
263                aoa_citron      164  0.130884  0.099540  pass
264       imageability_citron      164  0.125862  0.077706  pass
128           practice_binder      210  0.068539  0.043532  pass
19                aoa_glasgow     1603  0.068052  0.079722  pass
274                aoa_davies      405  0.045808  0.285739  pass
93     concreteness_rt_pexman     1461  0.045594  0.022906  pass
132               near_binder      239  0.037747  0.051240  pass
214  imageability_janschewitz      146  0.032423  0.018922  pass
218       observability_britz      167  0.029894  0.061155  pass


SGSoftMaxOutput_SWOW:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
216         valence_britz      376  0.562824  0.092797  pass
217      social_des_britz      376  0.560636  0.092795  pass
186           happy_zupan      356  0.546821  0.122537  pass
253  likableness_chandler      576  0.533993  0.123907  pass
161       goals_wilkowski      661  0.530117  0.135432  pass
190         valence_zupan      356  0.527797  0.129881  pass
252    imagery_vanderveur      722  0.518544  0.116985  pass
189             sad_zupan      356  0.512055  0.126016  pass
195   happiness_stevenson      815  0.499735  0.110030  pass
174        valence_hollis      820  0.491126  0.114253  pass


morphoNLM:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n   r2_mean     r2_sd check
252   imagery_vanderveur      747  0.699249  0.035336  pass
236       thought_troche      598  0.681348  0.027528  pass
234        visual_troche      598  0.637440  0.037701  pass
282     goals_vanarsdall      959  0.627133  0.024324  pass
176  concreteness_hollis      821  0.622849  0.031675  pass
233      morality_troche      598  0.614049  0.064030  pass
281    person_vanarsdall      959  0.587448  0.026257  pass
146     cognition_binder      421  0.584611  0.049284  pass
279   thought_vanarsdall      959  0.583179  0.024608  pass
203   concreteness_brown      498  0.575987  0.035185  pass


SGSoftMaxInput_SWOW:   0%|          | 0/291 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
216         valence_britz      296  0.849327  0.034629  pass
217      social_des_britz      296  0.842480  0.040147  pass
253  likableness_chandler      408  0.839290  0.021115  pass
267  likableness_anderson      290  0.819831  0.015151  pass
190         valence_zupan      254  0.799136  0.020895  pass
161       goals_wilkowski      474  0.790575  0.027064  pass
186           happy_zupan      254  0.787081  0.035349  pass
174        valence_hollis      762  0.780688  0.022227  pass
52        valence_glasgow     3252  0.770929  0.011766  pass
252    imagery_vanderveur      684  0.769867  0.016589  pass


fMRI_text_hyper_align:   0%|          | 0/291 [00:00<?, ?it/s]



                                   norm  train_n   r2_mean     r2_sd check
74          n_semantic_neighbors_shaoul      926  0.040362  0.029607  pass
290       sensory_experience_juhasz2013      461  0.030186  0.060513  pass
75   distance_semantic_neighbors_shaoul      926  0.029668  0.032998  pass
53                     valence_mohammad      526  0.023204  0.017587  pass
1                      frequency_kucera      912  0.022745  0.124903  pass
13       context_diversity_news_gimenes      930  0.021558  0.084334  pass
59                   dominance_warriner      399  0.016709  0.028182  pass
2                   frequency_subtlexus      921  0.015870  0.097284  pass
3                   frequency_subtlexuk      926  0.013814  0.082193  pass
73           semantic_diversity_hoffman      900  0.012783  0.037428  pass


GloVe_Wikipedia:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
216         valence_britz      382  0.800102  0.064758  pass
217      social_des_britz      382  0.791933  0.068751  pass
236        thought_troche      600  0.709518  0.092018  pass
281     person_vanarsdall      960  0.707569  0.047245  pass
282      goals_vanarsdall      960  0.705896  0.062885  pass
279    thought_vanarsdall      960  0.705014  0.054011  pass
234         visual_troche      600  0.692137  0.121478  pass
161       goals_wilkowski      830  0.689635  0.131215  pass
176   concreteness_hollis      829  0.686559  0.107578  pass
267  likableness_anderson      400  0.685752  0.089461  pass


EEG_text:   0%|          | 0/291 [00:00<?, ?it/s]



                                 norm  train_n   r2_mean     r2_sd check
116                   audition_binder      144  0.019747  0.035246  pass
21                      pos_brysbaert     2538  0.002125  0.001896  pass
22                      pos_vanheuven     2565  0.001521  0.001186  pass
72                     trust_mohammad     1359 -0.000671  0.005806  pass
78           cue_connectedness_nelson     1025 -0.001550  0.001589  pass
29            n_meanings_websters_gao     1882 -0.001707  0.008256  pass
16               prevalence_brysbaert     2108 -0.002220  0.002169  pass
88            auditory_lexical_rt_goh     1232 -0.002567  0.001345  pass
89   auditory_lexical_accuracy_tucker     2224 -0.002935  0.002028  pass
85    visual_lexical_accuracy_mandera     2108 -0.003073  0.004619  pass


fastText_CommonCrawl:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
279       thought_vanarsdall      960  0.863282  0.025518  pass
217         social_des_britz      384  0.844004  0.061821  pass
281        person_vanarsdall      960  0.842943  0.033633  pass
216            valence_britz      384  0.837175  0.074419  pass
282         goals_vanarsdall      960  0.831619  0.030831  pass
280  reproduction_vanarsdall      960  0.807340  0.016589  pass
211    tabooness_janschewitz      359  0.806742  0.036261  pass
278        living_vanarsdall      960  0.804306  0.016384  pass
24      concreteness_glasgow     3705  0.800845  0.042360  pass
176      concreteness_hollis      829  0.796213  0.061914  pass


fMRI_speech_hyper_align:   0%|          | 0/291 [00:00<?, ?it/s]

                           norm  train_n   r2_mean     r2_sd check
44        semantic_size_glasgow      162  0.032631  0.046006  pass
39       mouth_throat_lancaster      401  0.001253  0.015996  pass
21                pos_brysbaert      380 -0.000545  0.001205  pass
83   visual_lexical_rt_keuleers      414 -0.001398  0.001967  pass
22                pos_vanheuven      419 -0.003614  0.008174  pass
46        feature_visual_vinson      108 -0.003762  0.053454  pass
32           auditory_lancaster      401 -0.003864  0.049854  pass
18                aoa_brysbaert      313 -0.006742  0.004225  pass
73   semantic_diversity_hoffman      452 -0.007754  0.007916  pass
269           perc_known_winter      369 -0.008290  0.005617  pass


PPMI_SVD_SWOW:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                           norm  train_n   r2_mean     r2_sd check
253        likableness_chandler      408  0.854160  0.032139  pass
174              valence_hollis      762  0.853321  0.015269  pass
216               valence_britz      296  0.848744  0.037245  pass
52              valence_glasgow     3252  0.846008  0.010713  pass
217            social_des_britz      296  0.834233  0.041997  pass
161             goals_wilkowski      474  0.823846  0.029958  pass
267        likableness_anderson      290  0.822380  0.036759  pass
55   valence_younger_kyrolainen     2315  0.820744  0.014745  pass
195         happiness_stevenson      758  0.815586  0.015399  pass
186                 happy_zupan      254  0.809458  0.048544  pass


microarray:   0%|          | 0/291 [00:00<?, ?it/s]

                                norm  train_n   r2_mean     r2_sd check
39            mouth_throat_lancaster      392  0.035050  0.044618  pass
62                 humor_engelthaler      178  0.011184  0.046550  pass
36           interoceptive_lancaster      392  0.006731  0.065213  pass
78          cue_connectedness_nelson      304  0.003092  0.026233  pass
21                     pos_brysbaert      458  0.002348  0.000920  pass
22                     pos_vanheuven      479  0.001981  0.001501  pass
32                auditory_lancaster      392  0.000049  0.014583  pass
85   visual_lexical_accuracy_mandera      396 -0.001880  0.004929  pass
92            naming_accuracy_balota      471 -0.002179  0.014764  pass
273                    aoa_rt_schock       96 -0.002804  0.050287  pass


EEG_speech:   0%|          | 0/291 [00:00<?, ?it/s]



                                 norm  train_n   r2_mean     r2_sd check
266                   arousal_imbault      195  0.033178  0.036310  pass
287                  familiarity_fear      103  0.033056  0.064010  pass
22                      pos_vanheuven     1215  0.005088  0.008379  pass
21                      pos_brysbaert     1184  0.005027  0.009399  pass
35                gustatory_lancaster     1006  0.000929  0.003119  pass
89   auditory_lexical_accuracy_tucker     1126 -0.001811  0.004149  pass
45         gender_association_glasgow      447 -0.002275  0.006421  pass
16               prevalence_brysbaert     1033 -0.003254  0.003390  pass
61                 dominance_mohammad      744 -0.003332  0.002841  pass
269                 perc_known_winter      857 -0.003680  0.006996  pass


GloVe_CommonCrawl:   0%|          | 0/291 [00:00<?, ?it/s]

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
216          valence_britz      384  0.832397  0.096434  pass
217       social_des_britz      384  0.827537  0.084828  pass
281      person_vanarsdall      960  0.818899  0.027482  pass
279     thought_vanarsdall      960  0.813102  0.019946  pass
282       goals_vanarsdall      960  0.806998  0.021245  pass
211  tabooness_janschewitz      359  0.793416  0.050916  pass
267   likableness_anderson      405  0.773479  0.090871  pass
161        goals_wilkowski      842  0.762251  0.087604  pass
253   likableness_chandler      644  0.755676  0.083720  pass
174         valence_hollis      829  0.755092  0.072602  pass


THINGS:   0%|          | 0/291 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
104         biomotion_binder      128  0.872332  0.042505  pass
278        living_vanarsdall      376  0.841733  0.019199  pass
110              body_binder      128  0.834058  0.052231  pass
280  reproduction_vanarsdall      376  0.827012  0.027135  pass
35       gustatory_lancaster     1235  0.813969  0.040591  pass
123             taste_binder      128  0.809113  0.100118  pass
121             music_binder      128  0.760995  0.067979  pass
283      movement_vanarsdall      376  0.723601  0.033689  pass
279       thought_vanarsdall      376  0.721569  0.080251  pass
130              path_binder      128  0.715336  0.068376  pass


ValueError: 8 columns passed, passed data had 9 columns