In [13]:
import os
import json
import pickle
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold
from tqdm.notebook import tqdm
from rca import make_binary_scoring, make_multiclass_scoring, process_categorical, best_logistic_solver, checker, k_fold_cross_val

## Loading Data

In [14]:
# Loading dictionary of dtype to embed
with open('../../data/raw/dtype_to_embed.json', 'r') as f:
    dtype_to_embed = json.load(f)
    
brain_behav_names = dtype_to_embed['brain'] + dtype_to_embed['behavior']

# Iterating through pulled_embeds and finding union of all brain and behavior vocabs
embeds_path = '../../data/raw/embeds/'
brain_behav_union = set()
for name in tqdm(brain_behav_names):
    vocab = set(pd.read_csv(embeds_path + name + '.csv', index_col=0).index)
    brain_behav_union = brain_behav_union.union(vocab)

len(brain_behav_union)  

  0%|          | 0/16 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Pulling and standardising embeddings
embeds = {}
embeds_path = '../../data/raw/embeds/'
for f_name in tqdm(os.listdir(embeds_path)):
    if f_name != 'feature_overlap.csv':  # dropping since contains many NaNs
        
        embed = pd.read_csv(embeds_path + f_name, index_col=0)
        embed_name = f_name.split('.')[0]
        
        # Subsetting to brain and behavior vocab
        embed = embed.loc[embed.index.intersection(brain_behav_union)]
        
        # Standardising
        embeds[embed_name] = (embed - embed.mean()) / embed.std()

{name: embed.shape for name, embed in embeds.items()}

In [15]:
norms = pd.read_csv('../../data/raw/psychNorms.zip', index_col=0, compression='zip', low_memory=False)
norm_metadata = pd.read_csv('../../data/raw/psychNorms_metadata.csv', index_col='norm')
norm_metadata['associated_embed'] = norm_metadata['associated_embed'].astype(str)
norms

Unnamed: 0,Freq_HAL,Freq_KF,Freq_SUBTLEXUS,Freq_SUBTLEXUK,Freq_Blog,Freq_Twitter,Freq_News,Freq_CobW,Freq_CobS,CD_SUBTLEXUS,...,reproduction_vanarsdall,person_vanarsdall,goals_vanarsdall,movement_vanarsdall,concreteness_vanarsdall,familiarity_vanarsdall,imageability_vanarsdall,familiarity_fear,aoa_fear,imageability_fear
'em,0.0,,,,,,,1.3617,1.9138,,...,,,,,,,,,,
'neath,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
're,0.0,,,,,,,0.9031,1.6335,,...,,,,,,,,,,
'shun,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
'tis,0.0,,,,,,,0.4771,0.6021,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shrick,,,,,,,,,,,...,,,,,,,,2.62,4.38,2.93
post office,,,,,,,,,,,...,,,,,,,,3.79,3.07,5.29
fishing rod,,,,,,,,,,,...,,,,,,,,2.29,3.38,5.64
March,,,,,,,,,,,...,,,,,,,,3.43,2.76,3.50


In [5]:
# Log transforming selected norms
with open('../../data/processed/norms_to_log.pkl', 'rb') as f:
    norms_to_log = pickle.load(f)
    norms[norms_to_log] = norms[norms_to_log].apply(np.log1p)

with open('../../data/raw/embed_to_dtype.json', 'r') as f:
    embed_to_type = json.load(f)
embed_to_type

{'CBOW_GoogleNews': 'text',
 'fastText_CommonCrawl': 'text',
 'fastText_Wiki_News': 'text',
 'fastTextSub_OpenSub': 'text',
 'GloVe_CommonCrawl': 'text',
 'GloVe_Twitter': 'text',
 'GloVe_Wikipedia': 'text',
 'LexVec_CommonCrawl': 'text',
 'morphoNLM': 'text',
 'spherical_text_Wikipedia': 'text',
 'eye_tracking': 'brain',
 'EEG_speech': 'brain',
 'EEG_text': 'brain',
 'fMRI_speech_hyper_align': 'brain',
 'fMRI_text_hyper_align': 'brain',
 'microarray': 'brain',
 'PPMI_SVD_SWOW': 'behavior',
 'SGSoftMaxInput_SWOW': 'behavior',
 'SGSoftMaxOutput_SWOW': 'behavior',
 'PPMI_SVD_SouthFlorida': 'behavior',
 'PPMI_SVD_EAT': 'behavior',
 'THINGS': 'behavior',
 'feature_overlap': 'behavior',
 'norms_sensorimotor': 'behavior',
 'compo_attribs': 'behavior',
 'SVD_sim_rel': 'behavior'}

## Cross Validation

In [6]:
# Ridge
alphas = np.logspace(-3, 3, 6)
ridge = RidgeCV(alphas=alphas)

# Logistic hyperparameters
Cs = 1 / alphas
inner_cv = 5
penalty = 'l2'

# Scorers
binary_scoring = make_binary_scoring()
multiclass_scoring = make_multiclass_scoring()
continuous_scoring = {'r2': 'r2', 'neg_mse': 'neg_mean_squared_error'}

# outer_cv setting 
outer_cv, n_jobs = 5, 10

In [7]:
# RCA
rca = []
for embed_name in tqdm(embeds.keys()):
    embed = embeds[embed_name]
    
    to_print = []
    for norm_name in tqdm(norms.columns, desc=embed_name):
        
        # Aligning data
        y = norms[norm_name].dropna()
        X, y = embed.align(y, axis=0, join='inner', copy=True)
        
        # Checking norm dtype 
        norm_dtype = norm_metadata.loc[norm_name, 'type']
        
        # Solvers, scoring, estimators
        if norm_dtype in ['binary', 'multiclass']:
            X, y = process_categorical(outer_cv, inner_cv, X, y)
            
            # may have switched form multi to bin after processing
            norm_dtype = 'binary' if len(y.unique()) == 2 else 'multiclass'
            
            # Cross validation settings for logistic regression
            solver = best_logistic_solver(X, norm_dtype)
            
            # Defining logistic regression 
            estimator = LogisticRegressionCV(
                Cs=Cs, penalty=penalty, cv=StratifiedKFold(inner_cv),
                solver=solver, n_jobs=8
            )
            scoring = binary_scoring if norm_dtype == 'binary' else multiclass_scoring
        else: # continuous
            estimator, scoring = ridge, continuous_scoring
  
        # Cross validation
        associated_embed = norm_metadata.loc[norm_name, 'associated_embed']
        check = checker(embed_name, y, norm_dtype, associated_embed, outer_cv)
        if check == 'pass':
             scores = k_fold_cross_val(estimator, X, y, outer_cv, scoring, n_jobs)
             r2s, mses = scores['test_r2'], - scores['test_neg_mse']
             r2_mean, r2_sd = r2s.mean(), r2s.std()
             mse_mean, mse_sd = mses.mean(), mses.std()
        else:
            r2_mean, r2_sd = np.nan, np.nan
            mse_mean, mse_sd = np.nan, np.nan
            
        # Saving
        train_n = int(((outer_cv - 1) / outer_cv) * len(X))
        test_n = len(X) - train_n
        p = X.shape[1]
        embed_type = embed_to_type[embed_name]
        rca.append([
            embed_name, embed_type, norm_name, train_n, test_n, p, 
            r2_mean, r2_sd, mse_mean, mse_sd, check
        ])
        
        to_print.append([norm_name, train_n, r2_mean, r2_sd, check])

    to_print = pd.DataFrame(to_print, columns=['norm' , 'train_n', 'r2_mean', 'r2_sd', 'check'])
    print(to_print.sort_values('r2_mean', ascending=False).head(10))

rca = pd.DataFrame(
    rca, columns=[
        'embed', 'embed_type', 'norm', 'train_n', 'test_n', 'p', 
        'r2_mean', 'r2_sd', 'mse_mean', 'mse_sd', 'check'
    ]
)

rca.to_csv('../../data/final/rca.csv', index=False)
rca

  0%|          | 0/25 [00:00<?, ?it/s]

CBOW_GoogleNews:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
283        person_vanarsdall      960  0.808290  0.024645  pass
281       thought_vanarsdall      960  0.806842  0.020711  pass
284         goals_vanarsdall      960  0.796911  0.022164  pass
218            valence_britz      375  0.795997  0.024787  pass
219         social_des_britz      375  0.789863  0.024555  pass
162          goals_wilkowski      837  0.785734  0.012998  pass
24              Conc_Glasgow     3672  0.781250  0.009689  pass
177      concreteness_hollis      828  0.778491  0.019184  pass
282  reproduction_vanarsdall      960  0.776782  0.010919  pass
280        living_vanarsdall      960  0.775123  0.012784  pass


PPMI_SVD_SouthFlorida:   0%|          | 0/292 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1622, in _ensure_numeric
    x = float(x)
ValueError: could not convert string to float: 'VisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualHapticHapticVisualHapticHapticHapticHapticVisual'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1626, in _ensure_numeric
    x = complex(x)
ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/datapool-1/homepoint

                 norm  train_n   r2_mean     r2_sd check
78        Cue_SetSize     3963  0.680452  0.021580  pass
35     Gustatory_Lanc     3814  0.672041  0.016565  pass
23          Conc_Brys     3815  0.655279  0.016208  pass
24       Conc_Glasgow     2205  0.612242  0.015712  pass
53    Valence_Glasgow     2205  0.589302  0.023232  pass
25       Imag_Glasgow     2205  0.588114  0.023052  pass
284  goals_vanarsdall      960  0.578370  0.033527  pass
175    valence_hollis      596  0.577282  0.036165  pass
199    fear_stevenson      595  0.576698  0.048410  pass
51         Socialness     1946  0.575751  0.015984  pass


SVD_sim_rel:   0%|          | 0/292 [00:00<?, ?it/s]



                         norm  train_n   r2_mean     r2_sd check
122              Music_Binder      283  0.489491  0.123858  pass
249  visual_complexity_marrow      294  0.298847  0.058843  pass
35             Gustatory_Lanc     4113  0.295551  0.121860  pass
280         living_vanarsdall      697  0.292883  0.079814  pass
285       movement_vanarsdall      697  0.285231  0.077082  pass
165                haptic_lyn       68  0.278858  0.064090  pass
282   reproduction_vanarsdall      697  0.271412  0.084066  pass
24               Conc_Glasgow     1834  0.264933  0.104800  pass
25               Imag_Glasgow     1834  0.251730  0.105068  pass
42                        BOI     2320  0.250690  0.109046  pass


spherical_text_Wikipedia:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n   r2_mean     r2_sd check
281   thought_vanarsdall      960  0.724437  0.031529  pass
24          Conc_Glasgow     3696  0.721401  0.087385  pass
283    person_vanarsdall      960  0.711201  0.031177  pass
238       thought_troche      600  0.710198  0.082919  pass
284     goals_vanarsdall      960  0.696985  0.058851  pass
218        valence_britz      380  0.691914  0.105787  pass
219     social_des_britz      380  0.685550  0.107345  pass
162      goals_wilkowski      789  0.685099  0.115948  pass
236        visual_troche      600  0.683930  0.120126  pass
177  concreteness_hollis      828  0.677174  0.095045  pass


norms_sensorimotor:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
124           Taste_Binder      404  0.914900  0.025870  pass
125           Smell_Binder      404  0.855016  0.024182  pass
166           auditory_lyn      292  0.851176  0.031339  pass
167          olfactory_lyn      292  0.829461  0.031711  pass
168          gustatory_lyn      292  0.822265  0.049779  pass
169  dominant_modality_lyn      275  0.781174  0.058839  pass
165             haptic_lyn      292  0.773093  0.054113  pass
164             visual_lyn      292  0.755749  0.056419  pass
112           Touch_Binder      404  0.749467  0.023388  pass
126            Head_Binder      404  0.712681  0.066904  pass


fastText_Wiki_News:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
281    thought_vanarsdall      960  0.830409  0.027144  pass
218         valence_britz      384  0.825426  0.067015  pass
219      social_des_britz      384  0.824494  0.067679  pass
283     person_vanarsdall      960  0.803788  0.049916  pass
284      goals_vanarsdall      960  0.803596  0.030151  pass
24           Conc_Glasgow     3705  0.774557  0.060356  pass
269  likableness_anderson      404  0.774098  0.067244  pass
162       goals_wilkowski      842  0.763554  0.078210  pass
255  likableness_chandler      642  0.751793  0.053688  pass
280     living_vanarsdall      960  0.745722  0.020758  pass


PPMI_SVD_EAT:   0%|          | 0/292 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1622, in _ensure_numeric
    x = float(x)
ValueError: could not convert string to float: 'HapticHapticHapticHapticHapticHapticVisualVisualVisualVisualVisualHapticVisualVisualVisualVisualVisualHapticVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisual'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1626, in _ensure_numeric
    x = complex(x)
ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y

                    norm  train_n   r2_mean     r2_sd check
254   imagery_vanderveur      669  0.736789  0.034730  pass
23             Conc_Brys     5052  0.662073  0.012838  pass
24          Conc_Glasgow     2346  0.641315  0.015923  pass
122         Music_Binder      322  0.625332  0.075830  pass
53       Valence_Glasgow     2346  0.623306  0.009311  pass
56   Valence_Covid_Older     1580  0.610636  0.023803  pass
165           haptic_lyn      147  0.610528  0.063918  pass
25          Imag_Glasgow     2346  0.605545  0.012308  pass
51            Socialness     2300  0.594224  0.018779  pass
218        valence_britz      141  0.593403  0.118236  pass


GloVe_Twitter:   0%|          | 0/292 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
219      social_des_britz      373  0.744196  0.053356  pass
218         valence_britz      373  0.734323  0.063612  pass
281    thought_vanarsdall      960  0.714135  0.029581  pass
283     person_vanarsdall      960  0.706224  0.034282  pass
284      goals_vanarsdall      960  0.701020  0.028907  pass
177   concreteness_hollis      828  0.678475  0.084815  pass
24           Conc_Glasgow     3699  0.668408  0.057491  pass
236         visual_troche      600  0.668343  0.070746  pass
255  likableness_chandler      606  0.662612  0.103158  pass
269  likableness_anderson      385  0.661039  0.120146  pass


LexVec_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
283      person_vanarsdall      960  0.787200  0.023805  pass
281     thought_vanarsdall      960  0.786447  0.027429  pass
284       goals_vanarsdall      960  0.777526  0.039134  pass
24            Conc_Glasgow     3702  0.771092  0.039817  pass
213  tabooness_janschewitz      359  0.750851  0.063010  pass
236          visual_troche      600  0.749564  0.055939  pass
218          valence_britz      375  0.738092  0.116826  pass
124           Taste_Binder      425  0.735436  0.129698  pass
219       social_des_britz      375  0.734297  0.109182  pass
142   Consequential_Binder      425  0.731721  0.071047  pass


fastTextSub_OpenSub:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                        norm  train_n   r2_mean     r2_sd check
281       thought_vanarsdall      959  0.823303  0.034426  pass
177      concreteness_hollis      828  0.802006  0.047937  pass
283        person_vanarsdall      959  0.801423  0.044976  pass
284         goals_vanarsdall      959  0.799058  0.055979  pass
24              Conc_Glasgow     3701  0.796716  0.060363  pass
218            valence_britz      375  0.784826  0.110251  pass
219         social_des_britz      375  0.779313  0.113848  pass
213    tabooness_janschewitz      358  0.775435  0.031614  pass
282  reproduction_vanarsdall      959  0.765422  0.053029  pass
280        living_vanarsdall      959  0.757800  0.039474  pass


eye_tracking:   0%|          | 0/292 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
174            aoa_hollis      428  0.131943  0.114504  pass
265            aoa_citron      164  0.131124  0.099251  pass
266   imageability_citron      164  0.103144  0.096603  pass
19            AoA_Glasgow     1603  0.068538  0.081079  pass
129       Practice_Binder      210  0.064307  0.053115  pass
92          Naming_RT_ELP     5403  0.048852  0.245407  pass
276            aoa_davies      405  0.045794  0.285400  pass
94   SemanticD_RT_Calgary     1461  0.045134  0.022611  pass
133           Near_Binder      239  0.036715  0.049855  pass
220   observability_britz      167  0.033098  0.064536  pass


SGSoftMaxOutput_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
218         valence_britz      376  0.562824  0.092797  pass
219      social_des_britz      376  0.560636  0.092795  pass
269  likableness_anderson      381  0.547727  0.101338  pass
187           happy_zupan      356  0.546821  0.122537  pass
255  likableness_chandler      576  0.533993  0.123907  pass
162       goals_wilkowski      661  0.530117  0.135432  pass
191         valence_zupan      356  0.527797  0.129881  pass
254    imagery_vanderveur      722  0.518544  0.116985  pass
190             sad_zupan      356  0.512055  0.126016  pass
196   happiness_stevenson      815  0.499735  0.110030  pass


morphoNLM:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n   r2_mean     r2_sd check
254   imagery_vanderveur      747  0.698228  0.036144  pass
238       thought_troche      598  0.680661  0.026026  pass
236        visual_troche      598  0.635153  0.036507  pass
284     goals_vanarsdall      959  0.627399  0.024470  pass
177  concreteness_hollis      821  0.621280  0.032347  pass
235      morality_troche      598  0.612259  0.064149  pass
147     Cognition_Binder      421  0.590191  0.050161  pass
283    person_vanarsdall      959  0.588076  0.027411  pass
281   thought_vanarsdall      959  0.583188  0.026490  pass
205   concreteness_brown      498  0.576115  0.033621  pass


SGSoftMaxInput_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
218         valence_britz      296  0.840213  0.035970  pass
219      social_des_britz      296  0.832422  0.041230  pass
255  likableness_chandler      408  0.823287  0.020040  pass
269  likableness_anderson      290  0.807011  0.016465  pass
191         valence_zupan      254  0.790945  0.019829  pass
175        valence_hollis      762  0.776439  0.026196  pass
187           happy_zupan      254  0.774071  0.034708  pass
53        Valence_Glasgow     3252  0.769903  0.012680  pass
254    imagery_vanderveur      684  0.769867  0.016589  pass
162       goals_wilkowski      474  0.769406  0.031510  pass


fMRI_text_hyper_align:   0%|          | 0/292 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
155         Disgusted_Binder       68  0.062369  0.231635  pass
284         goals_vanarsdall       76  0.045279  0.154537  pass
204       emotionality_brown       86  0.035064  0.020081  pass
178             aoa_stration       20  0.033426  0.307480  pass
63        Humor_Overall_Enge      252  0.029178  0.081281  pass
278               this_rocca       68  0.018550  0.074744  pass
286  concreteness_vanarsdall       76  0.017739  0.111521  pass
144             Human_Binder       68  0.017543  0.155931  pass
109        Complexity_Binder       40  0.006460  0.175994  pass
157         Surprised_Binder       68  0.005446  0.065649  pass


GloVe_Wikipedia:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                     norm  train_n   r2_mean     r2_sd check
218         valence_britz      382  0.788716  0.068266  pass
219      social_des_britz      382  0.778908  0.072640  pass
281    thought_vanarsdall      960  0.718127  0.048693  pass
238        thought_troche      600  0.709518  0.092018  pass
283     person_vanarsdall      960  0.707569  0.047245  pass
284      goals_vanarsdall      960  0.705896  0.062885  pass
236         visual_troche      600  0.692137  0.121478  pass
162       goals_wilkowski      830  0.689635  0.131215  pass
269  likableness_anderson      400  0.687371  0.087619  pass
177   concreteness_hollis      829  0.686559  0.107578  pass


EEG_text:   0%|          | 0/292 [00:00<?, ?it/s]



                    norm  train_n   r2_mean     r2_sd check
117      Audition_Binder      144  0.033027  0.047981  pass
21             DPoS_Brys     2538  0.001938  0.002308  pass
22             DPoS_VanH     2565  0.001123  0.002544  pass
29    Nmeanings_Websters     1882 -0.001270  0.009360  pass
73      Emot_Assoc_Trust     1359 -0.002224  0.007845  pass
72   Emot_Assoc_Surprise     1359 -0.002871  0.009421  pass
70   Emot_Assoc_Positive     1359 -0.003166  0.004961  pass
26       Nsenses_WordNet     2414 -0.004508  0.005205  pass
74         Sem_Diversity     2439 -0.005119  0.008655  pass
92         Naming_RT_ELP     2414 -0.005427  0.006751  pass


fastText_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
281       thought_vanarsdall      960  0.859534  0.026334  pass
283        person_vanarsdall      960  0.838061  0.034412  pass
219         social_des_britz      384  0.837762  0.062587  pass
218            valence_britz      384  0.829951  0.074628  pass
284         goals_vanarsdall      960  0.826550  0.032045  pass
282  reproduction_vanarsdall      960  0.804118  0.016936  pass
24              Conc_Glasgow     3705  0.800999  0.041619  pass
280        living_vanarsdall      960  0.799536  0.017457  pass
213    tabooness_janschewitz      359  0.793216  0.037845  pass
177      concreteness_hollis      829  0.792856  0.058776  pass


fMRI_speech_hyper_align:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

              norm  train_n   r2_mean     r2_sd check
1          Freq_KF      448  0.290488  0.048425  pass
4        Freq_Blog      460  0.279755  0.041277  pass
11         CD_Blog      460  0.274388  0.040451  pass
3   Freq_SUBTLEXUK      459  0.273981  0.044918  pass
0         Freq_HAL      458  0.263375  0.063460  pass
2   Freq_SUBTLEXUS      458  0.255832  0.034846  pass
6        Freq_News      460  0.246059  0.034311  pass
13         CD_News      460  0.242593  0.034194  pass
5     Freq_Twitter      460  0.235931  0.040872  pass
12      CD_Twitter      460  0.235288  0.040276  pass


compo_attribs:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n   r2_mean     r2_sd check
283    person_vanarsdall      164  0.952624  0.010731  pass
281   thought_vanarsdall      164  0.945300  0.010022  pass
236        visual_troche       89  0.922821  0.019470  pass
35        Gustatory_Lanc      404  0.907134  0.031705  pass
285  movement_vanarsdall      164  0.896720  0.022700  pass
284     goals_vanarsdall      164  0.884780  0.012676  pass
280    living_vanarsdall      164  0.872182  0.021847  pass
199       fear_stevenson      118  0.865163  0.025271  pass
23             Conc_Brys      404  0.863428  0.017888  pass
24          Conc_Glasgow      296  0.861586  0.022558  pass


PPMI_SVD_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                     norm  train_n   r2_mean     r2_sd check
255  likableness_chandler      408  0.853463  0.033045  pass
175        valence_hollis      762  0.851266  0.017573  pass
218         valence_britz      296  0.849316  0.039970  pass
53        Valence_Glasgow     3252  0.845925  0.010383  pass
219      social_des_britz      296  0.832966  0.044636  pass
162       goals_wilkowski      474  0.822246  0.034423  pass
56    Valence_Covid_Older     2315  0.820769  0.015224  pass
269  likableness_anderson      290  0.817729  0.038955  pass
196   happiness_stevenson      758  0.811910  0.014534  pass
187           happy_zupan      254  0.808208  0.048530  pass


microarray:   0%|          | 0/292 [00:00<?, ?it/s]

                    norm  train_n   r2_mean     r2_sd check
129      Practice_Binder       68  0.045140  0.102219  pass
39     Mouth_Throat_Lanc      392  0.031458  0.050935  pass
177  concreteness_hollis       67  0.005162  0.037918  pass
35        Gustatory_Lanc      392  0.003724  0.005836  pass
79          Cue_MeanConn      304  0.003092  0.026233  pass
21             DPoS_Brys      458  0.002348  0.000920  pass
22             DPoS_VanH      479  0.001981  0.001501  pass
36    Interoceptive_Lanc      392  0.001714  0.073324  pass
63    Humor_Overall_Enge      178  0.001364  0.062713  pass
16       Prevalence_Brys      396  0.000050  0.011798  pass


EEG_speech:   0%|          | 0/292 [00:00<?, ?it/s]



                 norm  train_n   r2_mean     r2_sd check
186     fearful_zupan       42  0.051012  0.177284  pass
188   intensity_zupan       42  0.041501  0.080793  pass
268   arousal_imbault      195  0.033178  0.036310  pass
289  familiarity_fear      103  0.033056  0.064010  pass
197   anger_stevenson      140  0.006568  0.059366  pass
128  LowerLimb_Binder       95  0.001699  0.027380  pass
22          DPoS_VanH     1215  0.001127  0.008701  pass
35     Gustatory_Lanc     1006  0.001123  0.005530  pass
21          DPoS_Brys     1184  0.001100  0.010096  pass
37          Head_Lanc     1006 -0.001862  0.014966  pass


GloVe_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
218          valence_britz      384  0.825621  0.093826  pass
219       social_des_britz      384  0.819687  0.083322  pass
283      person_vanarsdall      960  0.815314  0.028712  pass
281     thought_vanarsdall      960  0.807988  0.020557  pass
284       goals_vanarsdall      960  0.804642  0.020717  pass
213  tabooness_janschewitz      359  0.798288  0.043338  pass
162        goals_wilkowski      842  0.763279  0.098425  pass
255   likableness_chandler      644  0.755676  0.083720  pass
175         valence_hollis      829  0.740462  0.068868  pass
280      living_vanarsdall      960  0.739901  0.025244  pass


THINGS:   0%|          | 0/292 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
105         Biomotion_Binder      128  0.867574  0.038378  pass
280        living_vanarsdall      376  0.843066  0.018136  pass
282  reproduction_vanarsdall      376  0.828345  0.025951  pass
35            Gustatory_Lanc     1235  0.813892  0.040897  pass
111              Body_Binder      128  0.797362  0.067834  pass
124             Taste_Binder      128  0.793488  0.107142  pass
122             Music_Binder      128  0.758826  0.076604  pass
104            Motion_Binder      128  0.726740  0.025597  pass
131              Path_Binder      128  0.725024  0.064865  pass
281       thought_vanarsdall      376  0.724527  0.083121  pass


Unnamed: 0,embed,embed_type,norm,train_n,test_n,p,r2_mean,r2_sd,mse_mean,mse_sd,check
0,CBOW_GoogleNews,text,Freq_HAL,28012,7003,300,0.522106,0.008390,2.715519,0.072449,pass
1,CBOW_GoogleNews,text,Freq_KF,19285,4822,300,0.500385,0.009733,0.156678,0.004765,pass
2,CBOW_GoogleNews,text,Freq_SUBTLEXUS,28636,7159,300,0.537246,0.009834,0.361360,0.007800,pass
3,CBOW_GoogleNews,text,Freq_SUBTLEXUK,29316,7330,300,0.545626,0.008433,0.446059,0.009071,pass
4,CBOW_GoogleNews,text,Freq_Blog,31876,7969,300,0.523688,0.008467,0.400176,0.009336,pass
...,...,...,...,...,...,...,...,...,...,...,...
7295,THINGS,behavior,familiarity_vanarsdall,376,95,49,0.060692,0.083942,3326.617511,620.203961,pass
7296,THINGS,behavior,imageability_vanarsdall,376,95,49,0.053593,0.095680,1345.018025,292.430129,pass
7297,THINGS,behavior,familiarity_fear,173,44,49,0.139160,0.160894,0.790788,0.216911,pass
7298,THINGS,behavior,aoa_fear,173,44,49,-0.021206,0.121789,0.533727,0.039008,pass
