To do:
1. double check logstic convergence issues
2. check alpha range in supplementary repo

In [39]:
import os
import json
import pickle
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold
from tqdm.notebook import tqdm
from rca import make_binary_scorer, make_multiclass_scorer, process_categorical, best_logistic_solver, checker, k_fold_cross_val

## Loading Data

In [2]:
# Pulling and standardising embeddings
embeds = {}
embeds_path = '../../data/raw/embeds/'
for f_name in tqdm(os.listdir(embeds_path)):
    if f_name != 'feature_overlap.csv':  # dropping since contains many NaNs
        embed = pd.read_csv(embeds_path + f_name, index_col=0)
        embed_name = f_name.split('.')[0]
        embeds[embed_name] = (embed - embed.mean()) / embed.std()

{name: embed.shape for name, embed in embeds.items()}

  0%|          | 0/26 [00:00<?, ?it/s]

{'CBOW_GoogleNews': (79279, 300),
 'PPMI_SVD_SouthFlorida': (4959, 300),
 'SVD_sim_rel': (6002, 300),
 'spherical_text_Wikipedia': (59012, 300),
 'norms_sensorimotor': (36854, 11),
 'fastText_Wiki_News': (81728, 300),
 'PPMI_SVD_EAT': (7775, 300),
 'GloVe_Twitter': (48614, 200),
 'LexVec_CommonCrawl': (87635, 300),
 'fastTextSub_OpenSub': (72538, 300),
 'eye_tracking': (7486, 6),
 'SGSoftMaxOutput_SWOW': (25442, 300),
 'morphoNLM': (50506, 50),
 'SGSoftMaxInput_SWOW': (11783, 300),
 'fMRI_text_hyper_align': (1205, 1000),
 'GloVe_Wikipedia': (68943, 300),
 'EEG_text': (3355, 104),
 'fastText_CommonCrawl': (88986, 300),
 'fMRI_speech_hyper_align': (579, 6),
 'compo_attribs': (534, 62),
 'PPMI_SVD_SWOW': (11783, 300),
 'microarray': (626, 15),
 'EEG_speech': (1591, 130),
 'GloVe_CommonCrawl': (88440, 300),
 'THINGS': (1562, 49)}

In [18]:
norms = pd.read_csv('../../data/raw/psychNorms.zip', index_col=0, compression='zip', low_memory=False)
norm_metadata = pd.read_csv('../../data/raw/psychNorms_metadata.csv', index_col='norm')
norm_metadata['associated_embed'] = norm_metadata['associated_embed'].astype(str)
norms

Unnamed: 0,Freq_HAL,Freq_KF,Freq_SUBTLEXUS,Freq_SUBTLEXUK,Freq_Blog,Freq_Twitter,Freq_News,Freq_CobW,Freq_CobS,CD_SUBTLEXUS,...,reproduction_vanarsdall,person_vanarsdall,goals_vanarsdall,movement_vanarsdall,concreteness_vanarsdall,familiarity_vanarsdall,imageability_vanarsdall,familiarity_fear,aoa_fear,imageability_fear
'em,0.0,,,,,,,1.3617,1.9138,,...,,,,,,,,,,
'neath,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
're,0.0,,,,,,,0.9031,1.6335,,...,,,,,,,,,,
'shun,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
'tis,0.0,,,,,,,0.4771,0.6021,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
shrick,,,,,,,,,,,...,,,,,,,,2.62,4.38,2.93
post office,,,,,,,,,,,...,,,,,,,,3.79,3.07,5.29
fishing rod,,,,,,,,,,,...,,,,,,,,2.29,3.38,5.64
March,,,,,,,,,,,...,,,,,,,,3.43,2.76,3.50


In [None]:
# Log transforming selected norms
with open('../../data/processed/norms_to_log.pkl', 'rb') as f:
    norms_to_log = pickle.load(f)
    norms[norms_to_log] = norms[norms_to_log].apply(np.log1p)

with open('../../data/raw/embed_to_dtype.json', 'r') as f:
    embed_to_type = json.load(f)
embed_to_type

## Cross Validation

In [3]:
# Ridge
alphas = np.logspace(-3, 3, 6)
ridge = RidgeCV(alphas=alphas)

# Logistic hyperparameters
Cs = 1 / alphas
inner_cv = 5
penalty = 'l2'

# Scorers
binary_scorer = make_binary_scorer()
multiclass_scorer = make_multiclass_scorer()

# outer_cv setting 
outer_cv, n_jobs = 5, 8

[1.00000000e-03 1.58489319e-02 2.51188643e-01 3.98107171e+00
 6.30957344e+01 1.00000000e+03]


In [6]:
# RCA
rca = []
for embed_name in tqdm(embeds.keys()):
    embed = embeds[embed_name]
    
    to_print = []
    for norm_name in tqdm(norms.columns, desc=embed_name):
        
        # Aligning data
        y = norms[norm_name].dropna()
        X, y = embed.align(y, axis=0, join='inner', copy=True)
        
        # Checking norm dtype 
        norm_dtype = norm_metadata.loc[norm_name, 'type']
        
        # Solvers, scoring, estimators
        if norm_dtype in ['binary', 'multiclass']:
            X, y = process_categorical(outer_cv, inner_cv, X, y)
            
            # may have switched form multi to bin after processing
            norm_dtype = 'binary' if len(y.unique()) == 2 else 'multiclass'
            
            # Cross validation settings for logistic regression
            solver = best_logistic_solver(X, norm_dtype)
            
            # Defining logistic regression 
            estimator = LogisticRegressionCV(
                Cs=Cs, penalty=penalty, cv=StratifiedKFold(inner_cv),
                solver=solver, n_jobs=8
            )
            scoring = binary_scorer if norm_dtype == 'binary' else multiclass_scorer
        else: # continuous
            estimator, scoring = ridge, 'r2'

            
        # Cross validation
        check = checker(
            embed_name, y, norm_dtype, norm_metadata, outer_cv, norm_name
        )
        if check == 'pass':
             scores = k_fold_cross_val(estimator, X, y, outer_cv, scoring, n_jobs)
             r2_mean, r2_sd = scores.mean(), scores.std()
        else:
            r2_mean, r2_sd = np.nan, np.nan
            
        # Saving
        train_n = int(((outer_cv - 1) / outer_cv) * len(X))
        p = X.shape[1]
        embed_type = embed_to_type[embed_name]
        rca.append([embed_name, embed_type, norm_name, train_n, p, r2_mean, r2_sd, check])
        
        to_print.append([norm_name, train_n, r2_mean, r2_sd, check])

    to_print = pd.DataFrame(to_print, columns=['norm' , 'train_n', 'r2_mean', 'r2_sd', 'check'])
    print(to_print.sort_values('r2_mean', ascending=False).head(10))

rca = pd.DataFrame(rca, columns=['embed', 'embed_type', 'norm', 'train_n', 'p', 'r2_mean', 'r2_sd', 'check'])
rca.to_csv('../../data/final/rca.csv', index=False)
rca

  0%|          | 0/25 [00:00<?, ?it/s]

CBOW_GoogleNews:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.920117  0.050454  pass
281       thought_vanarsdall      960  0.805612  0.021746  pass
284         goals_vanarsdall      960  0.802631  0.025427  pass
283        person_vanarsdall      960  0.799361  0.014477  pass
218            valence_britz      380  0.794689  0.035882  pass
219         social_des_britz      380  0.789339  0.037873  pass
162          goals_wilkowski      842  0.787918  0.024260  pass
24              Conc_Glasgow     3694  0.781725  0.009272  pass
177      concreteness_hollis      830  0.778592  0.015293  pass
282  reproduction_vanarsdall      960  0.778579  0.023920  pass


PPMI_SVD_SouthFlorida:   0%|          | 0/292 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1622, in _ensure_numeric
    x = float(x)
ValueError: could not convert string to float: 'VisualVisualHapticVisualVisualVisualVisualVisualVisualVisualHapticHapticVisualHapticVisualVisualVisualVisualHapticVisualHapticVisualVisualVisual'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1626, in _ensure_numeric
    x = complex(x)
ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/datapool-1/homepoint

                  norm  train_n   r2_mean     r2_sd check
78         Cue_SetSize     3963  0.680452  0.021580  pass
35      Gustatory_Lanc     3814  0.672041  0.016565  pass
23           Conc_Brys     3815  0.655279  0.016208  pass
24        Conc_Glasgow     2205  0.612242  0.015712  pass
53     Valence_Glasgow     2205  0.589302  0.023232  pass
25        Imag_Glasgow     2205  0.588114  0.023052  pass
284   goals_vanarsdall      960  0.578370  0.033527  pass
175     valence_hollis      596  0.577282  0.036165  pass
51          Socialness     1946  0.575751  0.015984  pass
283  person_vanarsdall      960  0.575688  0.020180  pass


SVD_sim_rel:   0%|          | 0/292 [00:00<?, ?it/s]



                         norm  train_n   r2_mean     r2_sd check
122              Music_Binder      283  0.489491  0.123858  pass
249  visual_complexity_marrow      294  0.298847  0.058843  pass
35             Gustatory_Lanc     4113  0.295551  0.121860  pass
280         living_vanarsdall      697  0.292883  0.079814  pass
285       movement_vanarsdall      697  0.285231  0.077082  pass
165                haptic_lyn       68  0.278858  0.064090  pass
282   reproduction_vanarsdall      697  0.271412  0.084066  pass
24               Conc_Glasgow     1834  0.264933  0.104800  pass
25               Imag_Glasgow     1834  0.251730  0.105068  pass
42                        BOI     2320  0.250690  0.109046  pass


spherical_text_Wikipedia:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.903833  0.022125  pass
281       thought_vanarsdall      960  0.724667  0.031912  pass
24              Conc_Glasgow     3720  0.721937  0.086431  pass
283        person_vanarsdall      960  0.713342  0.029314  pass
238           thought_troche      600  0.709911  0.083027  pass
284         goals_vanarsdall      960  0.696817  0.058565  pass
218            valence_britz      390  0.687431  0.113214  pass
219         social_des_britz      390  0.684177  0.114634  pass
162          goals_wilkowski      792  0.683381  0.116495  pass
236            visual_troche      600  0.683214  0.120989  pass


norms_sensorimotor:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.926173  0.044691  pass
124             Taste_Binder      404  0.914900  0.025870  pass
125             Smell_Binder      404  0.855016  0.024182  pass
166             auditory_lyn      292  0.851176  0.031339  pass
167            olfactory_lyn      292  0.829461  0.031711  pass
168            gustatory_lyn      292  0.822265  0.049779  pass
169    dominant_modality_lyn      275  0.781174  0.058839  pass
165               haptic_lyn      292  0.773093  0.054113  pass
164               visual_lyn      292  0.755749  0.056419  pass
112             Touch_Binder      404  0.749467  0.023388  pass


fastText_Wiki_News:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
281       thought_vanarsdall      960  0.830407  0.027126  pass
219         social_des_britz      399  0.821469  0.066317  pass
218            valence_britz      399  0.819651  0.067706  pass
283        person_vanarsdall      960  0.803696  0.049922  pass
284         goals_vanarsdall      960  0.803415  0.030641  pass
24              Conc_Glasgow     3734  0.772717  0.059950  pass
162          goals_wilkowski      848  0.757183  0.080262  pass
255     likableness_chandler      752  0.746588  0.078572  pass
280        living_vanarsdall      960  0.746493  0.021291  pass
282  reproduction_vanarsdall      960  0.743117  0.027864  pass


PPMI_SVD_EAT:   0%|          | 0/292 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1622, in _ensure_numeric
    x = float(x)
ValueError: could not convert string to float: 'HapticHapticHapticHapticHapticHapticVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualVisualHapticVisualHapticVisualVisualVisualVisualVisualVisualVisual'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/pandas/core/nanops.py", line 1626, in _ensure_numeric
    x = complex(x)
ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/datapool-1/homepoint/zhussain/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y

                    norm  train_n   r2_mean     r2_sd check
254   imagery_vanderveur      669  0.736789  0.034730  pass
23             Conc_Brys     5052  0.662073  0.012838  pass
24          Conc_Glasgow     2346  0.641315  0.015923  pass
122         Music_Binder      322  0.625332  0.075830  pass
53       Valence_Glasgow     2346  0.623306  0.009311  pass
56   Valence_Covid_Older     1580  0.610636  0.023803  pass
165           haptic_lyn      147  0.610528  0.063918  pass
25          Imag_Glasgow     2346  0.605545  0.012308  pass
51            Socialness     2300  0.594224  0.018779  pass
218        valence_britz      141  0.593403  0.118236  pass


GloVe_Twitter:   0%|          | 0/292 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.913104  0.056091  pass
219         social_des_britz      383  0.738393  0.074573  pass
218            valence_britz      383  0.724088  0.088273  pass
281       thought_vanarsdall      960  0.714239  0.029642  pass
283        person_vanarsdall      960  0.706318  0.034210  pass
284         goals_vanarsdall      960  0.700997  0.029121  pass
177      concreteness_hollis      829  0.694596  0.063333  pass
236            visual_troche      600  0.668120  0.070742  pass
24              Conc_Glasgow     3724  0.666429  0.057015  pass
213    tabooness_janschewitz      366  0.658742  0.051499  pass


LexVec_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.896757  0.037499  pass
283        person_vanarsdall      960  0.791932  0.022938  pass
281       thought_vanarsdall      960  0.791456  0.026679  pass
213    tabooness_janschewitz      367  0.784096  0.035871  pass
284         goals_vanarsdall      960  0.782973  0.036484  pass
24              Conc_Glasgow     3731  0.770235  0.041633  pass
236            visual_troche      600  0.759247  0.061261  pass
218            valence_britz      379  0.745004  0.124962  pass
124             Taste_Binder      425  0.744672  0.124959  pass
219         social_des_britz      379  0.739657  0.117609  pass


fastTextSub_OpenSub:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.910090  0.062148  pass
281       thought_vanarsdall      959  0.823448  0.034276  pass
283        person_vanarsdall      959  0.804831  0.046035  pass
177      concreteness_hollis      830  0.801229  0.047859  pass
284         goals_vanarsdall      959  0.797879  0.056233  pass
24              Conc_Glasgow     3730  0.795945  0.058560  pass
218            valence_britz      380  0.785868  0.111732  pass
219         social_des_britz      380  0.778143  0.117126  pass
282  reproduction_vanarsdall      959  0.765079  0.052656  pass
213    tabooness_janschewitz      366  0.763382  0.043227  pass


eye_tracking:   0%|          | 0/292 [00:00<?, ?it/s]



                         norm  train_n   r2_mean     r2_sd check
174                aoa_hollis      428  0.131943  0.114504  pass
19                AoA_Glasgow     1603  0.068538  0.081079  pass
129           Practice_Binder      210  0.064307  0.053115  pass
92              Naming_RT_ELP     5403  0.048852  0.245407  pass
276                aoa_davies      405  0.045794  0.285400  pass
94       SemanticD_RT_Calgary     1461  0.045134  0.022611  pass
133               Near_Binder      239  0.036715  0.049855  pass
220       observability_britz      167  0.033098  0.064536  pass
216  imageability_janschewitz      146  0.031267  0.022376  pass
226        familiarity_juhasz       51  0.029896  0.149117  pass


SGSoftMaxOutput_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
270  meaningfulness_anderson       44  0.902980  0.047411  pass
218            valence_britz      376  0.562824  0.092797  pass
219         social_des_britz      376  0.560636  0.092795  pass
187              happy_zupan      356  0.546821  0.122537  pass
255     likableness_chandler      576  0.533993  0.123907  pass
162          goals_wilkowski      661  0.530117  0.135432  pass
191            valence_zupan      356  0.527797  0.129881  pass
254       imagery_vanderveur      722  0.518544  0.116985  pass
190                sad_zupan      356  0.512055  0.126016  pass
196      happiness_stevenson      811  0.501210  0.106105  pass


morphoNLM:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
254       imagery_vanderveur      760  0.696069  0.036422  pass
238           thought_troche      598  0.680606  0.026106  pass
270  meaningfulness_anderson       43  0.657062  0.239995  pass
236            visual_troche      598  0.635413  0.036702  pass
284         goals_vanarsdall      959  0.627339  0.024230  pass
177      concreteness_hollis      822  0.622581  0.031380  pass
235          morality_troche      598  0.612366  0.064061  pass
147         Cognition_Binder      421  0.589596  0.050907  pass
283        person_vanarsdall      959  0.587947  0.027094  pass
281       thought_vanarsdall      959  0.583154  0.025956  pass


SGSoftMaxInput_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]



                     norm  train_n   r2_mean     r2_sd check
218         valence_britz      296  0.840213  0.035970  pass
219      social_des_britz      296  0.832422  0.041230  pass
255  likableness_chandler      408  0.823287  0.020040  pass
191         valence_zupan      254  0.790945  0.019829  pass
175        valence_hollis      762  0.776439  0.026196  pass
187           happy_zupan      254  0.774071  0.034708  pass
53        Valence_Glasgow     3252  0.769903  0.012680  pass
254    imagery_vanderveur      684  0.769867  0.016589  pass
162       goals_wilkowski      474  0.769406  0.031510  pass
56    Valence_Covid_Older     2315  0.743913  0.012298  pass


fMRI_text_hyper_align:   0%|          | 0/292 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
155         Disgusted_Binder       68  0.062369  0.231635  pass
284         goals_vanarsdall       76  0.045279  0.154537  pass
204       emotionality_brown       86  0.035064  0.020081  pass
178             aoa_stration       20  0.033426  0.307480  pass
63        Humor_Overall_Enge      252  0.029178  0.081281  pass
278               this_rocca       68  0.018550  0.074744  pass
286  concreteness_vanarsdall       76  0.017739  0.111521  pass
144             Human_Binder       68  0.017543  0.155931  pass
109        Complexity_Binder       40  0.006460  0.175994  pass
157         Surprised_Binder       68  0.005446  0.065649  pass


GloVe_Wikipedia:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n   r2_mean     r2_sd check
218         valence_britz      393  0.789743  0.054609  pass
219      social_des_britz      393  0.778041  0.056983  pass
281    thought_vanarsdall      960  0.718878  0.048228  pass
238        thought_troche      600  0.711109  0.092515  pass
283     person_vanarsdall      960  0.708306  0.046865  pass
284      goals_vanarsdall      960  0.706904  0.062015  pass
236         visual_troche      600  0.693071  0.120926  pass
162       goals_wilkowski      835  0.688553  0.133176  pass
177   concreteness_hollis      832  0.685509  0.106493  pass
255  likableness_chandler      675  0.672957  0.101035  pass


EEG_text:   0%|          | 0/292 [00:00<?, ?it/s]



                    norm  train_n   r2_mean     r2_sd check
117      Audition_Binder      144  0.033027  0.047981  pass
21             DPoS_Brys     2538  0.001929  0.002308  pass
22             DPoS_VanH     2565  0.001124  0.002549  pass
28   Nmeanings_Wordsmyth       68  0.000935  0.094880  pass
73      Emot_Assoc_Trust     1359 -0.002225  0.007845  pass
72   Emot_Assoc_Surprise     1359 -0.002872  0.009420  pass
70   Emot_Assoc_Positive     1359 -0.003166  0.004961  pass
26       Nsenses_WordNet     2414 -0.004530  0.003279  pass
74         Sem_Diversity     2439 -0.005119  0.008655  pass
92         Naming_RT_ELP     2414 -0.005427  0.006751  pass


fastText_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n   r2_mean     r2_sd check
281       thought_vanarsdall      960  0.860923  0.026118  pass
283        person_vanarsdall      960  0.839748  0.034257  pass
219         social_des_britz      399  0.835309  0.060957  pass
284         goals_vanarsdall      960  0.828248  0.031918  pass
218            valence_britz      399  0.827937  0.073532  pass
282  reproduction_vanarsdall      960  0.805487  0.016931  pass
280        living_vanarsdall      960  0.801339  0.017159  pass
24              Conc_Glasgow     3734  0.800168  0.042045  pass
213    tabooness_janschewitz      368  0.797736  0.036753  pass
177      concreteness_hollis      831  0.795878  0.059325  pass


fMRI_speech_hyper_align:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

              norm  train_n   r2_mean     r2_sd check
1          Freq_KF      448  0.290488  0.048425  pass
4        Freq_Blog      460  0.279755  0.041277  pass
11         CD_Blog      460  0.274388  0.040451  pass
3   Freq_SUBTLEXUK      459  0.273981  0.044918  pass
0         Freq_HAL      458  0.263375  0.063460  pass
2   Freq_SUBTLEXUS      458  0.255832  0.034846  pass
6        Freq_News      460  0.246059  0.034311  pass
13         CD_News      460  0.242593  0.034194  pass
5     Freq_Twitter      460  0.235931  0.040872  pass
12      CD_Twitter      460  0.235288  0.040276  pass


compo_attribs:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n   r2_mean     r2_sd check
283    person_vanarsdall      164  0.952624  0.010731  pass
281   thought_vanarsdall      164  0.945300  0.010022  pass
236        visual_troche       89  0.922821  0.019470  pass
35        Gustatory_Lanc      404  0.907134  0.031705  pass
285  movement_vanarsdall      164  0.896720  0.022700  pass
284     goals_vanarsdall      164  0.884780  0.012676  pass
280    living_vanarsdall      164  0.872182  0.021847  pass
199       fear_stevenson      118  0.865163  0.025271  pass
23             Conc_Brys      404  0.863428  0.017888  pass
24          Conc_Glasgow      296  0.861586  0.022558  pass


PPMI_SVD_SWOW:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                     norm  train_n   r2_mean     r2_sd check
255  likableness_chandler      408  0.853463  0.033045  pass
175        valence_hollis      762  0.851266  0.017573  pass
218         valence_britz      296  0.849316  0.039970  pass
53        Valence_Glasgow     3252  0.845925  0.010383  pass
219      social_des_britz      296  0.832966  0.044636  pass
162       goals_wilkowski      474  0.822246  0.034423  pass
56    Valence_Covid_Older     2315  0.820769  0.015224  pass
196   happiness_stevenson      754  0.810465  0.012936  pass
187           happy_zupan      254  0.808208  0.048530  pass
191         valence_zupan      254  0.799801  0.041966  pass


microarray:   0%|          | 0/292 [00:00<?, ?it/s]

                    norm  train_n   r2_mean     r2_sd check
129      Practice_Binder       68  0.045140  0.102219  pass
39     Mouth_Throat_Lanc      392  0.031458  0.050935  pass
177  concreteness_hollis       67  0.005162  0.037918  pass
35        Gustatory_Lanc      392  0.003724  0.005836  pass
79          Cue_MeanConn      304  0.003092  0.026233  pass
21             DPoS_Brys      458  0.002348  0.000920  pass
22             DPoS_VanH      479  0.001981  0.001501  pass
36    Interoceptive_Lanc      392  0.001714  0.073324  pass
63    Humor_Overall_Enge      178  0.001364  0.062713  pass
16       Prevalence_Brys      396  0.000050  0.011798  pass


EEG_speech:   0%|          | 0/292 [00:00<?, ?it/s]



                 norm  train_n   r2_mean     r2_sd check
186     fearful_zupan       42  0.051012  0.177284  pass
188   intensity_zupan       42  0.041501  0.080793  pass
268   arousal_imbault      195  0.033178  0.036310  pass
289  familiarity_fear      103  0.033056  0.064010  pass
128  LowerLimb_Binder       95  0.001699  0.027380  pass
22          DPoS_VanH     1215  0.001124  0.008703  pass
35     Gustatory_Lanc     1006  0.001123  0.005530  pass
21          DPoS_Brys     1184  0.001096  0.010104  pass
37          Head_Lanc     1006 -0.001862  0.014966  pass
43               CBOI      715 -0.003446  0.019760  pass


GloVe_CommonCrawl:   0%|          | 0/292 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n   r2_mean     r2_sd check
218          valence_britz      399  0.824315  0.103275  pass
219       social_des_britz      399  0.819899  0.090364  pass
283      person_vanarsdall      960  0.815933  0.028905  pass
281     thought_vanarsdall      960  0.809162  0.020676  pass
284       goals_vanarsdall      960  0.805430  0.020911  pass
213  tabooness_janschewitz      368  0.787859  0.041488  pass
162        goals_wilkowski      848  0.756371  0.103501  pass
175         valence_hollis      831  0.747624  0.074644  pass
255   likableness_chandler      767  0.746641  0.084984  pass
280      living_vanarsdall      960  0.741454  0.025185  pass


THINGS:   0%|          | 0/292 [00:00<?, ?it/s]



                        norm  train_n   r2_mean     r2_sd check
105         Biomotion_Binder      128  0.867574  0.038378  pass
280        living_vanarsdall      376  0.843066  0.018136  pass
282  reproduction_vanarsdall      376  0.828345  0.025951  pass
35            Gustatory_Lanc     1235  0.813892  0.040897  pass
111              Body_Binder      128  0.797362  0.067834  pass
124             Taste_Binder      128  0.793488  0.107142  pass
122             Music_Binder      128  0.758826  0.076604  pass
104            Motion_Binder      128  0.726740  0.025597  pass
131              Path_Binder      128  0.725024  0.064865  pass
281       thought_vanarsdall      376  0.724527  0.083121  pass


Unnamed: 0,embed,embed_type,norm,train_n,p,r2_mean,r2_sd,check
0,CBOW_GoogleNews,text,Freq_HAL,51174,300,0.422344,0.006255,pass
1,CBOW_GoogleNews,text,Freq_KF,26605,300,0.463358,0.009852,pass
2,CBOW_GoogleNews,text,Freq_SUBTLEXUS,43939,300,0.488748,0.006706,pass
3,CBOW_GoogleNews,text,Freq_SUBTLEXUK,47398,300,0.479608,0.008173,pass
4,CBOW_GoogleNews,text,Freq_Blog,53251,300,0.463084,0.006165,pass
...,...,...,...,...,...,...,...,...
7295,THINGS,behavior,familiarity_vanarsdall,376,49,0.060692,0.083942,pass
7296,THINGS,behavior,imageability_vanarsdall,376,49,0.053593,0.095680,pass
7297,THINGS,behavior,familiarity_fear,173,49,0.139160,0.160894,pass
7298,THINGS,behavior,aoa_fear,173,49,-0.021206,0.121789,pass
