To do:
1. double-check logistic convergence issues
2. do 5-fold

In [1]:
import os
import json
import pandas as pd
import numpy as np
from sklearn.metrics import make_scorer
from sklearn.linear_model import RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold
from tqdm.notebook import tqdm
from rca import mcfadden_r2_binary, mcfadden_r2_multiclass, process_categorical, best_logistic_solver, checker, k_fold_cross_val
# Silence NumPy floating-point warnings for divide-by-zero and invalid operations.
# Being the last expression in the cell, the call's return value (the settings
# that were in effect before this call) is what gets displayed as cell output.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

## Loading Data

In [2]:
# Load every pickled embedding in the folder and z-score it column-wise.
# NOTE(review): pd.read_pickle executes arbitrary pickle payloads -- only point
# this at files produced by this project.
embeds = {}
for pickle_name in os.listdir('../../data/processed/pulled_embeds/'):
    raw = pd.read_pickle(f'../../data/processed/pulled_embeds/{pickle_name}')
    centred = raw - raw.mean()
    # Key is the file name up to its first '.', e.g. 'THINGS.pkl' -> 'THINGS'
    embeds[pickle_name.split('.')[0]] = centred / raw.std()

# Quick overview: shape of each loaded embedding
{name: embed.shape for name, embed in embeds.items()}

{'SVD_sim_rel': (6001, 300),
 'eye_motor_response': (7469, 6),
 'fastText_Wiki_News': (81696, 300),
 'fastTextSub_OpenSub': (72511, 300),
 'SGSoftMaxEncoder_SWOW': (11781, 300),
 'PPMI_SVD_SWOW': (11781, 300),
 'GloVe_Twitter': (48587, 200),
 'GloVe_Wikipedia': (68921, 300),
 'GloVe_CommonCrawl': (88408, 300),
 'fMRI_text_cos_align': (1200, 1000),
 'fMRI_speech_hyper_align': (579, 6),
 'THINGS': (1562, 49),
 'CBOW_GoogleNews': (79253, 300),
 'EEG_speech': (1591, 130),
 'BERT_last4_Book_Wikipedia': (24761, 1024),
 'compo_attribs': (534, 62),
 'morphoNLM': (50480, 50),
 'fMRI_text_hyper_align': (1200, 1000),
 'norms_sensorimotor': (36851, 11),
 'EEG_text': (3342, 104),
 'LexVec_CommonCrawl': (87613, 300),
 'fMRI_speech_cos_align': (579, 6),
 'fastText_CommonCrawl': (88953, 300),
 'spherical_text_Wikipedia': (58987, 300),
 'BERT_first4_Book_Wikipedia': (24761, 1024),
 'SGSoftMaxDecoder_SWOW': (25435, 300)}

In [2]:
# Norm values (first column used as the index) read straight from the zip archive
norms = pd.read_csv('../../data/raw/psychNorms.zip', compression='zip', index_col=0)

# Per-norm metadata; `associated_embed` is cast to str on load
norm_metadata = (
    pd.read_csv('../../data/raw/psychNorms_metadata.csv', index_col=0)
    .astype({'associated_embed': str})
)
norms

  norms = pd.read_csv('../../data/final/norms.csv', index_col=0)


Unnamed: 0,Freq_HAL,Freq_KF,Freq_SUBTLEXUS,Freq_SUBTLEXUK,Freq_Blog,Freq_Twitter,Freq_News,Freq_CobW,Freq_CobS,CD_SUBTLEXUS,...,iconicity_winter_2017,living_vanarsdall,thought_vanarsdall,reproduction_vanarsdall,person_vanarsdall,goals_vanarsdall,movement_vanarsdall,concreteness_vanarsdall,familiarity_vanarsdall,imageability_vanarsdall
'em,0.0,,,,,,,1.3617,1.9138,,...,,,,,,,,,,
'neath,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
're,0.0,,,,,,,0.9031,1.6335,,...,,,,,,,,,,
'shun,0.0,,,,,,,0.0000,0.0000,,...,,,,,,,,,,
'tis,0.0,,,,,,,0.4771,0.6021,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
trappy,,,,,,,,,,,...,,,,,,,,,,
vocalise,,,,,,,,,,,...,,,,,,,,,,
listened..to.,,,,,,,,,,,...,,,,,,,,,,
spoke..to.,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Total number of non-missing cells across all norm columns
norms.notna().sum().sum()

2851952

In [2]:
with open('../../data/raw/dtype_to_embed.json', 'r') as f:
    type_to_embed = json.load(f)

# Invert {data type -> [embedding names]} into {embedding name -> data type}.
# A comprehension is used on purpose: its loop variables stay local, so this
# cell no longer overwrites the notebook-level `embeds` dict of loaded
# embeddings (or `embed`), and no longer shadows the builtin `type`.
embed_to_type = {
    member_name: dtype_name
    for dtype_name, member_names in type_to_embed.items()
    for member_name in member_names
}
embed_to_type

{'CBOW_GoogleNews': 'text',
 'fastText_CommonCrawl': 'text',
 'fastText_Wiki_News': 'text',
 'fastTextSub_OpenSub': 'text',
 'GloVe_CommonCrawl': 'text',
 'GloVe_Twitter': 'text',
 'GloVe_Wikipedia': 'text',
 'LexVec_CommonCrawl': 'text',
 'morphoNLM': 'text',
 'spherical_text_Wikipedia': 'text',
 'eye_tracking': 'brain',
 'EEG_speech': 'brain',
 'EEG_text': 'brain',
 'fMRI_speech_hyper_align': 'brain',
 'fMRI_text_hyper_align': 'brain',
 'microarray': 'brain',
 'PPMI_SVD_SWOW': 'behavior',
 'SGSoftMaxInput_SWOW': 'behavior',
 'SGSoftMaxOutput_SWOW': 'behavior',
 'PPMI_SVD_SOUTH_FLORIDA': 'behavior',
 'THINGS': 'behavior',
 'feature_overlap': 'behavior',
 'norms_sensorimotor': 'behavior',
 'compo_attribs': 'behavior',
 'SVD_sim_rel': 'behavior'}

## Cross Validation

In [48]:
# Ridge: 6 log-spaced regularisation strengths from 1e-3 to 1e3
alphas = np.logspace(start=-3, stop=3, num=6)
ridge = RidgeCV(alphas=alphas)

# Logistic hyperparameters (Cs is the element-wise inverse of the ridge alphas)
penalty = 'l2'
inner_cv = 5
Cs = 1 / alphas

# Scorers: McFadden R^2 computed from predicted probabilities, higher is better
# NOTE(review): newer scikit-learn releases deprecate `needs_proba` in favour of
# `response_method` -- confirm against the installed version before upgrading.
binary_scorer = make_scorer(
    mcfadden_r2_binary, greater_is_better=True, needs_proba=True
)
multiclass_scorer = make_scorer(
    mcfadden_r2_multiclass, greater_is_better=True, needs_proba=True
)

# Outer cross-validation folds and parallel workers
outer_cv = 5
n_jobs = 8

In [49]:
# RCA
# For every (embedding, norm) pair: align the two on their shared index, pick an
# estimator that matches the norm's dtype, run the outer cross-validation and
# record the mean R^2. Results are collected in `rca` and written to CSV.
rca = []
for embed_name, embed_full in tqdm(embeds.items()):

    # Loop-invariant per embedding. `.get` keeps the run alive when an embedding
    # is missing from the type mapping; its type is then recorded as NaN.
    embed_type = embed_to_type.get(embed_name, np.nan)

    to_print = []
    for norm_name in tqdm(norms.columns, desc=embed_name):

        # Aligning data.
        # Always start from the full embedding: aligning the already-aligned
        # frame from the previous iteration would restrict each norm to the
        # words shared with every norm processed before it.
        norm = norms[norm_name].dropna()
        embed, norm = embed_full.align(norm, axis=0, join='inner')

        # Checking norm dtype
        norm_dtype = norm_metadata.loc[norm_name, 'type']

        # Solvers, estimators
        if norm_dtype in ['binary', 'multiclass']:
            embed, norm = process_categorical(embed, norm, outer_cv, inner_cv)

            # may have switched from multi to bin after processing
            norm_dtype = 'binary' if len(norm.unique()) == 2 else 'multiclass'

            # Cross validation settings for logistic regression
            solver = best_logistic_solver(embed, norm_dtype)

            # Defining logistic regression
            # NOTE(review): the recorded run output shows the optimiser hitting
            # its iteration limit; max_iter is left at the library default here
            # -- consider raising it once the convergence issue is investigated.
            estimator = LogisticRegressionCV(
                Cs=Cs, penalty=penalty, cv=StratifiedKFold(inner_cv),
                solver=solver, n_jobs=n_jobs
            )
        else:  # continuous
            estimator = ridge

        # Cross validation
        check = checker(
            embed_name, norm, norm_dtype, norm_metadata, outer_cv, norm_name
        )
        if check == 'pass':
            r2_mean, r2_sd = k_fold_cross_val(
                estimator, embed, norm, norm_dtype, outer_cv, n_jobs
            )
            # The stored score is the mean across outer folds; previously `r2`
            # was never assigned on this path (NameError / stale value).
            r2 = r2_mean
        else:
            r2 = np.nan

        # Saving
        train_n = int(((outer_cv - 1) / outer_cv) * len(embed))
        p = embed.shape[1]
        rca.append([embed_name, embed_type, norm_name, train_n, p, r2, check])

        to_print.append([norm_name, train_n, r2, check])

    # Ten best-predicted norms for this embedding, as a progress summary
    to_print = (
        pd.DataFrame(to_print, columns=['norm', 'train_n', 'r2', 'check'])
        .sort_values('r2', ascending=False).head(10)
    )
    print(to_print)

rca = pd.DataFrame(rca, columns=['embed', 'embed_type', 'norm', 'train_n', 'p', 'r2', 'check'])
rca.to_csv('../../data/final/rca_test.csv', index=False)
rca

  0%|          | 0/26 [00:00<?, ?it/s]

-------------------
SVD_sim_rel
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                        norm  train_n        r2 check
72       Emot_Assoc_Surprise     3023  0.737305  pass
66        Emot_Assoc_Disgust     3023  0.681822  pass
68            Emot_Assoc_Joy     3023  0.669039  pass
71        Emot_Assoc_Sadness     3023  0.613820  pass
65   Emot_Assoc_Anticipation     3023  0.608344  pass
64          Emot_Assoc_Anger     3023  0.606539  pass
122             Music_Binder      283  0.581443  pass
67           Emot_Assoc_Fear     3023  0.527708  pass
73          Emot_Assoc_Trust     3023  0.505152  pass
272        living_vanarsdall      697  0.376176  pass
-------------------
eye_motor_response
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                        norm  train_n        r2 check
72       Emot_Assoc_Surprise     3016  0.711767  pass
66        Emot_Assoc_Disgust     3016  0.654423  pass
68            Emot_Assoc_Joy     3016  0.618159  pass
65   Emot_Assoc_Anticipation     3016  0.591671  pass
64          Emot_Assoc_Anger     3016  0.588973  pass
71        Emot_Assoc_Sadness     3016  0.577385  pass
218                  rt_chen      169  0.530673  pass
67           Emot_Assoc_Fear     3016  0.524245  pass
73          Emot_Assoc_Trust     3016  0.481742  pass
10              CD_SUBTLEXUK     5804  0.463312  pass
-------------------
fastText_Wiki_News
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                     norm  train_n        r2 check
275     person_vanarsdall      960  0.866450  pass
273    thought_vanarsdall      960  0.849064  pass
213         valence_britz      399  0.847672  pass
214      social_des_britz      399  0.843514  pass
276      goals_vanarsdall      960  0.818154  pass
177   concreteness_hollis      831  0.816618  pass
249  likableness_chandler      752  0.815955  pass
68         Emot_Assoc_Joy    11286  0.813250  pass
72    Emot_Assoc_Surprise    11286  0.808188  pass
11                CD_Blog    54324  0.806670  pass
-------------------
fastTextSub_OpenSub
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n        r2 check
208  tabooness_janschewitz      366  0.839467  pass
213          valence_britz      380  0.827306  pass
276       goals_vanarsdall      959  0.816255  pass
24            Conc_Glasgow     3730  0.814846  pass
214       social_des_britz      380  0.814365  pass
68          Emot_Assoc_Joy    11238  0.814217  pass
231          visual_troche      600  0.802692  pass
273     thought_vanarsdall      959  0.800204  pass
177    concreteness_hollis      830  0.795212  pass
275      person_vanarsdall      959  0.779046  pass
-------------------
SGSoftMaxEncoder_SWOW
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                     norm  train_n        r2 check
213         valence_britz      296  0.873903  pass
214      social_des_britz      296  0.842274  pass
249  likableness_chandler      408  0.806181  pass
175        valence_hollis      762  0.790703  pass
56    Valence_Covid_Older     2315  0.788042  pass
68         Emot_Assoc_Joy     6111  0.785423  pass
53        Valence_Glasgow     3252  0.783274  pass
72    Emot_Assoc_Surprise     6111  0.782999  pass
66     Emot_Assoc_Disgust     6111  0.761670  pass
162       goals_wilkowski      474  0.761294  pass
-------------------
PPMI_SVD_SWOW
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                      norm  train_n        r2 check
249   likableness_chandler      408  0.856723  pass
53         Valence_Glasgow     3252  0.848242  pass
175         valence_hollis      762  0.846752  pass
213          valence_britz      296  0.839369  pass
187            happy_zupan      254  0.838599  pass
162        goals_wilkowski      474  0.827635  pass
56     Valence_Covid_Older     2315  0.820003  pass
208  tabooness_janschewitz      329  0.819415  pass
214       social_des_britz      296  0.815394  pass
196    happiness_stevenson      754  0.806017  pass
-------------------
GloVe_Twitter
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                         norm  train_n        r2 check
12                 CD_Twitter    38039  0.899242  pass
5                Freq_Twitter    38039  0.896618  pass
11                    CD_Blog    38039  0.852395  pass
4                   Freq_Blog    38039  0.848744  pass
191             valence_zupan      375  0.813875  pass
68             Emot_Assoc_Joy    10907  0.806116  pass
214          social_des_britz      383  0.803350  pass
205  personal_use_janschewitz      366  0.799704  pass
2              Freq_SUBTLEXUS    35228  0.792173  pass
9                CD_SUBTLEXUS    35228  0.791462  pass
-------------------
GloVe_Wikipedia
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


            norm  train_n        r2 check
6      Freq_News    51181  0.904593  pass
13       CD_News    51181  0.903631  pass
76       Sem_N_D    39410  0.887963  pass
11       CD_Blog    51181  0.880470  pass
4      Freq_Blog    51181  0.872904  pass
0       Freq_HAL    44045  0.858643  pass
12    CD_Twitter    51181  0.845076  pass
5   Freq_Twitter    51181  0.838930  pass
10  CD_SUBTLEXUK    46424  0.832278  pass
9   CD_SUBTLEXUS    43126  0.831796  pass
-------------------
GloVe_CommonCrawl
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                 norm  train_n        r2 check
11            CD_Blog    55672  0.911383  pass
4           Freq_Blog    55672  0.911370  pass
5        Freq_Twitter    55672  0.894647  pass
0            Freq_HAL    54953  0.893199  pass
12         CD_Twitter    55672  0.889304  pass
13            CD_News    55672  0.887278  pass
6           Freq_News    55672  0.885333  pass
214  social_des_britz      399  0.878178  pass
10       CD_SUBTLEXUK    49018  0.859891  pass
276  goals_vanarsdall      960  0.844225  pass
-------------------
fMRI_text_cos_align
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                            norm  train_n        r2 check
243  subjective_frequency_morrow       18  0.185828  pass
6                      Freq_News      930  0.149144  pass
4                      Freq_Blog      930  0.131971  pass
2                 Freq_SUBTLEXUS      921  0.117133  pass
11                       CD_Blog      930  0.115266  pass
75                         Sem_N      926  0.108664  pass
10                  CD_SUBTLEXUK      926  0.106610  pass
274      reproduction_vanarsdall       76  0.101718  pass
13                       CD_News      930  0.101341  pass
1                        Freq_KF      912  0.094235  pass
-------------------
fMRI_speech_hyper_align
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n        r2 check
70       Emot_Assoc_Positive      199  0.384108  pass
2             Freq_SUBTLEXUS      458  0.367389  pass
69       Emot_Assoc_Negative      199  0.365124  pass
11                   CD_Blog      460  0.344201  pass
1                    Freq_KF      448  0.315534  pass
12                CD_Twitter      460  0.309959  pass
5               Freq_Twitter      460  0.285426  pass
3             Freq_SUBTLEXUK      459  0.284010  pass
181  meaningfulness_stratton       12  0.273359  pass
255     imageability_cortese      204  0.270230  pass
-------------------
THINGS
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                        norm  train_n        r2 check
111              Body_Binder      128  0.870809  pass
124             Taste_Binder      128  0.839931  pass
274  reproduction_vanarsdall      376  0.837850  pass
131              Path_Binder      128  0.834196  pass
272        living_vanarsdall      376  0.816123  pass
35            Gustatory_Lanc     1235  0.811823  pass
122             Music_Binder      128  0.787877  pass
121             Sound_Binder      128  0.782985  pass
105         Biomotion_Binder      128  0.776412  pass
66        Emot_Assoc_Disgust      719  0.775947  pass
-------------------
CBOW_GoogleNews
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                      norm  train_n        r2 check
68          Emot_Assoc_Joy    11257  0.833908  pass
72     Emot_Assoc_Surprise    11257  0.803290  pass
208  tabooness_janschewitz      365  0.799080  pass
276       goals_vanarsdall      960  0.794699  pass
231          visual_troche      600  0.794018  pass
213          valence_britz      380  0.788336  pass
273     thought_vanarsdall      960  0.787487  pass
24            Conc_Glasgow     3694  0.781719  pass
275      person_vanarsdall      960  0.779803  pass
162        goals_wilkowski      842  0.777190  pass
-------------------
EEG_speech
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                       norm  train_n        r2 check
72      Emot_Assoc_Surprise      608  0.677634  pass
64         Emot_Assoc_Anger      608  0.652398  pass
66       Emot_Assoc_Disgust      608  0.545320  pass
68           Emot_Assoc_Joy      608  0.524830  pass
71       Emot_Assoc_Sadness      608  0.511117  pass
65  Emot_Assoc_Anticipation      608  0.509552  pass
67          Emot_Assoc_Fear      608  0.464407  pass
73         Emot_Assoc_Trust      608  0.457556  pass
69      Emot_Assoc_Negative      608  0.306372  pass
70      Emot_Assoc_Positive      608  0.274697  pass
-------------------
BERT_last4_Book_Wikipedia
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                        norm  train_n        r2 check
72       Emot_Assoc_Surprise     4366  0.812499  pass
68            Emot_Assoc_Joy     4366  0.793532  pass
177      concreteness_hollis      324  0.786885  pass
273       thought_vanarsdall      380  0.765779  pass
274  reproduction_vanarsdall      380  0.763398  pass
24              Conc_Glasgow     1468  0.762611  pass
233           thought_troche      220  0.761899  pass
66        Emot_Assoc_Disgust     4366  0.752964  pass
275        person_vanarsdall      380  0.738436  pass
76                   Sem_N_D    14668  0.733584  pass
-------------------
compo_attribs
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n        r2 check
162      goals_wilkowski       23  0.962694  pass
275    person_vanarsdall      164  0.956122  pass
273   thought_vanarsdall      164  0.953629  pass
277  movement_vanarsdall      164  0.943224  pass
166         auditory_lyn       16  0.921491  pass
35        Gustatory_Lanc      404  0.914700  pass
235   taste_smell_troche       89  0.899991  pass
231        visual_troche       89  0.892745  pass
191        valence_zupan       12  0.889989  pass
24          Conc_Glasgow      296  0.884773  pass
-------------------
morphoNLM
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        norm  train_n        r2 check
72       Emot_Assoc_Surprise    11091  0.783153  pass
68            Emot_Assoc_Joy    11091  0.761268  pass
233           thought_troche      598  0.716872  pass
65   Emot_Assoc_Anticipation    11091  0.685339  pass
227           emotion_troche      598  0.672988  pass
147         Cognition_Binder      421  0.668005  pass
64          Emot_Assoc_Anger    11091  0.655119  pass
66        Emot_Assoc_Disgust    11091  0.654563  pass
231            visual_troche      598  0.644913  pass
230          morality_troche      598  0.635667  pass
-------------------
fMRI_text_hyper_align
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                   norm  train_n        r2 check
241  familiarity_morrow       18  0.643633  pass
66   Emot_Assoc_Disgust      416  0.358557  pass
234        color_troche       59  0.310274  pass
73     Emot_Assoc_Trust      416  0.227454  pass
104       Motion_Binder       68  0.188835  pass
151   Unpleasant_Binder       68  0.185168  pass
199      fear_stevenson       97  0.153159  pass
105    Biomotion_Binder       68  0.131076  pass
273  thought_vanarsdall       76  0.114438  pass
276    goals_vanarsdall       76  0.111985  pass
-------------------
norms_sensorimotor
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n        r2 check
124           Taste_Binder      404  0.925974  pass
166           auditory_lyn      292  0.867354  pass
167          olfactory_lyn      292  0.834885  pass
168          gustatory_lyn      292  0.826405  pass
125           Smell_Binder      404  0.819316  pass
169  dominant_modality_lyn      275  0.808843  pass
72     Emot_Assoc_Surprise     9840  0.770433  pass
165             haptic_lyn      292  0.766970  pass
164             visual_lyn      292  0.728529  pass
231          visual_troche      600  0.726639  pass
-------------------
EEG_text
-------------------


  0%|          | 0/281 [00:00<?, ?it/s]



                       norm  train_n        r2 check
72      Emot_Assoc_Surprise     1359  0.681824  pass
66       Emot_Assoc_Disgust     1359  0.678207  pass
71       Emot_Assoc_Sadness     1359  0.633850  pass
64         Emot_Assoc_Anger     1359  0.595407  pass
67          Emot_Assoc_Fear     1359  0.564330  pass
68           Emot_Assoc_Joy     1359  0.555245  pass
65  Emot_Assoc_Anticipation     1359  0.537454  pass
73         Emot_Assoc_Trust     1359  0.431251  pass
69      Emot_Assoc_Negative     1359  0.348421  pass
47       Feature_Perceptual      272  0.229334  pass
-------------------
LexVec_CommonCrawl
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n        r2 check
208  tabooness_janschewitz      367  0.855970  pass
0                 Freq_HAL    51885  0.850124  pass
213          valence_britz      379  0.839665  pass
214       social_des_britz      379  0.838049  pass
11                 CD_Blog    56324  0.836902  pass
273     thought_vanarsdall      960  0.835047  pass
4                Freq_Blog    56324  0.831507  pass
68          Emot_Assoc_Joy    11287  0.819228  pass
275      person_vanarsdall      960  0.807801  pass
72     Emot_Assoc_Surprise    11287  0.807484  pass
-------------------
fMRI_speech_cos_align
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                    norm  train_n        r2 check
3         Freq_SUBTLEXUK      459  0.395141  pass
4              Freq_Blog      460  0.383411  pass
189        neutral_zupan       18  0.378784  pass
70   Emot_Assoc_Positive      199  0.374314  pass
5           Freq_Twitter      460  0.356567  pass
12            CD_Twitter      460  0.356094  pass
11               CD_Blog      460  0.346503  pass
0               Freq_HAL      458  0.334302  pass
1                Freq_KF      448  0.322165  pass
2         Freq_SUBTLEXUS      458  0.289887  pass
-------------------
fastText_CommonCrawl
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                      norm  train_n        r2 check
208  tabooness_janschewitz      368  0.921226  pass
273     thought_vanarsdall      960  0.882925  pass
214       social_des_britz      399  0.873587  pass
213          valence_britz      399  0.847337  pass
275      person_vanarsdall      960  0.837466  pass
276       goals_vanarsdall      960  0.835141  pass
0                 Freq_HAL    54978  0.833887  pass
68          Emot_Assoc_Joy    11287  0.827992  pass
249   likableness_chandler      772  0.820777  pass
233         thought_troche      600  0.816362  pass
-------------------
spherical_text_Wikipedia
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                    norm  train_n        r2 check
76               Sem_N_D    35868  0.888431  pass
11               CD_Blog    44341  0.832790  pass
13               CD_News    44341  0.823286  pass
4              Freq_Blog    44341  0.820168  pass
6              Freq_News    44341  0.819938  pass
0               Freq_HAL    38368  0.818153  pass
72   Emot_Assoc_Surprise    11264  0.810578  pass
10          CD_SUBTLEXUK    41380  0.799908  pass
68        Emot_Assoc_Joy    11264  0.795236  pass
214     social_des_britz      390  0.793364  pass
-------------------
BERT_first4_Book_Wikipedia
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                    norm  train_n        r2 check
68        Emot_Assoc_Joy     4366  0.814989  pass
72   Emot_Assoc_Surprise     4366  0.805727  pass
273   thought_vanarsdall      380  0.802043  pass
110          Face_Binder      151  0.778059  pass
275    person_vanarsdall      380  0.769325  pass
108         Shape_Binder      151  0.762261  pass
276     goals_vanarsdall      380  0.742866  pass
24          Conc_Glasgow     1468  0.735132  pass
231        visual_troche      220  0.708144  pass
144         Human_Binder      151  0.702684  pass
-------------------
SGSoftMaxDecoder_SWOW
-------------------




  0%|          | 0/281 [00:00<?, ?it/s]



                        norm  train_n        r2 check
72       Emot_Assoc_Surprise     9420  0.778104  pass
68            Emot_Assoc_Joy     9420  0.761546  pass
65   Emot_Assoc_Anticipation     9420  0.688682  pass
66        Emot_Assoc_Disgust     9420  0.680686  pass
71        Emot_Assoc_Sadness     9420  0.662569  pass
64          Emot_Assoc_Anger     9420  0.643570  pass
73          Emot_Assoc_Trust     9420  0.612082  pass
190                sad_zupan      356  0.597898  pass
214         social_des_britz      376  0.596224  pass
162          goals_wilkowski      661  0.585780  pass


Unnamed: 0,embed,norm,train_n,p,r2,check
0,SVD_sim_rel,Freq_HAL,4506,300,0.089362,pass
1,SVD_sim_rel,Freq_KF,3776,300,0.036674,pass
2,SVD_sim_rel,Freq_SUBTLEXUS,4450,300,0.156546,pass
3,SVD_sim_rel,Freq_SUBTLEXUK,4472,300,0.060241,pass
4,SVD_sim_rel,Freq_Blog,4652,300,0.098733,pass
...,...,...,...,...,...,...
7301,SGSoftMaxDecoder_SWOW,goals_vanarsdall,959,300,0.403416,pass
7302,SGSoftMaxDecoder_SWOW,movement_vanarsdall,959,300,0.345155,pass
7303,SGSoftMaxDecoder_SWOW,concreteness_vanarsdall,959,300,0.132158,pass
7304,SGSoftMaxDecoder_SWOW,familiarity_vanarsdall,959,300,0.081765,pass
