In [30]:
# %%
from argparse import ArgumentParser
from pprint import pprint

import junifer
import numpy as np
import pandas as pd
from factor_analyzer import Rotator
from julearn import run_cross_validation
from julearn.config import set_config
from julearn.pipeline import PipelineCreator
from julearn.utils import configure_logging
from junifer.storage import HDF5FeatureStorage
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import RepeatedKFold
from sklearn.preprocessing import StandardScaler
#from statsmodels.multivariate.pca import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import PCA

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Send julearn's log messages out at INFO level.
configure_logging(level="INFO")

# Switch on the two julearn global config flags used by this notebook.
for _flag in ("disable_x_check", "disable_xtypes_check"):
    set_config(_flag, True)


2024-11-22 14:53:38,898 [    INFO] ===== Lib Versions =====
2024-11-22 14:53:38,899 [    INFO] numpy: 1.26.4
2024-11-22 14:53:38,900 [    INFO] scipy: 1.14.0
2024-11-22 14:53:38,900 [    INFO] sklearn: 1.4.2
2024-11-22 14:53:38,900 [    INFO] pandas: 2.1.4
2024-11-22 14:53:38,901 [    INFO] julearn: 0.3.4.dev10
2024-11-22 14:53:38,902 [    INFO] Setting global config disable_x_check to True
2024-11-22 14:53:38,902 [    INFO] Setting global config disable_xtypes_check to True


2024-11-22 14:53:38,898 - julearn - INFO - ===== Lib Versions =====
2024-11-22 14:53:38,899 - julearn - INFO - numpy: 1.26.4
2024-11-22 14:53:38,900 - julearn - INFO - scipy: 1.14.0
2024-11-22 14:53:38,900 - julearn - INFO - sklearn: 1.4.2
2024-11-22 14:53:38,900 - julearn - INFO - pandas: 2.1.4
2024-11-22 14:53:38,901 - julearn - INFO - julearn: 0.3.4.dev10
2024-11-22 14:53:38,902 - julearn - INFO - Setting global config disable_x_check to True
2024-11-22 14:53:38,902 - julearn - INFO - Setting global config disable_xtypes_check to True


In [24]:

# %%
def _read_subject_ids(path):
    """Read a headerless subject-ID file and return the IDs as a flat array of str."""
    return pd.read_csv(path, header=None).to_numpy().ravel().astype(str)


# Subject lists, same as the paper: 80 subjects for the component score
# estimation and 753 subjects for the main analysis.
train_subs = _read_subject_ids("MMP_HCP_80_subs_componentscoreestimation.txt")  # 80 subjects
test_subs = _read_subject_ids("MMP_HCP_753_subs.txt")  # 753 subjects

# Behavioural column -> display label, kept side by side so the two lists
# derived below cannot drift out of alignment.
# NOTE(review): "List soting" and "Instrument Support" look like typos; they are
# kept verbatim because the labels are used as the index of the loading tables.
label_by_column = {
    "PicSeq_Unadj": "Visual Episodic Memory",
    "CardSort_Unadj": "Cognitive Flexibility",
    "Flanker_Unadj": "Inhibition",
    "PMAT24_A_CR": "Fluid Intelligence",
    "ReadEng_Unadj": "Reading",
    "PicVocab_Unadj": "Vocabulary",
    "ProcSpeed_Unadj": "Processing Speed",
    "VSPLOT_TC": "Spatial Orientation",
    "SCPT_SEN": "Sustained Attention_Sens",
    "SCPT_SPEC": "Sustained Attention_Spec",
    "IWRD_TOT": "Verbal Episodic Memory",
    "ListSort_Unadj": "Working Memory (List soting)",
    "MMSE_Score": "Cognitive Status",
    "PSQI_Score": "Sleep Quality",
    "Endurance_Unadj": "Walking Endurance",
    "Dexterity_Unadj": "Manual Dexterity",
    "Strength_Unadj": "Grip Strength",
    "Odor_Unadj": "Odor Identification",
    "PainInterf_Tscore": "Pain Interference Survey",
    "Taste_Unadj": "Taste Intensity",
    "Mars_Final": "Contrast Sensitivity",
    "Emotion_Task_Face_Acc": "Emotional Face Matching",
    "Language_Task_Math_Avg_Difficulty_Level": "Arithmetic",
    "Language_Task_Story_Avg_Difficulty_Level": "Story Comprehension",
    "Relational_Task_Acc": "Relational Processing",
    "Social_Task_Perc_Random": "Social Cognition_Random",
    "Social_Task_Perc_TOM": "Social Cognition_Interaction",
    "WM_Task_Acc": "Working Memory (N-back)",
    "NEOFAC_A": "Agreeableness",
    "NEOFAC_O": "Openness",
    "NEOFAC_C": "Conscientiousness",
    "NEOFAC_N": "Neuroticism",
    "NEOFAC_E": "Extraversion",
    "ER40_CR": "Emot. Recog. _ Total",
    "ER40ANG": "Emot. Recog. _ Angry",
    "ER40FEAR": "Emot. Recog. _Fear",
    "ER40HAP": "Emot. Recog. _ Happy",
    "ER40NOE": "Emot. Recog. _ Neutral",
    "ER40SAD": "Emot. Recog. _ Sad",
    "AngAffect_Unadj": "Anger _ Affect",
    "AngHostil_Unadj": "Anger _ Hostility",
    "AngAggr_Unadj": "Anger _ Aggression",
    "FearAffect_Unadj": "Fear _ Affect",
    "FearSomat_Unadj": "Fear _ Somatic Arousal",
    "Sadness_Unadj": "Sadness",
    "LifeSatisf_Unadj": "Life Satisfaction",
    "MeanPurp_Unadj": "Meaning & Purpose",
    "PosAffect_Unadj": "Positive Affect",
    "Friendship_Unadj": "Friendship",
    "Loneliness_Unadj": "Loneliness",
    "PercHostil_Unadj": "Perceived Hostility",
    "PercReject_Unadj": "Perceived Rejection",
    "EmotSupp_Unadj": "Emotional Support",
    "InstruSupp_Unadj": "Instrument Support",
    "PercStress_Unadj": "Perceived Stress",
    "SelfEff_Unadj": "Self-Efficacy",
    "DDisc_AUC_40K": "Delay Discounting",
    "GaitSpeed_Comp": "Walking Speed",
}
columns = list(label_by_column)  # column names to select from the CSV
row_names = list(label_by_column.values())  # matching display labels, same order

# Load the dataset: keep only the selected columns and use string subject IDs
# so the index matches the subject lists read above.
full_df = pd.read_csv("Behavioral_Data", index_col="Subject").loc[:, columns]
full_df.index = full_df.index.astype(str)
test_df = full_df.loc[test_subs]
train_df = full_df.loc[train_subs]


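Sparse PCA: 3-component sparse PCA fitted on the 80 training subjects, followed by varimax rotation of the loadings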

In [None]:
# Preprocessing fitted on the training subjects: fill missing values first,
# then standardise every variable.
imputer = IterativeImputer(max_iter=20, random_state=0)
scaler = StandardScaler()
df_imputed = imputer.fit_transform(train_df)
df_trans = scaler.fit_transform(df_imputed)

# Three-component sparse PCA on the standardised training data.
sparsepca = SparsePCA(n_components=3, random_state=42)
df_sparsepca = sparsepca.fit_transform(df_trans)

# Transpose components_ so that each row is one behavioural variable
# (labelled with row_names) and each column is one component.
loadings = pd.DataFrame(sparsepca.components_.T, index=row_names)

# Varimax-rotate the loadings and re-attach the variable labels.
rotator = Rotator(method="varimax")
rotated_loadings = pd.DataFrame(rotator.fit_transform(loadings), index=row_names)

# Disabled: projection of the test subjects onto the rotated loadings.
# df_test_imputed = imputer.transform(test_df)
# scaled = scaler.transform(df_test_imputed)
# factors = np.dot(scaled, rotated_loadings)
# factors_df = pd.DataFrame(
#     factors,
#     index=test_df.index,
#     columns=["varimax_satisf", "varimax_cog", "varimax_er"],
# )


In [None]:
# For every rotated component, list the ten variables with the largest
# absolute loading.
# NOTE: `rotated_loadings` is also assigned by the PCA cell further down, so
# run this right after the Sparse PCA cell to see the Sparse PCA loadings.
for component in rotated_loadings.columns:
    top_loadings = rotated_loadings[component].abs().nlargest(10)
    # sep="\n" puts the Series on its own line; the default " " separator
    # would indent (and so misalign) its first row.
    print(f"Sparse_PCA{component}", top_loadings, sep="\n")
    

Sparse_PCA0
 Perceived Stress       0.277083
Sadness                0.272569
Loneliness             0.264232
Neuroticism            0.261841
Perceived Rejection    0.251014
Anger _ Affect         0.250242
Fear _ Affect          0.243867
Positive Affect        0.233513
Life Satisfaction      0.232094
Meaning & Purpose      0.227041
Name: 0, dtype: float64
Sparse_PCA1
 Reading                         0.335301
Vocabulary                      0.315226
Fluid Intelligence              0.310060
Story Comprehension             0.274991
Relational Processing           0.271783
Spatial Orientation             0.264973
Working Memory (N-back)         0.249605
Delay Discounting               0.231766
Visual Episodic Memory          0.231431
Working Memory (List soting)    0.228808
Name: 1, dtype: float64
Sparse_PCA2
 Emot. Recog. _ Total            0.559273
Emot. Recog. _ Neutral          0.474060
Social Cognition_Interaction    0.409980
Social Cognition_Random         0.350499
Emot. Recog. _ Sad 

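PCA: 3-component (standard) PCA fitted on the 80 training subjects, followed by varimax rotation of the loadings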

In [35]:

# Preprocessing fitted on the training subjects: fill missing values first,
# then standardise every variable.
imputer = IterativeImputer(max_iter=20, random_state=0)
scaler = StandardScaler()
df_imputed = imputer.fit_transform(train_df)
df_trans = scaler.fit_transform(df_imputed)

# Three-component PCA on the standardised training data.
pca = PCA(n_components=3, random_state=42)
df_pca = pca.fit_transform(df_trans)

# Transpose components_ so that each row is one behavioural variable
# (labelled with row_names) and each column is one component.
loadings = pd.DataFrame(pca.components_.T, index=row_names)

# Varimax-rotate the loadings and re-attach the variable labels.
rotator = Rotator(method="varimax")
rotated_loadings = pd.DataFrame(rotator.fit_transform(loadings), index=row_names)

In [None]:
# For every rotated component, list the ten variables with the largest
# absolute loading.
# NOTE: `rotated_loadings` is also assigned by the Sparse PCA cell above, so
# run this right after the PCA cell to see the PCA loadings.
for component in rotated_loadings.columns:
    top_loadings = rotated_loadings[component].abs().nlargest(10)
    # sep="\n" puts the Series on its own line; the default " " separator
    # would indent (and so misalign) its first row.
    print(f"PCA{component}", top_loadings, sep="\n")

PCA0
 Sadness                0.255977
Perceived Stress       0.255488
Loneliness             0.245144
Neuroticism            0.239447
Positive Affect        0.233404
Meaning & Purpose      0.233218
Perceived Rejection    0.232838
Life Satisfaction      0.229709
Anger _ Affect         0.225087
Fear _ Affect          0.223447
Name: 0, dtype: float64
PCA1
 Reading                         0.302398
Vocabulary                      0.289734
Fluid Intelligence              0.261402
Spatial Orientation             0.249257
Visual Episodic Memory          0.237045
Relational Processing           0.237016
Story Comprehension             0.231846
Working Memory (N-back)         0.220725
Working Memory (List soting)    0.216566
Delay Discounting               0.210323
Name: 1, dtype: float64
PCA2
 Emot. Recog. _ Neutral          0.350870
Emot. Recog. _ Total            0.349149
Social Cognition_Random         0.302970
Social Cognition_Interaction    0.299935
Odor Identification             0.227335