In [2]:
import os
import pandas as pd
import pickle

In [3]:
from PyDimRed.transform import TransformWrapper
from PyDimRed.plot import display
from sklearn.manifold import TSNE
from PyDimRed.plot import display_heatmap_df
from PyDimRed.plot import display_group
from PyDimRed.utils.dr_utils import reduce_data_with_params
from PyDimRed.evaluation import ModelEvaluator

  from .autonotebook import tqdm as notebook_tqdm
  import pkg_resources


In [4]:
# ==== UKB: locations of the graph-classification experiments
# NOTE(review): absolute, machine-specific path -- anyone running this on
# another machine has to edit it by hand.
derivatives_folder = "/media/jaume/DATA/Data/Urblauna_SFTP/UKB_Cardiac_BIDS/derivatives"
data_path = os.path.join(derivatives_folder, 'GraphClassification')

# Sub-folder names of the two edge configurations (they differ only in the
# "All-True" / "All-False" flag).
all_edges = 'Edges-True_Norm-ZNorm_Global-True_All-True_Sim-False_BP-False'
aha_edges = 'Edges-True_Norm-ZNorm_Global-True_All-False_Sim-False_BP-False'
# study_name = 'Multiplex_HPT_ACDC_ADAM_FINAL_MAE'

# =========================== ALL ===========================
all_study_name = 'Multiplex_HPT_UKB_DIMENSIONS_NEW_LOSS_ALL'
all_data_folder = os.path.join(data_path, all_edges, all_study_name)

# =========================== AHA ===========================
aha_study_name = 'Multiplex_HPT_UKB_DIMENSIONS_NEW_LOSS'
aha_data_folder = os.path.join(data_path, aha_edges, aha_study_name)

# Explicit choice of the experiment to analyse. Previously the ALL paths were
# assigned and then immediately overwritten by the AHA ones, so the selection
# was only implied by statement order. Switch to 'ALL' to analyse the other run.
experiments = {
    'ALL': (all_study_name, all_data_folder),
    'AHA': (aha_study_name, aha_data_folder),
}
selected_experiment = 'AHA'
study_name, data_folder = experiments[selected_experiment]

# Outputs of this notebook go to <experiment>/latent_analysis; the latent
# table is read from <experiment>/latent_data.csv.
save_folder = os.path.join(data_folder, 'latent_analysis')
latent_filename = os.path.join(data_folder, 'latent_data.csv')
reprocess_datasets = True
os.makedirs(save_folder, exist_ok=True)

In [6]:
# --- Read the latent table and index it by subject
df_latent = pd.read_csv(latent_filename).set_index('Subject')

# Keep the class labels aside (as a one-column frame) before they are
# removed from the feature table.
labels = df_latent.loc[:, ['labels']].copy()

# Non-feature columns; only those actually present in the file are dropped.
columns_to_drop = ['labels', 'Group', 'Region', 'Sample', 'Cycle', 'ed_cycle_time', 'ed_frame_idx']
latent_drop = [col for col in df_latent.columns if col in columns_to_drop]
df_latent = df_latent.drop(columns=latent_drop)

print(df_latent.columns)

# Feature matrix and flat label vector
X = df_latent.values
y = labels['labels'].values
print(X.shape, y.shape)

# NOTE: the block below is disabled (kept for reference only); the search
# that actually runs is the UMAP grid in the following cell.
# # --- Dimensionality reduction
# save_filename = os.path.join(save_folder, 'results_analysis.pkl')
# save_filename_pacmap = os.path.join(save_folder, 'results_analysis_pacmap.pkl')
# if os.path.isfile(save_filename) and not reprocess_datasets:
#     with open(save_filename, 'rb') as f:
#         save_data = pickle.load(f)
#     bestScore = save_data['bestScore']
#     bestParams = save_data['bestParams']
#     results = save_data['results']
# else:
#     # params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1)}]
#     params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1)}]
#     model_eval = ModelEvaluator(X,y,params,K=3, n_jobs=1)
#     bestScore, bestParams, results = model_eval.cross_validation()
#     save_data = {'bestScore': bestScore, 'bestParams': bestParams, 'results': results}
#     with open(save_filename, 'wb') as f:
#         pickle.dump(save_data, f)

#     # PACMAP
#     params = [{"method" : ["PACMAP"]}]
#     model_eval = ModelEvaluator(X,y,params,K=3, n_jobs=1)
#     bestScore, bestParams, results = model_eval.cross_validation()
#     save_data = {'bestScore': bestScore, 'bestParams': bestParams, 'results': results}
#     with open(save_filename_pacmap, 'wb') as f:
#         pickle.dump(save_data, f)

# --- Display the results
# display_heatmap_df(results,'param_Transform__method','param_Transform__n_nbrs', 'mean_test_score')

Index(['Node_0_Control_0', 'Node_1_Control_0', 'Node_2_Control_0',
       'Node_3_Control_0', 'Node_4_Control_0', 'Node_5_Control_0',
       'Node_6_Control_0', 'Node_7_Control_0', 'Node_8_Control_0',
       'Node_9_Control_0',
       ...
       'Node_15_Init_6', 'Node_16_Init_6', 'Node_17_Init_6', 'Node_18_Init_6',
       'Node_19_Init_6', 'Node_20_Init_6', 'Node_21_Init_6', 'Node_22_Init_6',
       'Node_23_Init_6', 'Node_24_Init_6'],
      dtype='object', length=450)
(413, 450) (413,)


In [14]:
# UMAP hyper-parameter sweep with the cosine metric: number of neighbours,
# embedding dimensionality and min_dist are all varied.
# Earlier grids, kept for reference:
# params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1)}]
# params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1), "n_components": range(5, 10)}]
umap_grid = {
    "method": ["UMAP"],
    "n_nbrs": range(2, 10),
    "n_components": range(6, 20, 2),
    "metric": ['cosine'],
    'min_dist': [0.0, 0.1, 0.2, 0.5, 0.8, 0.99],
}
params = [umap_grid]

# K=3 is presumably the number of cross-validation folds -- TODO confirm
# against the PyDimRed ModelEvaluator documentation.
model_eval = ModelEvaluator(X, y, params, K=3, n_jobs=3)
bestScore, bestParams, results = model_eval.cross_validation()

# Persist the three outputs of the search in a single pickle.
save_data = dict(bestScore=bestScore, bestParams=bestParams, results=results)
save_filename = os.path.join(save_folder, 'results_analysis_10.pkl')
with open(save_filename, 'wb') as pkl_file:
    pickle.dump(save_data, pkl_file)



In [15]:
# Rank every evaluated configuration from best to worst mean test score,
# showing only the swept parameters next to the score.
results.sort_values(by='mean_test_score', ascending=False).loc[
    :,
    [
        'param_Transform__method',
        'param_Transform__n_components',
        'param_Transform__n_nbrs',
        'mean_test_score',
    ],
]

Unnamed: 0,param_Transform__method,param_Transform__n_components,param_Transform__n_nbrs,mean_test_score
275,UMAP,18,5,0.571353
147,UMAP,14,5,0.568990
322,UMAP,16,4,0.566716
47,UMAP,16,9,0.561691
233,UMAP,8,3,0.561638
...,...,...,...,...
51,UMAP,18,5,0.474629
204,UMAP,14,6,0.474611
128,UMAP,10,2,0.472161
66,UMAP,8,4,0.455323
