In [1]:
import os
import pandas as pd
import pickle

In [2]:
from PyDimRed.transform import TransformWrapper
from PyDimRed.plot import display
from sklearn.manifold import TSNE
from PyDimRed.plot import display_heatmap_df
from PyDimRed.plot import display_group
from PyDimRed.utils.dr_utils import reduce_data_with_params
from PyDimRed.evaluation import ModelEvaluator

  from .autonotebook import tqdm as notebook_tqdm
  import pkg_resources


In [3]:
# ==== ACDC: locations of the graph-classification studies ====
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
derivatives_folder = '/media/jaume/DATA/Data/New_ACDC/MIDS/mixed/derivatives'
data_path = os.path.join(derivatives_folder, 'GraphClassification')

# Experiment sub-folders; the two names differ only in the `All-` flag.
all_edges = 'Edges-True_Norm-ZNorm_Global-True_All-True_Sim-False_BP-False'
aha_edges = 'Edges-True_Norm-ZNorm_Global-True_All-False_Sim-False_BP-False'
# study_name = 'Multiplex_HPT_ACDC_ADAM_FINAL_MAE'

# =========================== ALL ===========================
all_study_name = 'Multiplex_HPT_ACDC_DIMENSIONS_ALL_SUM'
all_data_folder = os.path.join(data_path, all_edges, all_study_name)

# =========================== AHA ===========================
aha_study_name = 'Multiplex_HPT_ACDC_DIMENSIONS_SUM'
aha_data_folder = os.path.join(data_path, aha_edges, aha_study_name)

# Single switch selecting which study the notebook analyses. Previously the
# ALL paths were assigned and then silently overwritten by the AHA ones; the
# choice is now explicit. 'AHA' reproduces the former effective behaviour.
analysis_target = 'AHA'  # one of: 'ALL', 'AHA'
study_folders = {
    'ALL': (all_study_name, all_data_folder),
    'AHA': (aha_study_name, aha_data_folder),
}
study_name, data_folder = study_folders[analysis_target]

# Input file and output directory both live inside the selected study folder.
save_folder = os.path.join(data_folder, 'latent_analysis')
latent_filename = os.path.join(data_folder, 'latent_data.csv')

# Flag meant to force recomputation instead of reusing saved result files.
reprocess_datasets = True
os.makedirs(save_folder, exist_ok=True)

In [4]:
# --- Load the latent data and index it by subject
# TODO (from the original author): need to identify the subjects!
df_latent = pd.read_csv(latent_filename).set_index('Subject')

# Keep the class labels aside before removing the non-feature columns.
labels = df_latent[['labels']].copy()

# Non-feature columns; only those actually present in the file are dropped,
# so a CSV lacking some of them still loads.
columns_to_drop = ['labels', 'Group', 'Region', 'Sample', 'Cycle', 'ed_cycle_time', 'ed_frame_idx']
latent_drop = [col for col in df_latent.columns if col in columns_to_drop]
df_latent = df_latent.drop(columns=latent_drop)

print(df_latent.columns)

# Define X (feature matrix) and y (flat label vector)
X = df_latent.values
y = labels.values.ravel()
print(X.shape, y.shape)

# --- Disabled: earlier TSNE/UMAP and PACMAP sweeps, kept for reference only.
# save_filename = os.path.join(save_folder, 'results_analysis.pkl')
# save_filename_pacmap = os.path.join(save_folder, 'results_analysis_pacmap.pkl')
# if os.path.isfile(save_filename) and not reprocess_datasets:
#     with open(save_filename, 'rb') as f:
#         save_data = pickle.load(f)
#     bestScore = save_data['bestScore']
#     bestParams = save_data['bestParams']
#     results = save_data['results']
# else:
#     params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1)}]
#     model_eval = ModelEvaluator(X,y,params,K=3, n_jobs=1)
#     bestScore, bestParams, results = model_eval.cross_validation()
#     save_data = {'bestScore': bestScore, 'bestParams': bestParams, 'results': results}
#     with open(save_filename, 'wb') as f:
#         pickle.dump(save_data, f)

#     # PACMAP
#     params = [{"method" : ["PACMAP"]}]
#     model_eval = ModelEvaluator(X,y,params,K=3, n_jobs=1)
#     bestScore, bestParams, results = model_eval.cross_validation()
#     save_data = {'bestScore': bestScore, 'bestParams': bestParams, 'results': results}
#     with open(save_filename_pacmap, 'wb') as f:
#         pickle.dump(save_data, f)

# --- Display the results (disabled)
# display_heatmap_df(results,'param_Transform__method','param_Transform__n_nbrs', 'mean_test_score')

Index(['Node_0_Control_0', 'Node_1_Control_0', 'Node_2_Control_0',
       'Node_3_Control_0', 'Node_4_Control_0', 'Node_5_Control_0',
       'Node_6_Control_0', 'Node_7_Control_0', 'Node_8_Control_0',
       'Node_9_Control_0',
       ...
       'Node_15_Init_4', 'Node_16_Init_4', 'Node_17_Init_4', 'Node_18_Init_4',
       'Node_19_Init_4', 'Node_20_Init_4', 'Node_21_Init_4', 'Node_22_Init_4',
       'Node_23_Init_4', 'Node_24_Init_4'],
      dtype='object', length=525)
(150, 525) (150,)


In [10]:
# UMAP grid search with the cosine metric: sweeps the number of neighbours,
# the number of output components (6..18, step 2) and min_dist.
# That is 8 * 7 * 6 = 336 parameter combinations.
# params = [{"method" : ["TSNE", "UMAP"], "n_nbrs" : range(2,10,1)}]
params = [{
    "method": ["UMAP"],
    "n_nbrs": range(2, 10, 1),
    "n_components": range(6, 20, 2),
    "metric": ['cosine'],
    'min_dist': [0.0, 0.1, 0.2, 0.5, 0.8, 0.99],
}]
# File name kept unchanged so previously saved results remain discoverable.
save_filename = os.path.join(save_folder, 'results_analysis_10.pkl')

if os.path.isfile(save_filename) and not reprocess_datasets:
    # Reuse the saved search instead of repeating the expensive sweep.
    # NOTE: `model_eval` is not created on this path.
    # SECURITY: pickle.load executes arbitrary code -- only load files that
    # this notebook wrote itself.
    with open(save_filename, 'rb') as f:
        save_data = pickle.load(f)
    bestScore = save_data['bestScore']
    bestParams = save_data['bestParams']
    results = save_data['results']
else:
    # K=3 is presumably the number of cross-validation folds -- confirm
    # against the PyDimRed ModelEvaluator documentation.
    model_eval = ModelEvaluator(X, y, params, K=3, n_jobs=3)
    bestScore, bestParams, results = model_eval.cross_validation()
    save_data = {'bestScore': bestScore, 'bestParams': bestParams, 'results': results}
    with open(save_filename, 'wb') as f:
        pickle.dump(save_data, f)



In [11]:
# Rank every evaluated configuration, highest mean test score first, and show
# only the swept parameters next to the score.
(
    results
    .sort_values(by='mean_test_score', ascending=False)
    [[
        'param_Transform__method',
        'param_Transform__n_components',
        'param_Transform__n_nbrs',
        'mean_test_score',
    ]]
)

Unnamed: 0,param_Transform__method,param_Transform__n_components,param_Transform__n_nbrs,mean_test_score
195,UMAP,12,5,0.680000
241,UMAP,10,3,0.673333
25,UMAP,12,3,0.673333
211,UMAP,16,5,0.666667
287,UMAP,6,9,0.666667
...,...,...,...,...
80,UMAP,12,2,0.393333
128,UMAP,10,2,0.386667
184,UMAP,10,2,0.360000
280,UMAP,6,2,0.353333


In [14]:
# Same ranking restricted to the 6-component embeddings.
# (The query is a plain string: the former f-prefix had no placeholders.)
(
    results
    .query("param_Transform__n_components==6")
    .sort_values(by='mean_test_score', ascending=False)
    [[
        'param_Transform__method',
        'param_Transform__n_components',
        'param_Transform__n_nbrs',
        'mean_test_score',
    ]]
)

Unnamed: 0,param_Transform__method,param_Transform__n_components,param_Transform__n_nbrs,mean_test_score
287,UMAP,6,9,0.666667
174,UMAP,6,8,0.66
118,UMAP,6,8,0.66
170,UMAP,6,4,0.66
171,UMAP,6,5,0.66
1,UMAP,6,3,0.646667
57,UMAP,6,3,0.64
225,UMAP,6,3,0.633333
113,UMAP,6,3,0.626667
227,UMAP,6,5,0.626667
