In [None]:
# /*==========================================================================================*\
# **                        _           _ _   _     _  _         _                            **
# **                       | |__  _   _/ | |_| |__ | || |  _ __ | |__                         **
# **                       | '_ \| | | | | __| '_ \| || |_| '_ \| '_ \                        **
# **                       | |_) | |_| | | |_| | | |__   _| | | | | | |                       **
# **                       |_.__/ \__,_|_|\__|_| |_|  |_| |_| |_|_| |_|                       **
# \*==========================================================================================*/


# -----------------------------------------------------------------------------------------------
# Author: Bùi Tiến Thành - Tien-Thanh Bui (@bu1th4nh)
# Title: playground.ipynb
# Date: 2024/11/14 15:57:19
# Description: 
# 
# (c) 2024 bu1th4nh. All rights reserved. 
# Written with dedication in the University of Central Florida, EPCOT and the Magic Kingdom.
# -----------------------------------------------------------------------------------------------


import os
import sys
import torch
import logging
import numpy as np
import pandas as pd
from typing import List, Dict, Any, Tuple, Union, Literal

from train_test import prepare_trte_data


In [None]:
# data_folder = 'ROSMAP'
# view_list = [1,2]
# num_epoch_pretrain = 500
# num_epoch = 2500
# lr_e_pretrain = 1e-3
# lr_e = 5e-4
# lr_c = 1e-3

# if data_folder == 'ROSMAP':
#     num_class = 2
# if data_folder == 'BRCA':
#     num_class = 5


# data_tr_list, data_trte_list, trte_idx, labels_trte = prepare_trte_data(data_folder, view_list)

# print("data_tr_list: ", type(data_tr_list))
# print("data_trte_list: ", type(data_trte_list))
# print("trte_idx: ", type(trte_idx))
# print("labels_trte: ", type(labels_trte))


# print()

# print("data_tr_list: ", len(data_tr_list))
# print("data_trte_list: ", len(data_trte_list))
# print("labels_trte: ", len(labels_trte))


# print()


# print("data_tr_list: ", [Ariel.shape for Ariel in data_tr_list])
# print("data_trte_list: ", [Ariel.shape for Ariel in data_trte_list])
# print("labels_trte: ", labels_trte.shape)


# print()




# print(trte_idx['tr'])
# print(trte_idx['te'])


# print()


# print("data_tr_list: ", data_tr_list)
# print("data_trte_list: ", data_trte_list)
# print("trte_idx: ", trte_idx)
# print("labels_trte: ", labels_trte)

In [None]:
# Root folder of the processed BRCA cross-omics dataset.
# Defaults to the original author's location; set the BRCA_DATA_DIR environment
# variable to run this notebook against a copy stored elsewhere.
PATH_BRCA = os.environ.get(
    'BRCA_DATA_DIR',
    '/home/bu1th4nh/Datasets/BreastCancer/processed_crossOmics',
)

# Split table; a later cell reads test_info.loc['Test000', 'ER_train' / 'ER_test'] from it.
test_info = pd.read_parquet(f'{PATH_BRCA}/testdata_classification.parquet')
display(test_info.head())


# Omic layers. They are transposed before samples are selected downstream,
# so they are presumably stored as features x samples -- TODO confirm.
mRNA = pd.read_parquet(f'{PATH_BRCA}/mRNA.parquet')
miRNA = pd.read_parquet(f'{PATH_BRCA}/miRNA.parquet')

# Binary-encode every clinical column in one vectorised pass:
# 'Positive' -> 1, anything else (including missing values) -> 0.
# The raw frame is kept under its own name so the encoding is not an in-place mutation.
clinical_raw = pd.read_parquet(f'{PATH_BRCA}/clinical.parquet')
clinical = clinical_raw.eq('Positive').astype(int)
display(clinical.head().T)

In [None]:
def custom___prepare_trte_data(
    omic_layers: List[pd.DataFrame], 
    label_data_series: pd.Series,
    tr_sample_list: List[str],
    te_sample_list: List[str],
    adj_parameter: int,
) -> Tuple[
    List[torch.Tensor], 
    List[torch.Tensor], 
    Dict[str, List[int]], 
    np.ndarray,
    list,
    int,
    int,
]:
    """Assemble per-view train/test tensors, row indices and labels.

    Every layer is transposed before samples are looked up by label, so each
    layer is expected to carry samples as columns.

    Args:
        omic_layers: One DataFrame per view.
        label_data_series: Labels indexed by sample identifier.
        tr_sample_list: Sample identifiers forming the training set.
        te_sample_list: Sample identifiers forming the test set.
        adj_parameter: Returned unchanged as the last tuple element.

    Returns:
        A 7-tuple ``(data_train_list, data_all_list, idx_dict, labels,
        dim_he_list, num_class, adj_parameter)`` where

        * ``data_train_list`` holds, per view, a float tensor of the training rows;
        * ``data_all_list`` holds, per view, training rows followed by test rows;
        * ``idx_dict`` maps ``"tr"`` / ``"te"`` to row positions in ``data_all_list``;
        * ``labels`` is an int array of training labels followed by test labels;
        * ``dim_he_list`` is the row count (``shape[0]``) of each input layer;
        * ``num_class`` is the number of distinct values in the whole label
          series (a missing value counts as a value).

        Tensors are moved to CUDA when ``torch.cuda.is_available()``.

    Raises:
        IndexError: If ``omic_layers`` is empty.
    """
    # Class count and per-layer leading dimension.
    class_count = label_data_series.nunique(dropna=False)
    layer_row_counts = [len(layer.index) for layer in omic_layers]

    # Integer labels for each split, in the order the samples were requested.
    train_labels = label_data_series.loc[tr_sample_list].values.astype(int)
    test_labels = label_data_series.loc[te_sample_list].values.astype(int)

    # Sample-by-feature matrices for each split, one pair per view.
    train_blocks = []
    test_blocks = []
    for layer in omic_layers:
        samples_as_rows = layer.T
        train_blocks.append(samples_as_rows.loc[tr_sample_list].values)
        test_blocks.append(samples_as_rows.loc[te_sample_list].values)

    n_train = train_blocks[0].shape[0]
    n_test = test_blocks[0].shape[0]

    # Stack train on top of test for every view.
    stacked_views = [
        np.concatenate((train_part, test_part), axis=0)
        for train_part, test_part in zip(train_blocks, test_blocks)
    ]

    # Convert to float tensors, on the GPU when one is available.
    view_tensors = []
    for matrix in stacked_views:
        tensor = torch.FloatTensor(matrix)
        if torch.cuda.is_available():
            tensor = tensor.cuda()
        view_tensors.append(tensor)

    # Row positions of each split inside the stacked tensors.
    train_rows = list(range(n_train))
    test_rows = list(range(n_train, n_train + n_test))
    split_rows = {"tr": train_rows, "te": test_rows}

    train_only = [tensor[train_rows].clone() for tensor in view_tensors]
    train_then_test = [
        torch.cat((tensor[train_rows].clone(), tensor[test_rows].clone()), 0)
        for tensor in view_tensors
    ]
    all_labels = np.concatenate((train_labels, test_labels))

    return (
        train_only,
        train_then_test,
        split_rows,
        all_labels,
        layer_row_counts,
        class_count,
        adj_parameter,
    )

In [None]:
# Build the ER-status train/test tensors from the 'Test000' row of test_info.
(
    data_tr_list,
    data_trte_list,
    trte_idx,
    labels_trte,
    dim_he_list,
    num_class,
    adj_parameter,
) = custom___prepare_trte_data(
    omic_layers=[mRNA, miRNA],
    label_data_series=clinical['ER'],
    tr_sample_list=test_info.loc['Test000', 'ER_train'],
    te_sample_list=test_info.loc['Test000', 'ER_test'],
    adj_parameter=0,
)

# Each section below ends with one blank line (end="\n\n") to separate it from the next.

# --- Bookkeeping values ---
print('dim_he_list: ', dim_he_list)
print('num_class: ', num_class)
print('adj_parameter: ', adj_parameter, end="\n\n")

# --- Container types ---
print("data_tr_list: ", type(data_tr_list))
print("data_trte_list: ", type(data_trte_list))
print("trte_idx: ", type(trte_idx))
print("labels_trte: ", type(labels_trte), end="\n\n")

# --- Container lengths ---
print("data_tr_list: ", len(data_tr_list))
print("data_trte_list: ", len(data_trte_list))
print("labels_trte: ", len(labels_trte), end="\n\n")

# --- Per-view shapes ---
print("data_tr_list: ", [view.shape for view in data_tr_list])
print("data_trte_list: ", [view.shape for view in data_trte_list])
print("labels_trte: ", labels_trte.shape, end="\n\n")

# --- Row positions of each split ---
print(trte_idx['tr'])
print(trte_idx['te'], end="\n\n")

# --- Full contents ---
# NOTE(review): these print whole tensors/arrays; consider trimming before sharing the notebook.
print("data_tr_list: ", data_tr_list)
print("data_trte_list: ", data_trte_list)
print("trte_idx: ", trte_idx)
print("labels_trte: ", labels_trte)


In [None]:
import pandas as pd
import numpy as np
import mlflow



# Table of AUC results; columns are addressed below as "<label>_<classifier>_AUC".
Ariel = pd.read_parquet("MOGONET_AUC_result.parquet")

for label in ["ER", "PR", "HER2", "TN"]:
    # The [:1] slice restricts the summary to the first classifier ("SVM");
    # the remaining names are kept in the list but are not processed.
    for cls_method in ["SVM", "Random Forest", "Logistic Regression", "AdaBoost"][:1]:
        auc_values = Ariel[f"{label}_{cls_method}_AUC"].values

        # Summary statistics over all rows of the column.
        avg_auc, std_auc = np.mean(auc_values), np.std(auc_values)
        max_auc, min_auc = np.max(auc_values), np.min(auc_values)
        med_auc = np.median(auc_values)

        # Logging: five lines per (label, classifier) pair, then a blank separator line.
        print(
            f"{label} - {cls_method} - Mean AUC: {avg_auc}",
            f"{label} - {cls_method} - Median AUC: {med_auc}",
            f"{label} - {cls_method} - Std AUC: {std_auc}",
            f"{label} - {cls_method} - Max AUC: {max_auc}",
            f"{label} - {cls_method} - Min AUC: {min_auc}",
            sep="\n",
            end="\n\n",
        )

        # Only the mean is recorded in MLflow.
        mlflow.log_metric(f"{label} {cls_method} Mean AUC", avg_auc)


In [2]:
# Scratch check: does `del` on a loop variable affect objects that were
# already appended to another list?
A1 = [{"Ariel": "1"}, {"Belle": "2"}, {"Cindy": "3"}]

A2 = []
for gg in A1:

    # Store a reference to the current dict in A2 ...
    A2.append(gg)
    # ... then unbind the name `gg`. This removes only the name, not the dict,
    # which is still referenced by both A1 and A2.
    del gg


# Display the result: A2 still holds all three dicts.
A2

[{'Ariel': '1'}, {'Belle': '2'}, {'Cindy': '3'}]