In [1]:
# /*==========================================================================================*\
# **                        _           _ _   _     _  _         _                            **
# **                       | |__  _   _/ | |_| |__ | || |  _ __ | |__                         **
# **                       | '_ \| | | | | __| '_ \| || |_| '_ \| '_ \                        **
# **                       | |_) | |_| | | |_| | | |__   _| | | | | | |                       **
# **                       |_.__/ \__,_|_|\__|_| |_|  |_| |_| |_|_| |_|                       **
# \*==========================================================================================*/


# -----------------------------------------------------------------------------------------------
# Author: Bùi Tiến Thành - Tien-Thanh Bui (@bu1th4nh)
# Title: X-intNMF-notebook.ipynb
# Date: 2025/03/17 16:11:50
# Description: 
# 
# (c) 2025 bu1th4nh / UCF Computational Biology Lab. All rights reserved. 
# Written with dedication in the University of Central Florida, EPCOT and the Magic Kingdom.
# -----------------------------------------------------------------------------------------------


import logging
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import List, Dict, Any, Tuple, Union, Literal


from model.crossOmicNMF import XIntNMF

# Data Acquisition

In [2]:
mRNA = pd.read_parquet('sample_processed_data/BRCA_micro_dataset/mRNA.parquet')
miRNA = pd.read_parquet('sample_processed_data/BRCA_micro_dataset/miRNA.parquet')
DNAMethylation = pd.read_parquet('sample_processed_data/BRCA_micro_dataset/mRNA.parquet')

mRNA_miRNA_graph = pd.read_parquet('sample_processed_data/BRCA_micro_dataset/mRNA_miRNA_graph.parquet')

# Settings

These are the optimal settings for classification, which are used in the manuscript 

In [3]:
k_latent_components = 10
alpha_graph_reg = 100
beta_omics_factors_sparsity_reg = 100


# Model

In [7]:
model = XIntNMF(
    omics_layers = [
        mRNA.to_numpy(np.float64, True), 
        miRNA.to_numpy(np.float64, True), 
        DNAMethylation.to_numpy(np.float64, True)
    ],
    cross_omics_interaction = {
        (0, 1): mRNA_miRNA_graph.to_numpy(np.float64, True)
    },
    k = k_latent_components,
    alpha = alpha_graph_reg,
    betas = beta_omics_factors_sparsity_reg,
    gammas = 1,
    max_iter = 5000,
    tol = 1e-4,
    verbose = True,
    backend = 'numpy'
)

# Solve
Ws, H = model.solve(run_mode='full', use_cross_validation=True)


# Post-process
sample_list = mRNA.columns
omics_features_list = [mRNA.index, miRNA.index, DNAMethylation.index]
latent_columns = [f"Latent_{i:03}" for i in range(k_latent_components)]


# Save the results
sample_factor_matrix_H = pd.DataFrame(H, index=latent_columns, columns=sample_list)
omics_factor_matrices_Ws = [pd.DataFrame(W, index=omics_features_list[i], columns=latent_columns) for i, W in enumerate(Ws)]



Solving H matrix: 100%|██████████| 50/50 [00:07<00:00,  6.67it/s]


In [8]:
sample_factor_matrix_H.head()

Sample,TCGA-A2-A1FX-01,TCGA-A2-A259-01,TCGA-A2-A25B-01,TCGA-A2-A25D-01,TCGA-A7-A3IZ-01,TCGA-A7-A425-01,TCGA-A7-A4SD-01,TCGA-AC-A23G-01,TCGA-AC-A2B8-01,TCGA-AC-A3QP-01,...,TCGA-LQ-A4E4-01,TCGA-OL-A5RV-01,TCGA-OL-A5RW-01,TCGA-OL-A66I-01,TCGA-OL-A66J-01,TCGA-OL-A6VR-01,TCGA-S3-A6ZF-01,TCGA-S3-A6ZH-01,TCGA-S3-AA0Z-01,TCGA-S3-AA12-01
Latent_000,545.832932,545.891011,576.271285,557.534721,549.139306,554.304521,567.966502,570.748648,573.283219,552.273241,...,548.487837,567.660571,575.86034,576.876441,571.083229,545.354715,565.775241,549.629075,584.090359,525.827762
Latent_001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Latent_002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Latent_003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Latent_004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
