# BioNeuralNet Cancer Example 1 

In [1]:
from bioneuralnet.external_tools import get_cancer_data, preprocess_clinical, filter_common_patients
import cptac

# Instantiate the CPTAC `Ccrcc` dataset object that the helpers below read from.
ccrcc = cptac.Ccrcc()

# Pull the raw gene, protein and clinical tables, and keep an on-disk copy of
# the untouched clinical table (written to the current working directory).
genes_raw, proteins_raw, clinical_raw = get_cancer_data(ccrcc)
clinical_raw.to_csv("clinical_raw.csv")

# Keep only patients shared by all three tables. In the recorded run this
# reports 103 common patients and every resulting table has 103 rows.
genes, proteins, clinical_filtered = filter_common_patients(genes_raw, proteins_raw, clinical_raw)

# Split the clinical table into covariates and the phenotype column.
# NOTE(review): the exact encoding/cleaning done here lives in
# bioneuralnet.external_tools.preprocess_clinical -- confirm before relying on it.
clinical, phenotype = preprocess_clinical(clinical_filtered)

# One mapping drives both summaries, so a label can never drift from its frame.
datasets = {
    "Genes": genes,
    "Proteins": proteins,
    "Clinical": clinical,
    "Phenotype": phenotype,
}

# Shapes first (rows = patients, columns = features) ...
for label, frame in datasets.items():
    print(f"{label} shape: {frame.shape}")

# ... then a peek at the first rows of each table, in the same order.
for frame in datasets.values():
    print(frame.head())






Common Patients: 103
Genes shaope: (103, 60525)
Proteins shape: (103, 11889)
Clinical shape: (103, 11)
Phenotype shape: (103, 1)
            ENSG00000121410.12  ENSG00000268895.6  ENSG00000148584.15  \
Patient_ID                                                              
C3L-00004                0.041              0.041               0.004   
C3L-00010               -0.007             -0.007               0.007   
C3L-00011               -0.064             -0.064               0.007   
C3L-00026                0.010              0.010              -0.007   
C3L-00079                0.186              0.186               0.266   

            ENSG00000175899.15  ENSG00000245105.4  ENSG00000166535.20  \
Patient_ID                                                              
C3L-00004                0.015              0.015               0.015   
C3L-00010               -0.001             -0.001              -0.001   
C3L-00011                0.004              0.004               0.0

In [2]:
from sklearn.feature_selection import VarianceThreshold
import pandas as pd

# Variance cut-off shared by both omics layers; passed to VarianceThreshold.
VARIANCE_THRESHOLD = 0.2


def filter_low_variance(frame, threshold=VARIANCE_THRESHOLD):
    """Drop low-variance columns from ``frame`` and keep its labels.

    Args:
        frame: DataFrame with samples as rows and features as columns.
        threshold: Variance cut-off handed to ``VarianceThreshold``.

    Returns:
        A ``(filtered, selector)`` tuple: ``filtered`` is a new DataFrame with
        the original index and only the surviving columns; ``selector`` is the
        fitted ``VarianceThreshold`` (its ``get_support()`` mask tells which
        columns were kept).

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when no column meets
            the threshold.
    """
    selector = VarianceThreshold(threshold=threshold)
    kept_values = selector.fit_transform(frame)
    # fit_transform returns a bare array, so re-attach the row index and the
    # names of the columns that survived.
    filtered = pd.DataFrame(
        kept_values,
        index=frame.index,
        columns=frame.columns[selector.get_support()],
    )
    return filtered, selector


print("Before filtering:\n")
print(f"Genes has {genes.isnull().sum().sum()} missing values")
print(f"Proteins has {proteins.isnull().sum().sum()} missing values")

# Remove every feature (column) that has at least one NaN. This is a
# whole-column drop, not imputation: in the recorded run it takes proteins
# from 11889 to 7016 columns while genes are unaffected.
genes = genes.dropna(axis=1, how='any')
proteins = proteins.dropna(axis=1, how='any')

print("\nAfter dropping NaNs:")
print(genes.shape)
print(proteins.shape)

# Same variance filter for both layers; the fitted selectors stay available
# under their original names for later inspection.
genomics, variance_filter = filter_low_variance(genes)
proteomics, variance_filter_prot = filter_low_variance(proteins)

print("After filtering:")
print("Genomics shape:", genomics.shape)
print("Proteomics shape:", proteomics.shape)
print(genomics.head())
print(proteomics.head())

Before filtering:

Gnes has 0 missing values
Proteins has 261664 missing values

After Droping Nans:
(103, 60525)
(103, 7016)
After filtering:
Genomics shape: (103, 539)
Proteomics shape: (103, 1746)
            ENSG00000286130.1  ENSG00000278847.1  ENSG00000275310.1  \
Patient_ID                                                            
C3L-00004              -0.911             -0.911             -0.911   
C3L-00010              -1.019             -1.019             -1.019   
C3L-00011              -0.145             -0.145             -0.145   
C3L-00026               0.023              0.023              0.023   
C3L-00079              -1.137             -1.137             -1.137   

            ENSG00000286120.1  ENSG00000286173.1  ENSG00000233619.1  \
Patient_ID                                                            
C3L-00004              -0.911             -0.911             -0.911   
C3L-00010              -1.019             -1.019             -1.019   
C3L-00011         

In [3]:
from bioneuralnet.external_tools import SmCCNet

# Keep each omics table next to its label so the two lists handed to SmCCNet
# are always built in the same order.
omics_by_type = {
    "genomics": genomics,
    "proteomics": proteomics,
}

smccnet = SmCCNet(
    phenotype_df=phenotype,
    omics_dfs=list(omics_by_type.values()),
    data_types=list(omics_by_type.keys()),
    output_dir="cancer_output_2",
)

# run() hands back two objects; the later cells treat the first as a single
# DataFrame and the second as an indexable collection of DataFrames.
# NOTE(review): presumably the global network plus per-cluster sub-networks --
# confirm against the SmCCNet wrapper.
global_network, smccnet_clusters = smccnet.run()

2025-04-04 20:32:32,646 - bioneuralnet.external_tools.smccnet - INFO - Initialized SmCCNet with parameters:
2025-04-04 20:32:32,646 - bioneuralnet.external_tools.smccnet - INFO - K-Fold: 5
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - Summarization: NetSHy
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - Evaluation method: 
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - ncomp_pls: 0
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - subSampNum: 1000
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - BetweenShrinkage: 5.0
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - Seed: 723
2025-04-04 20:32:32,647 - bioneuralnet.external_tools.smccnet - INFO - Output directory set to: cancer_output_2
2025-04-04 20:32:32,648 - bioneuralnet.external_tools.smccnet - INFO - Starting SmCCNet workflow.
2025-04-04 20:32:32,648 - bioneuralnet.external_tools.smccnet - INFO - 

In [11]:
import numpy as np


def print_sparsity_report(label, frame):
    """Print nonzero count, size, density and sparsity of ``frame``.

    Args:
        label: Text printed on a header line so each report is identifiable.
        frame: Object exposing ``.values`` as a NumPy array (e.g. a DataFrame).

    Returns:
        None. The report is written to stdout.
    """
    matrix = frame.values
    # Gotcha: count_nonzero treats NaN as nonzero, so NaN cells count as edges.
    num_nonzero = np.count_nonzero(matrix)
    total_entries = matrix.size
    # Guard the empty-matrix case instead of raising ZeroDivisionError.
    density = num_nonzero / total_entries if total_entries else float("nan")
    sparsity = 1 - density

    print(f"[{label}]")
    print(f"Number of nonzero entries: {num_nonzero}")
    print(f"Total entries: {total_entries}")
    print(f"Density (nonzero fraction): {density:.4f}")
    print(f"Sparsity (zero fraction): {sparsity:.4f}")


# Whole network first, then every cluster that SmCCNet returned -- no
# hard-coded indices, so fewer or more than three clusters both work.
# Assumes smccnet_clusters is an ordered sequence of DataFrames -- TODO confirm.
print_sparsity_report("Global network", global_network)
for cluster_index, cluster_frame in enumerate(smccnet_clusters):
    print_sparsity_report(f"Cluster {cluster_index}", cluster_frame)


Number of nonzero entries: 1079462
Total entries: 5221225
Density (nonzero fraction): 0.2067
Sparsity (zero fraction): 0.7933
Number of nonzero entries: 2070
Total entries: 2116
Density (nonzero fraction): 0.9783
Sparsity (zero fraction): 0.0217
Number of nonzero entries: 4830
Total entries: 4900
Density (nonzero fraction): 0.9857
Sparsity (zero fraction): 0.0143
Number of nonzero entries: 420
Total entries: 441
Density (nonzero fraction): 0.9524
Sparsity (zero fraction): 0.0476


In [9]:
import bioneuralnet
# Record the library version this notebook was executed with.
print(f"BioNeuralNet version: {bioneuralnet.__version__}")

BioNeuralNet version: 1.0
