# BioNeuralNet Cancer Example 2

In [None]:
from bioneuralnet.external_tools import get_cancer_data, preprocess_clinical, filter_common_patients
import cptac

# Instantiate the cptac LUAD dataset object that the helper functions read from.
luad = cptac.Luad()

# Fetch the three raw tables, pass them through filter_common_patients, then
# let preprocess_clinical split the clinical table into `clinical` and
# `phenotype`.
genes_raw, proteins_raw, clinical_raw = get_cancer_data(luad)
genes, proteins, clinical_filtered = filter_common_patients(genes_raw, proteins_raw, clinical_raw)
clinical, phenotype = preprocess_clinical(clinical_filtered)

# Label -> table mapping so the reporting below is written once instead of
# being copy-pasted per table (dict order fixes the print order).
tables = {
    "Genes": genes,
    "Proteins": proteins,
    "Clinical": clinical,
    "Phenotype": phenotype,
}

# Shapes first, then a preview of each table, in the same order as before.
for label, table in tables.items():
    print(f"{label} shape: {table.shape}")

for table in tables.values():
    print(table.head())




Common Patients: 109
Genes shaope: (109, 60525)
Proteins shape: (109, 13302)
Clinical shape: (109, 11)
Phenotype shape: (109, 1)
            ENSG00000121410.12  ENSG00000268895.6  ENSG00000148584.15  \
Patient_ID                                                              
11LU013                  0.062              0.062               0.107   
11LU016                 -0.098             -0.098              -0.286   
11LU022                  0.513              0.513              -0.030   
11LU035                 -0.199             -0.199              -0.081   
C3L-00001               -0.052             -0.052              -0.266   

            ENSG00000175899.15  ENSG00000245105.4  ENSG00000166535.20  \
Patient_ID                                                              
11LU013                  0.358              0.358               0.358   
11LU016                 -0.332             -0.332              -0.332   
11LU022                 -0.033             -0.033              -0.0

In [None]:
from sklearn.feature_selection import VarianceThreshold
import pandas as pd

# Variance cut-off handed to VarianceThreshold for both omics tables.
VARIANCE_THRESHOLD = 0.1


def _apply_variance_filter(frame, threshold=VARIANCE_THRESHOLD):
    """Fit a VarianceThreshold on ``frame`` and rebuild the result as a DataFrame.

    Args:
        frame: DataFrame to filter; its index and column labels are reused
            for the returned DataFrame.
        threshold: Value passed as ``VarianceThreshold(threshold=...)``.

    Returns:
        A ``(selector, values, filtered)`` tuple: the fitted selector, the
        array returned by ``fit_transform``, and a DataFrame wrapping that
        array with ``frame``'s index and the column labels the selector kept.
    """
    selector = VarianceThreshold(threshold=threshold)
    values = selector.fit_transform(frame)
    filtered = pd.DataFrame(
        values,
        index=frame.index,
        # get_support() is a boolean mask over the input columns.
        columns=frame.columns[selector.get_support()],
    )
    return selector, values, filtered


print("Before filtering:\n")
print(f"Genes has {genes.isnull().sum().sum()} missing values")
print(f"Proteins has {proteins.isnull().sum().sum()} missing values")

# Remove every column that contains at least one NaN value.
genes = genes.dropna(axis=1, how='any')
proteins = proteins.dropna(axis=1, how='any')

print("\nAfter Dropping NaNs:")
print(genes.shape)
print(proteins.shape)

# Same filter for both tables; the selector and raw array names from the
# original cell are kept so later cells that reference them still work.
variance_filter, genomics_array, genomics = _apply_variance_filter(genes)
variance_filter_prot, proteomics_array, proteomics = _apply_variance_filter(proteins)

print("After filtering:")
print("Genomics shape:", genomics.shape)
print("Proteomics shape:", proteomics.shape)
print(genomics.head())
print(proteomics.head())

Before filtering:

Gnes has 0 missing values
Proteins has 272376 missing values

After Droping Nans:
(109, 60525)
(109, 8085)
After filtering:
Genomics shape: (109, 12377)
Proteomics shape: (109, 3645)
            ENSG00000250420.8  ENSG00000131043.12  ENSG00000205002.4  \
Patient_ID                                                             
11LU013                 0.504               0.502              3.657   
11LU016                -0.358               0.207             -0.022   
11LU022                -0.054               0.766              0.372   
11LU035                -0.038              -0.017              0.169   
C3L-00001               0.217               0.227              0.412   

            ENSG00000157426.14  ENSG00000179869.15  ENSG00000004846.16  \
Patient_ID                                                               
11LU013                  0.000               0.600               0.600   
11LU016                  0.000               0.634               0.634 

In [None]:
from bioneuralnet.external_tools import SmCCNet

# Keyword arguments for SmCCNet: the phenotype frame, the two filtered omics
# frames, and a matching label for each omics frame (same order).
# NOTE(review): the recorded output of this cell ends with a FileNotFoundError
# for 'cancer_output/GlobalNetwork.csv', which does not match the output_dir
# used here -- re-run on a fresh kernel to confirm the current behaviour.
smccnet_config = dict(
    phenotype_df=phenotype,
    omics_dfs=[genomics, proteomics],
    data_types=["genomics", "proteomics"],
    output_dir="cancer_output_1",
)

smccnet = SmCCNet(**smccnet_config)

# run() is unpacked into exactly two values: the global network and clusters.
smccnet_results = smccnet.run()
global_network, smccnet_clusters = smccnet_results

2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - Initialized SmCCNet with parameters:
2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - K-Fold: 5
2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - Summarization: NetSHy
2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - Evaluation method: 
2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - ncomp_pls: 0
2025-04-04 19:36:43,956 - bioneuralnet.external_tools.smccnet - INFO - subSampNum: 1000
2025-04-04 19:36:43,957 - bioneuralnet.external_tools.smccnet - INFO - BetweenShrinkage: 5.0
2025-04-04 19:36:43,957 - bioneuralnet.external_tools.smccnet - INFO - Seed: 723
2025-04-04 19:36:43,957 - bioneuralnet.external_tools.smccnet - INFO - Starting SmCCNet workflow.
2025-04-04 19:36:43,957 - bioneuralnet.external_tools.smccnet - INFO - Validating and serializing input data for SmCCNet...
2025-04-04 19:36:43,958 - bioneuralnet.external_tools.smccn

FileNotFoundError: [Errno 2] No such file or directory: 'cancer_output/GlobalNetwork.csv'

In [None]:
import bioneuralnet

# Record which BioNeuralNet release produced the outputs in this notebook.
installed_version = bioneuralnet.__version__
print("BioNeuralNet version:", installed_version)