In [1]:
import os
# Select rpy2's ABI-mode CFFI interface; set before rpy2 is imported below.
os.environ["RPY2_CFFI_MODE"] = "ABI"
import rpy2.robjects as ro
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.rinterface_lib.callbacks import logger as rpy2_logger
rpy2_logger.setLevel("ERROR")   # show errors only; suppress R messages and warnings
rpy2_logger.propagate = False   # do not pass records on to the root logger

import sys
# Make modules located two directories up importable (e.g. scSurvival_beta below).
sys.path.append("../../")

# from scSurvival.scsurvival import scSurvival, scSurvivalRun, PredictIndSample
from scSurvival_beta import scSurvivalRun, PredictIndSample
import torch
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import os 
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# seen when several OpenMP-linked libraries are loaded together - confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

from tqdm import tqdm, trange
import scanpy as sc
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
import io
import contextlib
# In-memory text buffer that can serve as a sink when redirecting stdout.
f = io.StringIO()
from lifelines.utils import concordance_index
from scipy.stats import percentileofscore
# NOTE(review): star import; helpers such as r_to_pandas / r_list_to_pydict_df
# used later are presumably provided here - confirm against utils.
from utils import *

In [2]:
def load_r_packages():
    """Reset the embedded R session and attach the R packages used by the simulation.

    The R script first runs ``rm(list=ls())``, so R variables created by
    earlier calls are removed, then attaches splatter, scran, Seurat, pROC,
    ggplot2, dplyr and caret, and finally calls ``set.seed(1)``.

    Returns:
        None. All effects are on the R session reached through ``ro``.
    """
    ro.r('''  
    rm(list=ls())
    # library("scater")
    library("splatter")
    library(scran)
    library(Seurat)
    # library(preprocessCore)
    library(pROC)
    # library(APML2)
    # library(APML1)
    # library(APML0)

    library(ggplot2)
    library(dplyr)
    library(caret)
    set.seed(1)
    ''')


def simulated_base_sc_dataset(seed=42, plot=False, cell_surv_ratio=0.15, sub_cluster_ratio=0.3, de_facloc=0.35):
    """Simulate the base single-cell dataset inside the embedded R session.

    The R script (splatter + Seurat) does the following:

    1. Simulates 10000 cells x 2000 genes in three top-level groups with
       probabilities ``(1 - 2*alpha, alpha, alpha)``.
    2. Runs two further two-group simulations, sized like top-level Group2 and
       Group3, and labels their cells Group2/Group3 and Group4/Group5.
    3. Column-binds the top-level Group1 counts with the two sub-simulations,
       builds a Seurat object ``data`` and adds the metadata ``sim.group`` and
       ``Actual.cond`` (Group2 -> 'good.survival', Group4 -> 'bad.survival',
       everything else -> 'other').
    4. Log-normalizes and selects variable features (``var_features_genes``).

    The values are interpolated into the R source through an f-string, so
    they must render as valid R numeric literals.

    Args:
        seed: Becomes the R variable ``seed``; the two sub-simulations use
            ``seed + 1`` and ``seed + 2``.
        plot: When True, additionally scales the data, runs PCA / UMAP /
            neighbor graph / clustering and saves three UMAP PDFs with
            ``ggsave(paste0(save_path, ...))``. The R variable ``save_path``
            is NOT defined here; it must already exist in the R session.
        cell_surv_ratio: Becomes the R variable ``alpha``, the probability of
            each of the two non-background top-level groups.
        sub_cluster_ratio: Probability of the first subgroup in each of the
            two sub-simulations (the second gets ``1 - sub_cluster_ratio``).
        de_facloc: ``de.facLoc`` used for both subgroups in the sub-simulations.

    Returns:
        None. Results stay in the R session (``data``, ``var_features_genes``,
        ``alpha``, ``seed``, ``num_bg_clusters`` and the intermediate objects).
    """
    ro.r(f'''
    seed <- {seed}
    alpha = {cell_surv_ratio}
    sub_cluster_ratio = {sub_cluster_ratio}
    de_facLoc = {de_facloc}
    num_bg_clusters = 3

    # ----------------------
    # 第一步：生成 3 个大类
    # ----------------------
    sim.top <- splatSimulateGroups(
      batchCells = 10000, nGenes = 2000,
      group.prob = c(1- 2*alpha, alpha, alpha),   # 背景:70%, A:15%, B:15%
      de.prob = c(0.6, 0.3, 0.3),
      de.facLoc = c(0.6, 0.2, 0.2),
      de.facScale = c(0.6, 0.6, 0.6),
      seed = seed
    )

    # ----------------------
    # 第二步：细分 A -> (2,3)，B -> (4,5)
    # ----------------------
    sce.A <- sim.top[, sim.top$Group == "Group2"]
    sim.A <- splatSimulateGroups(
      # params.ref,
      batchCells = ncol(sce.A),
      nGenes = 2000,
      group.prob = c(sub_cluster_ratio, 1-sub_cluster_ratio),          
      de.prob = c(0.02, 0.02),             # 保证 2/3 相似
      de.facLoc = c(de_facLoc, de_facLoc),
      de.facScale = c(0.1, 0.1),
      seed = seed + 1
    )

    sce.B <- sim.top[, sim.top$Group == "Group3"]
    sim.B <- splatSimulateGroups(
      # params.ref,
      batchCells = ncol(sce.B),
      nGenes = 2000,
      group.prob = c(sub_cluster_ratio, 1-sub_cluster_ratio),          
      de.prob = c(0.02, 0.02),           
      de.facLoc = c(de_facLoc, de_facLoc),
      de.facScale = c(0.1, 0.1),
      seed = seed + 2
    )

    # 合并 counts
    sce.bg <- sim.top[, sim.top$Group == "Group1"]
    expr.all <- cbind(
      counts(sce.bg),
      counts(sim.A),
      counts(sim.B)
    )

    # 构造 group 标签
    group.labels <- c(
      rep("Group1", ncol(sce.bg)),
      paste0("Group", 2:3)[sim.A$Group],
      paste0("Group", 4:5)[sim.B$Group]
    )

    # 生成完整的 SCE 对象
    sim.groups <- SingleCellExperiment(
      assays = list(counts = expr.all)
    )
    colData(sim.groups)$Group <- factor(group.labels, levels = paste0("Group", 1:5))

    counts.mat <- counts(sim.groups)
    rownames(counts.mat) <- make.unique(rownames(counts.mat))
    colnames(counts.mat) <- paste0('Cell', 1:ncol(counts.mat))

    data <- CreateSeuratObject(counts = counts.mat)
    data <- AddMetaData(object = data, metadata = sim.groups$Group, col.name = "sim.group")

    sim.groups.recode <- rep('other', dim(data)[2])
    sim.groups.recode[data$sim.group %in% c('Group2')] <- 'good.survival'
    sim.groups.recode[data$sim.group %in% c('Group4')] <- 'bad.survival'
    data$Actual.cond <- factor(sim.groups.recode, levels=c('other', 'good.survival', 'bad.survival'))

    select_gene_ids <- 1:2000
    data <- NormalizeData(object = data, normalization.method = "LogNormalize", 
                          scale.factor = 10000)
    data <- FindVariableFeatures(object = data, selection.method = 'vst', nfeatures=2000)
    var_features_genes = VariableFeatures(data)
    ''')

    # Optional diagnostics: embed, cluster and save UMAP plots under the
    # R-side `save_path` (which the caller must have set beforehand).
    if plot:
        ro.r('''
        data <- ScaleData(object = data)
        data <- RunPCA(object = data, features = VariableFeatures(data)[select_gene_ids])

        data <- RunUMAP(object = data, dims = 1:10, n.neighbors = 5, min.dist=0.5, spread=1.5)
        # data <- RunUMAP(object = data, dims = 1:10)
        data <- FindNeighbors(object = data, dims = 1:10, k.param=20)
        # data <- FindNeighbors(object = data, dims = 1:10, k.param=20,prune.SNN = 0.2)
        data <- FindClusters(object = data, resolution = 0.5)

        DimPlot(object = data, reduction = 'umap', group.by = 'seurat_clusters', label = F, label.size = 10,pt.size=0.5)
        ggsave(paste0(save_path, 'simu_seurat_cluster_umap.pdf'), height = 5, width = 7)

        DimPlot(object = data, reduction = 'umap', group.by = 'sim.group', pt.size = 0.5, label = T)
        ggsave(paste0(save_path, 'simu_group_umap.pdf'), height = 5, width = 7)

        # DimPlot(object = data, reduction = 'umap',  cols = c('grey','blue', 'red'), group.by = 'sim.group', pt.size = 0.5, label = T)
        # 
        DimPlot(object = data, reduction = 'umap',  cols = c('grey','blue', 'red'), group.by = 'Actual.cond', pt.size = 0.5, label = T)
        ggsave(paste0(save_path, 'simu_surv_group_umap.pdf'), height = 5, width = 7)
        ''')

def simulated_sc_datasets(plot=False):
    """Build 100 simulated per-sample single-cell datasets with survival labels.

    Reads R variables that this function does not define (``data``, ``alpha``,
    ``seed``, ``num_bg_clusters``, ``var_features_genes``); in this file they
    are assigned by ``simulated_base_sc_dataset``, which must run first.

    For sample ``i`` in ``1..bulk_num`` (``bulk_num = 100``) the R script:

    * sets ``ratio = (i-1)/(bulk_num-1)`` and draws, with replacement,
      ``round(1000 * ratio * risk_prop)`` 'good.survival' cells and
      ``round(1000 * (1-ratio) * risk_prop)`` 'bad.survival' cells, where
      ``risk_prop = alpha / (1 - alpha)``;
    * fills the remainder from the three background clusters using random
      proportions (the first cluster's share is drawn from U(0.5, 1));
    * records survival time ``i`` with status 1, except that with probability
      ``censor_prob = 0.1`` the status is 0 and the time is ``sample(i, 1)``.

    Args:
        plot: When True, re-draws the cells for samples 2, 10, 40, 60, 90 and
            99 with the same per-sample seed and saves a combined DimPlot to
            ``paste0(save_path, 'survival.time.simulated_real.pdf')``. The R
            variable ``save_path`` must already exist.
            NOTE(review): DimPlot presumably needs the reductions computed by
            ``simulated_base_sc_dataset(plot=True)`` - confirm.

    Returns:
        dict with keys:
            'sc_data_list': per-sample expression tables converted from the R
                list (names 'bulk1' ... 'bulk100').
            'surv_info_df': R ``surv_info`` (time, status, num.good.cells,
                num.bad.cells; row names are the sample names).
            'Expression_pbmc_df': the full normalized expression table.
            'labels_df': per-cell ``Actual.cond`` labels (column 'labels').
            'features': {'all_genes': [...], 'hvg': [...]}.
    """
    ro.r('''
    Expression_pbmc <- as.matrix(data@assays[["RNA"]]@layers[["data"]])
    rownames(Expression_pbmc) <- rownames(data)
    colnames(Expression_pbmc) <- colnames(data)
    Expression_pbmc <- as.data.frame(Expression_pbmc)
    all_genes <- rownames(Expression_pbmc)
         
    sampled_cells = 1000
    bulk_num=100

    other_cells <- colnames(Expression_pbmc)[data$Actual.cond=='other']
    good_cells <- colnames(Expression_pbmc)[data$Actual.cond=='good.survival']
    bad_cells <- colnames(Expression_pbmc)[data$Actual.cond=='bad.survival']
    num_good <- length(good_cells)
    num_bad <- length(bad_cells)

    bulk_condition = NULL
    censor_prob = 0.1

    status = NULL
    surv_time = NULL

    num_good_cond_cells = NULL
    num_bad_cond_cells = NULL
         
    risk_prop <- alpha / (1 - alpha)

    sc_data_list = list()
    pb <- txtProgressBar(min = 1, max = bulk_num, style = 3)
    for (i in 1:bulk_num){
      setTxtProgressBar(pb, i)
      set.seed(seed+i)   
      ratio <- (i-1) / (bulk_num-1)
      # ratio <- plogis((ratio - 0.5) * 2 * 6)
      num_good_cond_cells_i = round(sampled_cells * ratio * risk_prop)
      num_bad_cond_cells_i = round(sampled_cells * (1-ratio) * risk_prop)
  
      # bg_rand_prob <- runif(num_bg_clusters)
      bg_rand_prob <- runif(num_bg_clusters)
      bg_rand_prob_1 <- runif(1, min=0.5, max=1)
      bg_rand_prob_23 <- runif(2, min=0, max=(1-bg_rand_prob_1))
      bg_rand_prob_23 <- bg_rand_prob_23 / sum(bg_rand_prob_23) * (1 - bg_rand_prob_1)
      bg_rand_prob <- c(bg_rand_prob_1, bg_rand_prob_23)
      bg_rand_prob <- bg_rand_prob / sum(bg_rand_prob) * (1 - risk_prop)
      num_bg_cond_cells_i <- round(sampled_cells * bg_rand_prob)
  
  
      condition_good_cells <- good_cells[sample(num_good, num_good_cond_cells_i , replace=TRUE)]
      condition_bad_cells <- bad_cells[sample(num_bad, num_bad_cond_cells_i, replace=TRUE)]

      num_rc <- length(unique(data$sim.group[c(condition_good_cells, condition_bad_cells)]))
      condition_bg_cells <- c()
      bg_clusters <- unique(data$sim.group[other_cells])
      for (j in 1:num_bg_clusters){
        bg_cells <- colnames(Expression_pbmc)[data$sim.group==bg_clusters[j]]
        num_bg_cells_j <- num_bg_cond_cells_i[j]
        condition_bg_cells_j <- bg_cells[sample(length(bg_cells), num_bg_cells_j, replace=TRUE)]
        condition_bg_cells <- c(condition_bg_cells, condition_bg_cells_j)
      }
  
      condition_cells <- c(condition_good_cells, condition_bad_cells, condition_bg_cells)
  
      num_good_cond_cells = c(num_good_cond_cells, num_good_cond_cells_i)
      num_bad_cond_cells = c(num_bad_cond_cells, num_bad_cond_cells_i)
      Expression_selected <- Expression_pbmc[, condition_cells]
  
      sc_data_list[[sprintf('bulk%d', i)]] <- Expression_selected

      if (runif(1, min = 0, max = 1) < censor_prob){
        status = c(status, 0)
        surv_time = c(surv_time, sample(i, 1))
      }
      else{
        surv_time = c(surv_time, i)
        status = c(status, 1)
      }
    }

    bulk_names <- paste0('bulk', 1:bulk_num)
    surv_info <- data.frame(
      time=surv_time,
      status=status,
      num.good.cells = num_good_cond_cells,
      num.bad.cells = num_bad_cond_cells,
      row.names = bulk_names
    )

    dim(surv_info)
    dim(Expression_pbmc)
         
    labels <- data$Actual.cond
    labels <- as.data.frame(labels)
    row.names(labels) <- colnames(data)
    
    ''')

    # Optional diagnostics: repeat the per-sample sampling for a few samples
    # (same seeds as above) and save a grid of DimPlots under R `save_path`.
    if plot:
        ro.r('''
        library(gridExtra)
        library(ggpubr)

        plot_list <- list()
        sampled_cells = 1000
        for (i in c(2, 10, 40, 60, 90, 99)){
          set.seed(seed+i)
          ratio <- (i-1) / (bulk_num-1)
          # ratio <- plogis((ratio - 0.5) * 2 * 6)
          num_good_cond_cells_i = round(sampled_cells * ratio * risk_prop)
          num_bad_cond_cells_i = round(sampled_cells * (1-ratio) * risk_prop)
  
          # bg_rand_prob <- runif(num_bg_clusters)
          bg_rand_prob <- runif(num_bg_clusters)
          bg_rand_prob_1 <- runif(1, min=0.5, max=1)
          bg_rand_prob_23 <- runif(2, min=0, max=(1-bg_rand_prob_1))
          bg_rand_prob_23 <- bg_rand_prob_23 / sum(bg_rand_prob_23) * (1 - bg_rand_prob_1)
          bg_rand_prob <- c(bg_rand_prob_1, bg_rand_prob_23)
          # print('bg_rand_prob:')
          # print(bg_rand_prob)
          # print(digest(bg_rand_prob)) 
          bg_rand_prob <- bg_rand_prob / sum(bg_rand_prob) * (1 - risk_prop)
          num_bg_cond_cells_i <- round(sampled_cells * bg_rand_prob)
  
  
          condition_good_cells <- good_cells[sample(num_good, num_good_cond_cells_i , replace=T)]
          condition_bad_cells <- bad_cells[sample(num_bad, num_bad_cond_cells_i, replace=T)]
          
          num_rc <- length(unique(data$sim.group[c(condition_good_cells, condition_bad_cells)]))
          condition_bg_cells <- c()
          bg_clusters <- unique(data$sim.group[other_cells])
          for (j in 1:num_bg_clusters){
            bg_cells <- colnames(Expression_pbmc)[data$sim.group==bg_clusters[j]]
            num_bg_cells_j <- num_bg_cond_cells_i[j]
            condition_bg_cells_j <- bg_cells[sample(length(bg_cells), num_bg_cells_j, replace=T)]
            condition_bg_cells <- c(condition_bg_cells, condition_bg_cells_j)
          }
  
          condition_cells <- c(condition_good_cells, condition_bad_cells, condition_bg_cells)
  
  
          p <- DimPlot(data[, condition_cells], group.by = 'Actual.cond', cols = c('grey','blue', 'red'), pt.size = 0.5) +
          ggtitle(sprintf("survival.time: %d months", i))
          plot_list[[length(plot_list) + 1]] <- p
        }

        # combined_plot <- do.call(grid.arrange, c(plot_list, ncol = 3))
        # combined_plot
        ggarrange(plotlist = plot_list, ncol = 3, nrow=2, common.legend = TRUE, legend = "bottom")
        ggsave(paste0(save_path, 'survival.time.simulated_real.pdf'), height = 7, width = 10.5)
        ''')

    # Pull the R objects built above into Python.
    # NOTE(review): r_to_pandas / r_list_to_pydict_df are not defined in this
    # part of the file; presumably they come from `utils` - confirm.
    surv_info_df     = r_to_pandas("surv_info")
    Expression_pbmc_df = r_to_pandas("Expression_pbmc")
    sc_data_list     = r_list_to_pydict_df("sc_data_list")  # R list names are 'bulk1', 'bulk2', ... (from sprintf('bulk%d', i))
    labels_df       = r_to_pandas("labels")
    features = {
        'all_genes': list(ro.r("all_genes")),
        'hvg': list(ro.r("var_features_genes"))
    }

    return_data = {
        'sc_data_list': sc_data_list,
        'surv_info_df': surv_info_df,
        'Expression_pbmc_df': Expression_pbmc_df,
        'labels_df': labels_df,
        'features': features
    }

    return return_data

In [3]:
def organize_data_for_model(datasets):
    """Convert the simulated datasets into the AnnData / DataFrame inputs of the model.

    Args:
        datasets: dict as returned by ``simulated_sc_datasets`` with the keys
            'sc_data_list' (sample name -> genes x cells DataFrame),
            'surv_info_df', 'Expression_pbmc_df' (genes x cells), 'labels_df'
            (column 'labels') and 'features' ({'all_genes', 'hvg'}).

    Returns:
        tuple ``(adata, surv, adata_new)``:
            adata: cells x HVG AnnData of all per-sample cells stacked in the
                iteration order of 'sc_data_list'; ``obs['sample']`` holds the
                sample name and ``adata.raw`` keeps the full gene set. It is
                an independent copy (not a view), so later writes to it do not
                trigger copy-on-write of a view.
            surv: DataFrame with float 'time' and int 'status', indexed like
                'surv_info_df'.
            adata_new: cells x all-genes AnnData of the base dataset with
                ``obs['sim_group']`` holding the ground-truth label.
    """
    all_genes = datasets['features']['all_genes']
    hvg_genes = datasets['features']['hvg']
    sc_data_list = datasets['sc_data_list']
    clinic = datasets['surv_info_df']

    # Stack every sample's cells (tables are genes x cells, hence the transpose)
    # and remember which sample each row came from.
    xs = []
    samples = []
    for key, val in tqdm(sc_data_list.items()):
        xs.append(val.values.T)
        samples.extend([key] * val.shape[1])

    X = np.concatenate(xs, axis=0)
    # AnnData requires string obs names; build them explicitly instead of
    # relying on its implicit (warning-emitting) int -> str conversion.
    obs = pd.DataFrame({'sample': samples}, index=pd.RangeIndex(X.shape[0]).astype(str))
    adata = sc.AnnData(X, obs=obs, var=pd.DataFrame(index=all_genes))

    # Keep the full gene set in .raw, then restrict to the HVGs. `.copy()`
    # materializes the subset: downstream code adds columns to `adata.obs`,
    # which on a view would force an implicit copy with a warning.
    adata.raw = adata.copy()
    adata = adata[:, hvg_genes].copy()

    surv = clinic[['time', 'status']].copy()
    surv['time'] = surv['time'].astype(float)
    surv['status'] = surv['status'].astype(int)

    # Base dataset with ground-truth labels, used for cell-level evaluation.
    x_base = datasets['Expression_pbmc_df'].values.T
    sim_group = datasets['labels_df']['labels'].values
    obs_new = pd.DataFrame({'sim_group': sim_group}, index=pd.RangeIndex(x_base.shape[0]).astype(str))
    adata_new = sc.AnnData(x_base, obs=obs_new, var=pd.DataFrame(index=all_genes))

    return adata, surv, adata_new

def detect_subpopulations(adata, surv, adata_new, entropy_threshold=0.7):
    """Train scSurvival and score how well it recovers the survival-related cells.

    The model is fitted on ``adata`` / ``surv``. A 2-cluster KMeans on the
    training attention values gives an attention threshold (the mean of the
    two cluster centers). Cells of ``adata_new`` are then predicted and
    labelled 'bad.survival' (attention >= threshold and hazard_adj > 0),
    'good.survival' (attention >= threshold and hazard_adj <= 0) or 'other'.

    Args:
        adata: training AnnData with ``obs['sample']``.
        surv: survival table passed to ``scSurvivalRun``.
        adata_new: AnnData to annotate; ``obs['sim_group']`` holds the truth.
        entropy_threshold: forwarded to ``scSurvivalRun``.

    Returns:
        tuple ``(clf_report_df, adata_new)``: the transposed
        ``classification_report`` as a DataFrame and the AnnData returned by
        ``PredictIndSample``.
    """
    # Training configuration, forwarded unchanged to scSurvivalRun.
    # Strategy options noted by the author: jointly, alternating, alternating_lightly.
    run_kwargs = dict(
        sample_column='sample',
        surv=surv,
        feature_flavor='AE',
        entropy_threshold=entropy_threshold,
        lambdas=(0.01, 1.0),
        pretrain_epochs=200,
        epochs=500,
        weight_decay=0.01,
        lr=0.001,
        patience=100,
        rec_likelihood='ZIG',
        do_scale_ae=False,
        beta=0.1,
        tau=0.2,
        sample_size_ae=None,
        finetue_lr_factor=0.1,
        gene_weight_alpha=0.2,
        gamma_beta_weight=(0.1, 0.0),
        once_load_to_gpu=True,
        use_amp=False,
        fitnetune_strategy='alternating',
    )
    adata, surv, model = scSurvivalRun(adata, **run_kwargs)

    # Attention threshold: midpoint between the two KMeans centers.
    train_attention = adata.obs['attention'].values.reshape(-1, 1)
    km = KMeans(n_clusters=2, random_state=42).fit(train_attention)
    atten_thr = km.cluster_centers_.flatten().mean()

    adata_new, _ = PredictIndSample(adata_new, adata, model)

    obs_new = adata_new.obs
    attention = obs_new['attention'].values
    hazard_adj = obs_new['hazard_adj'].values
    # Not used below; the lookup is retained so a missing 'hazard' column
    # still fails at this point.
    hazard = obs_new['hazard'].values

    # higher risk -> bad.survival, lower risk -> good.survival, inattentive -> other
    attentive = attention >= atten_thr
    risk_group_recoded = np.full(attention.shape[0], 'other', dtype=object)
    risk_group_recoded[attentive & (hazard_adj > 0)] = 'bad.survival'
    risk_group_recoded[attentive & (hazard_adj <= 0)] = 'good.survival'

    report = classification_report(
        obs_new['sim_group'].values,
        risk_group_recoded,
        output_dict=True,
        zero_division=0,
    )
    return pd.DataFrame(report).T, adata_new

def cross_validation_samples(adata, surv, entropy_threshold=0.7, n_splits=5):
    """Sample-level K-fold cross-validation of scSurvival.

    Samples (``obs['sample']``) are split into ``n_splits`` folds. For each
    fold, HVGs are selected on the training cells only, the model is trained
    with an internal 20% validation split, every held-out sample is scored
    with ``PredictIndSample`` and the test concordance index is computed.

    Side effect: ``surv`` is modified in place; the columns
    'cv_hazards_adj_patient' and 'cv_hazard_percentile_patient' are added and
    filled with the out-of-fold predictions.

    Args:
        adata: AnnData whose ``.raw`` holds the full gene set and whose
            ``obs['sample']`` names the sample of each cell.
        surv: DataFrame indexed by sample name with 'time' and 'status'.
        entropy_threshold: forwarded to ``scSurvivalRun``.
        n_splits: number of folds (default 5, the previously hard-coded value).

    Returns:
        dict with keys:
            'mean_cindex', 'std_cindex': mean / population std of the
                per-fold test c-index.
            'cindexs_df': per-fold c-index, indexed 'fold0', 'fold1', ...
            'surv_test_df': the held-out rows of ``surv`` from all folds,
                concatenated (includes the out-of-fold predictions).
            'cv_hazards_adj_cells': per-cell out-of-fold ``hazard_adj``,
                aligned with the rows of ``adata.raw``.
    """
    # Cross-validate over samples, starting from the full gene set kept in .raw.
    adata = adata.raw.to_adata()
    adata.obs['patient_no'] = adata.obs['sample']
    patients = adata.obs['patient_no'].unique()

    cv_hazards_adj_cells = np.zeros((adata.shape[0], ))
    surv['cv_hazards_adj_patient'] = 0.0
    surv['cv_hazard_percentile_patient'] = 0.0
    cindexs = []
    surv_test_all_folds = []

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    for i, (train_index, test_index) in enumerate(kf.split(patients)):

        print(f'fold {i}, train_size: {train_index.shape[0]}, test_size: {test_index.shape[0]}')
        train_patients = patients[train_index]
        test_patients = patients[test_index]

        # train
        adata_train = adata[adata.obs['patient_no'].isin(train_patients), :].copy()

        ## select HVGs on the training set only (avoids leaking test cells)
        sc.pp.highly_variable_genes(adata_train, n_top_genes=2000, subset=False, flavor='seurat')
        hvgs = adata_train.var[adata_train.var['highly_variable']].index.tolist()
        # Materialize the subset; the training routine writes to .obs, which
        # on a view would force an implicit copy with a warning.
        adata_train = adata_train[:, hvgs].copy()

        surv_train = surv.loc[surv.index.isin(train_patients), :].copy()

        adata_train, surv_train, model = scSurvivalRun(
            adata_train,
            sample_column='sample',
            surv=surv_train,
            # batch_key='batch',
            feature_flavor='AE',
            entropy_threshold=entropy_threshold,
            validate=True,
            validate_ratio=0.2,
            validate_metric='ccindex',
            lambdas=(0.01, 1.0),
            pretrain_epochs=200,
            epochs=500,
            weight_decay=0.01,
            lr=0.001,
            patience=100,
            rec_likelihood='ZIG',
            do_scale_ae=False,
            beta=0.1, tau=0.2,
            sample_size_ae=None,
            finetue_lr_factor=0.1,
            gene_weight_alpha=0.2,
            gamma_beta_weight=(0.1, 0.0),
            once_load_to_gpu=True,
            use_amp=False,
            fitnetune_strategy='alternating', # jointly, alternating, alternating_lightly,
            )

        # Hazards are negated because concordance_index expects scores that
        # increase with survival time.
        train_cindex = concordance_index(surv_train['time'], -surv_train['patient_hazards'], surv_train['status'])
        print(f'train c-index: {train_cindex:.4f}')

        # test
        print('testing...')
        adata_test = adata[adata.obs['patient_no'].isin(test_patients), :].copy()
        adata_test = adata_test[:, hvgs]

        # Silence the per-sample prints of PredictIndSample. A fresh local
        # buffer is used so nothing accumulates across folds and the code does
        # not depend on a module-level variable that another cell may rebind.
        with contextlib.redirect_stdout(io.StringIO()):
            for test_patient in test_patients:
                adata_test_patient = adata_test[adata_test.obs['patient_no'] == test_patient, :].copy()
                adata_test_patient, patient_hazard = PredictIndSample(adata_test_patient, adata_train, model)
                cv_hazards_adj_cells[adata.obs['patient_no'] == test_patient] = adata_test_patient.obs['hazard_adj'].values
                surv.loc[surv.index == test_patient, 'cv_hazards_adj_patient'] = patient_hazard
                surv.loc[surv.index == test_patient, 'cv_hazard_percentile_patient'] = percentileofscore(surv_train['patient_hazards'], patient_hazard, kind='rank')

        surv_test = surv.loc[surv.index.isin(test_patients), :]
        c_index = concordance_index(surv_test['time'], -surv_test['cv_hazards_adj_patient'], surv_test['status'])

        cindexs.append(c_index)
        surv_test_all_folds.append(surv_test)

        print(f'c-index: {c_index:.4f}')
        print('='*50)

    mean_cindex = np.mean(cindexs)
    std_cindex = np.std(cindexs)

    print(f'mean c-index: {mean_cindex:.4f} ± {std_cindex:.4f}')
    # Label folds by how many were actually evaluated rather than a fixed 5.
    cindexs_df = pd.DataFrame(cindexs, columns=['c-index'], index=['fold%d' % k for k in range(len(cindexs))])

    cindex_results = {
        'mean_cindex': mean_cindex,
        'std_cindex': std_cindex,
        'cindexs_df': cindexs_df,
        'surv_test_df': pd.concat(surv_test_all_folds),
        'cv_hazards_adj_cells': cv_hazards_adj_cells,
    }

    return cindex_results


In [4]:
# Show the library search paths of the embedded R session (environment sanity check).
ro.r('.libPaths()')

0,1
'C:/Users/renta/AppData...,'C:/Program Files/R/R-4...


In [5]:
# Attach the R packages; this also clears the R workspace and seeds R's RNG.
load_r_packages()


    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
    

In [6]:
# Experiment: detect survival-associated cell subpopulations on simulated data
# and log per-class precision / recall / F1 for every parameter combination.
from utils import Logger
from itertools import product
cell_surv_ratio = 0.15

load_r_packages()

# Full sweep (kept for reference):
# param_grid = {
#     'seed': range(1, 11),
#     'sub_cluster_ratio': [0.3, 0.5, 0.7],
#     'de_facloc': [0.2, 0.35, 0.5],
# }

param_grid = {
    'seed': range(1, 2),
    'sub_cluster_ratio': [0.3],
    'de_facloc': [0.35],
}
keys, values = zip(*param_grid.items())
combos = [dict(zip(keys, v)) for v in product(*values)]

save_root_path = './results/revision-sim6-python/'
# Make sure the results directory exists before anything is written into it.
os.makedirs(save_root_path, exist_ok=True)
logger = Logger(save_path=f'{save_root_path}cell_subpopulation_logs_rpcase.csv')

for i, params in enumerate(combos):
    logger.log_dict(params)
    seed = params['seed']
    sub_cluster_ratio = params['sub_cluster_ratio']
    de_facloc = params['de_facloc']

    print(f'Running {i+1}/{len(combos)}: seed={seed}, sub_cluster_ratio={sub_cluster_ratio}, de_facloc={de_facloc}')

    # save_root_path already ends with '/', so no extra separator is inserted.
    # The trailing '/' is required: the R plotting code builds file names
    # with paste0(save_path, ...).
    save_path = f'{save_root_path}nbgs-{sub_cluster_ratio}_defacloc-{de_facloc}/seed-{seed}/'
    ro.globalenv['save_path'] = save_path

    # Plots and per-cell outputs are only produced for the first seed.
    make_plots = seed == 1
    if make_plots:
        os.makedirs(save_path, exist_ok=True)

    simulated_base_sc_dataset(seed=seed, plot=make_plots, cell_surv_ratio=cell_surv_ratio, sub_cluster_ratio=sub_cluster_ratio, de_facloc=de_facloc)
    datasets = simulated_sc_datasets(plot=make_plots)

    adata, surv, adata_new = organize_data_for_model(datasets)
    clf_report_df, adata_new = detect_subpopulations(adata, surv, adata_new, entropy_threshold=0.7)

    metrics = ['precision', 'recall', 'f1-score']
    clf_rst = {metric: clf_report_df.loc['macro avg', metric] for metric in metrics}

    # A class missing from the report is logged as 0.0.
    for cls in ['good.survival', 'bad.survival', 'other']:
        for metric in metrics:
            key = f'{cls}_{metric}'
            if cls in clf_report_df.index:
                clf_rst[key] = clf_report_df.loc[cls, metric]
            else:
                clf_rst[key] = 0.0

    logger.log_dict(clf_rst)
    logger.get_logs_df()

    if make_plots:
        adata_new.obs.to_csv(f'{save_path}adata_new_obs.csv')

Running 1/1: seed=1, sub_cluster_ratio=0.3, de_facloc=0.35
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 10000
Number of edges: 279457

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7676
Number of communities: 3
Elapsed time: 1 seconds

100%|██████████| 100/100 [00:00<00:00, 39133.27it/s]
Pretraining: 100%|██████████| 200/200 [05:31<00:00,  1.66s/it, ae_loss=167]   
Finetuning: 100%|██████████| 500/500 [52:50<00:00,  6.34s/it, ae_loss=139, atten_entropy=0.688, cox_loss=1.92, loss=3.31]


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
gene missing rate: 0.00%
Added hazard and attention to adata_new.obs.


In [7]:
# Experiment: sample-level cross-validation (c-index) over the simulation
# parameter grid; mean / std c-index are logged per combination.
# NOTE: `cell_surv_ratio` is defined in the subpopulation-detection cell above.
from utils import Logger
from itertools import product
load_r_packages()
param_grid = {
    'seed': [1],
    'sub_cluster_ratio': [0.3, 0.5, 0.7],
    'de_facloc': [0.2, 0.35, 0.5],
}
keys, values = zip(*param_grid.items())
combos = [dict(zip(keys, v)) for v in product(*values)]

save_root_path = './results/revision-sim6-python/'
# Make sure the results directory exists before anything is written into it.
os.makedirs(save_root_path, exist_ok=True)
logger = Logger(save_path=f'{save_root_path}cv_logs.csv')

for i, params in enumerate(combos):
    logger.log_dict(params)
    seed = params['seed']
    sub_cluster_ratio = params['sub_cluster_ratio']
    de_facloc = params['de_facloc']

    print(f'Running {i+1}/{len(combos)}: seed={seed}, sub_cluster_ratio={sub_cluster_ratio}, de_facloc={de_facloc}')

    simulated_base_sc_dataset(seed=seed, plot=False, cell_surv_ratio=cell_surv_ratio, sub_cluster_ratio=sub_cluster_ratio, de_facloc=de_facloc)
    datasets = simulated_sc_datasets(plot=False)

    adata, surv, adata_new = organize_data_for_model(datasets)
    cv_output = cross_validation_samples(adata, surv, entropy_threshold=0.7)

    # Only the scalar summary is logged; the per-fold table stays in cv_output.
    cindex_results = {
        'mean_cindex': cv_output['mean_cindex'],
        'std_cindex': cv_output['std_cindex']
    }

    logger.log_dict(cindex_results)
    logger.get_logs_df()


Running 1/9: seed=1, sub_cluster_ratio=0.3, de_facloc=0.2

100%|██████████| 100/100 [00:00<00:00, 98945.60it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=203]   
Finetuning:  53%|█████▎    | 264/500 [03:51<03:26,  1.14it/s, ae_loss=170, atten_entropy=0.677, ccindex_val=0.943, cox_loss=1.55, loss=3.24]


Early stopping with best validation ccindex: 0.9589.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9753
testing...
c-index: 0.9562
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=184]   
Finetuning:  64%|██████▍   | 321/500 [04:42<02:37,  1.14it/s, ae_loss=149, atten_entropy=0.656, ccindex_val=0.931, cox_loss=1.66, loss=3.15]


Early stopping with best validation ccindex: 0.9507.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9759
testing...
c-index: 0.9641
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=200]   
Finetuning:  39%|███▊      | 193/500 [02:54<04:37,  1.11it/s, ae_loss=174, atten_entropy=0.657, ccindex_val=0.95, cox_loss=1.51, loss=3.25] 


Early stopping with best validation ccindex: 0.9593.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9790
testing...
c-index: 0.9331
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:34<00:00,  2.13it/s, ae_loss=152]   
Finetuning:  61%|██████    | 305/500 [04:39<02:58,  1.09it/s, ae_loss=119, atten_entropy=0.665, ccindex_val=0.952, cox_loss=1.53, loss=2.73]


Early stopping with best validation ccindex: 0.9605.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9754
testing...
c-index: 0.8889
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=139]   
Finetuning:  36%|███▋      | 182/500 [02:44<04:48,  1.10it/s, ae_loss=114, atten_entropy=0.685, ccindex_val=0.942, cox_loss=1.61, loss=2.75]


Early stopping with best validation ccindex: 0.9544.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9687
testing...
c-index: 0.9854
mean c-index: 0.9455 ± 0.0329
Running 2/9: seed=1, sub_cluster_ratio=0.3, de_facloc=0.35

100%|██████████| 100/100 [00:00<00:00, 50093.20it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=123]   
Finetuning:  41%|████      | 205/500 [03:06<04:28,  1.10it/s, ae_loss=97.5, atten_entropy=0.713, ccindex_val=0.955, cox_loss=1.66, loss=2.65]


Early stopping with best validation ccindex: 0.9687.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9819
testing...
c-index: 0.9708
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=152]   
Finetuning:  63%|██████▎   | 313/500 [04:41<02:48,  1.11it/s, ae_loss=116, atten_entropy=0.694, ccindex_val=0.968, cox_loss=1.47, loss=2.63]


Early stopping with best validation ccindex: 0.9707.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9871
testing...
c-index: 0.9902
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=160]   
Finetuning:  57%|█████▋    | 285/500 [04:17<03:14,  1.11it/s, ae_loss=127, atten_entropy=0.7, ccindex_val=0.966, cox_loss=1.58, loss=2.86]  


Early stopping with best validation ccindex: 0.9765.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9855
testing...
c-index: 0.9593
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=140]   
Finetuning:  36%|███▌      | 180/500 [02:40<04:46,  1.12it/s, ae_loss=115, atten_entropy=0.709, ccindex_val=0.951, cox_loss=1.72, loss=2.87]


Early stopping with best validation ccindex: 0.9670.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9804
testing...
c-index: 0.9532
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=167]   
Finetuning:  57%|█████▋    | 285/500 [04:13<03:11,  1.12it/s, ae_loss=136, atten_entropy=0.692, ccindex_val=0.964, cox_loss=1.53, loss=2.89]


Early stopping with best validation ccindex: 0.9681.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9822
testing...
c-index: 0.9818
mean c-index: 0.9711 ± 0.0137
Running 3/9: seed=1, sub_cluster_ratio=0.3, de_facloc=0.5

100%|██████████| 100/100 [00:00<00:00, 92304.23it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=134]   
Finetuning:  51%|█████     | 254/500 [03:47<03:40,  1.12it/s, ae_loss=101, atten_entropy=0.691, ccindex_val=0.963, cox_loss=1.58, loss=2.6] 


Early stopping with best validation ccindex: 0.9703.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9832
testing...
c-index: 0.9781
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=206]   
Finetuning:  44%|████▍     | 220/500 [03:17<04:10,  1.12it/s, ae_loss=177, atten_entropy=0.69, ccindex_val=0.967, cox_loss=1.6, loss=3.36]  


Early stopping with best validation ccindex: 0.9698.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9848
testing...
c-index: 0.9869
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=128]   
Finetuning:  45%|████▍     | 223/500 [03:17<04:04,  1.13it/s, ae_loss=102, atten_entropy=0.742, ccindex_val=0.959, cox_loss=1.68, loss=2.74]


Early stopping with best validation ccindex: 0.9731.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9839
testing...
c-index: 0.9709
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=123]   
Finetuning:  39%|███▉      | 196/500 [02:57<04:35,  1.10it/s, ae_loss=98.3, atten_entropy=0.708, ccindex_val=0.958, cox_loss=1.68, loss=2.67]


Early stopping with best validation ccindex: 0.9706.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9851
testing...
c-index: 0.9357
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=177]   
Finetuning:  34%|███▍      | 169/500 [02:27<04:49,  1.14it/s, ae_loss=151, atten_entropy=0.691, ccindex_val=0.957, cox_loss=1.71, loss=3.22]

Early stopping with best validation ccindex: 0.9685.



  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9811
testing...
c-index: 0.9927
mean c-index: 0.9729 ± 0.0200
Running 4/9: seed=1, sub_cluster_ratio=0.5, de_facloc=0.2

100%|██████████| 100/100 [00:00<00:00, 99508.99it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=173]   
Finetuning: 100%|██████████| 500/500 [07:16<00:00,  1.15it/s, ae_loss=133, atten_entropy=0.674, ccindex_val=0.909, cox_loss=1.61, loss=2.94]
  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9614
testing...
c-index: 0.8311
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=240]   
Finetuning:  41%|████      | 206/500 [03:00<04:16,  1.14it/s, ae_loss=215, atten_entropy=0.684, ccindex_val=0.883, cox_loss=1.92, loss=4.07]


Early stopping with best validation ccindex: 0.9009.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9267
testing...
c-index: 0.9091
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=184]   
Finetuning:  58%|█████▊    | 292/500 [04:15<03:02,  1.14it/s, ae_loss=154, atten_entropy=0.703, ccindex_val=0.886, cox_loss=1.59, loss=3.14]


Early stopping with best validation ccindex: 0.8877.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9456
testing...
c-index: 0.9000
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=170]   
Finetuning:  44%|████▎     | 218/500 [03:07<04:03,  1.16it/s, ae_loss=145, atten_entropy=0.808, ccindex_val=0.898, cox_loss=1.74, loss=3.3] 


Early stopping with best validation ccindex: 0.9062.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9355
testing...
c-index: 0.8500
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=157]   
Finetuning:  51%|█████▏    | 257/500 [03:40<03:28,  1.16it/s, ae_loss=132, atten_entropy=0.691, ccindex_val=0.888, cox_loss=1.59, loss=2.91]


Early stopping with best validation ccindex: 0.9019.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9463
testing...
c-index: 0.8867
mean c-index: 0.8754 ± 0.0299
Running 5/9: seed=1, sub_cluster_ratio=0.5, de_facloc=0.35

100%|██████████| 100/100 [00:00<00:00, 100342.20it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=148]   
Finetuning:  52%|█████▏    | 261/500 [03:42<03:23,  1.17it/s, ae_loss=121, atten_entropy=0.752, ccindex_val=0.958, cox_loss=1.55, loss=2.81]


Early stopping with best validation ccindex: 0.9682.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9761
testing...
c-index: 0.9696
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.15it/s, ae_loss=186]   
Finetuning:  41%|████      | 203/500 [02:53<04:14,  1.17it/s, ae_loss=161, atten_entropy=0.699, ccindex_val=0.95, cox_loss=1.56, loss=3.17] 


Early stopping with best validation ccindex: 0.9621.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9813
testing...
c-index: 0.9935
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=193]   
Finetuning:  46%|████▌     | 229/500 [03:16<03:52,  1.17it/s, ae_loss=163, atten_entropy=0.708, ccindex_val=0.953, cox_loss=1.66, loss=3.31]


Early stopping with best validation ccindex: 0.9727.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9855
testing...
c-index: 0.9737
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=247]   
Finetuning:  52%|█████▏    | 258/500 [03:41<03:27,  1.17it/s, ae_loss=214, atten_entropy=0.701, ccindex_val=0.962, cox_loss=1.55, loss=3.69]


Early stopping with best validation ccindex: 0.9667.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9801
testing...
c-index: 0.9667
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=154]   
Finetuning:  55%|█████▌    | 276/500 [03:59<03:14,  1.15it/s, ae_loss=127, atten_entropy=0.705, ccindex_val=0.954, cox_loss=1.58, loss=2.86]


Early stopping with best validation ccindex: 0.9622.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9842
testing...
c-index: 0.9733
mean c-index: 0.9754 ± 0.0094
Running 6/9: seed=1, sub_cluster_ratio=0.5, de_facloc=0.5

100%|██████████| 100/100 [00:00<00:00, 95891.72it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:31<00:00,  2.18it/s, ae_loss=147]   
Finetuning:  54%|█████▎    | 268/500 [03:48<03:18,  1.17it/s, ae_loss=119, atten_entropy=0.703, ccindex_val=0.964, cox_loss=1.64, loss=2.83]


Early stopping with best validation ccindex: 0.9715.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9855
testing...
c-index: 0.9966
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=156]   
Finetuning:  36%|███▌      | 181/500 [02:34<04:32,  1.17it/s, ae_loss=132, atten_entropy=0.704, ccindex_val=0.955, cox_loss=1.6, loss=2.93] 


Early stopping with best validation ccindex: 0.9690.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9815
testing...
c-index: 0.9773
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=259]   
Finetuning:  40%|███▉      | 199/500 [02:50<04:18,  1.16it/s, ae_loss=230, atten_entropy=0.719, ccindex_val=0.953, cox_loss=1.89, loss=4.21]


Early stopping with best validation ccindex: 0.9705.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9857
testing...
c-index: 0.9579
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.17it/s, ae_loss=205]   
Finetuning:  44%|████▍     | 222/500 [03:10<03:58,  1.17it/s, ae_loss=181, atten_entropy=0.7, ccindex_val=0.949, cox_loss=1.58, loss=3.39]  


Early stopping with best validation ccindex: 0.9649.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9811
testing...
c-index: 0.9533
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=195]   
Finetuning:  39%|███▉      | 197/500 [02:49<04:20,  1.16it/s, ae_loss=172, atten_entropy=0.699, ccindex_val=0.953, cox_loss=1.64, loss=3.36]


Early stopping with best validation ccindex: 0.9680.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9816
testing...
c-index: 0.9867
mean c-index: 0.9744 ± 0.0165
Running 7/9: seed=1, sub_cluster_ratio=0.7, de_facloc=0.2

100%|██████████| 100/100 [00:00<00:00, 107961.49it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.17it/s, ae_loss=129]   
Finetuning:  67%|██████▋   | 336/500 [04:48<02:21,  1.16it/s, ae_loss=96.6, atten_entropy=0.735, ccindex_val=0.928, cox_loss=1.65, loss=2.65]


Early stopping with best validation ccindex: 0.9619.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9790
testing...
c-index: 0.9886
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=149]   
Finetuning: 100%|██████████| 500/500 [07:10<00:00,  1.16it/s, ae_loss=105, atten_entropy=0.703, ccindex_val=0.948, cox_loss=1.57, loss=2.63]
  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9815
testing...
c-index: 0.9561
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=151]   
Finetuning:  56%|█████▋    | 282/500 [04:06<03:10,  1.15it/s, ae_loss=116, atten_entropy=0.704, ccindex_val=0.935, cox_loss=2.02, loss=3.19]


Early stopping with best validation ccindex: 0.9646.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9786
testing...
c-index: 0.9551
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.16it/s, ae_loss=138]   
Finetuning:  78%|███████▊  | 390/500 [05:46<01:37,  1.12it/s, ae_loss=95.1, atten_entropy=0.704, ccindex_val=0.937, cox_loss=1.61, loss=2.56]


Early stopping with best validation ccindex: 0.9575.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9793
testing...
c-index: 0.9221
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:32<00:00,  2.15it/s, ae_loss=117]   
Finetuning: 100%|██████████| 500/500 [07:14<00:00,  1.15it/s, ae_loss=74, atten_entropy=0.686, ccindex_val=0.943, cox_loss=1.59, loss=2.33]  
  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9833
testing...
c-index: 0.9785
mean c-index: 0.9601 ± 0.0230
Running 8/9: seed=1, sub_cluster_ratio=0.7, de_facloc=0.35

100%|██████████| 100/100 [00:00<00:00, 100751.96it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:34<00:00,  2.11it/s, ae_loss=109]   
Finetuning:  55%|█████▌    | 277/500 [04:07<03:19,  1.12it/s, ae_loss=77.7, atten_entropy=0.8, ccindex_val=0.959, cox_loss=1.67, loss=2.55]  


Early stopping with best validation ccindex: 0.9706.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9858
testing...
c-index: 0.9773
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=149]   
Finetuning:  55%|█████▌    | 277/500 [04:10<03:21,  1.10it/s, ae_loss=117, atten_entropy=0.697, ccindex_val=0.955, cox_loss=1.7, loss=2.87] 


Early stopping with best validation ccindex: 0.9705.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9897
testing...
c-index: 0.9649
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=127]   
Finetuning:  49%|████▉     | 244/500 [03:35<03:46,  1.13it/s, ae_loss=97.8, atten_entropy=0.732, ccindex_val=0.956, cox_loss=1.63, loss=2.63]


Early stopping with best validation ccindex: 0.9699.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9847
testing...
c-index: 0.9607
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:34<00:00,  2.12it/s, ae_loss=175]   
Finetuning:  54%|█████▍    | 270/500 [04:04<03:28,  1.10it/s, ae_loss=139, atten_entropy=0.697, ccindex_val=0.962, cox_loss=1.64, loss=3.03]


Early stopping with best validation ccindex: 0.9727.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9899
testing...
c-index: 0.9708
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=98]    
Finetuning:  38%|███▊      | 190/500 [02:46<04:31,  1.14it/s, ae_loss=72.2, atten_entropy=0.789, ccindex_val=0.961, cox_loss=1.64, loss=2.45]


Early stopping with best validation ccindex: 0.9727.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9870
testing...
c-index: 0.9816
mean c-index: 0.9710 ± 0.0077
Running 9/9: seed=1, sub_cluster_ratio=0.7, de_facloc=0.5

100%|██████████| 100/100 [00:00<00:00, 100438.31it/s]


fold 0, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.14it/s, ae_loss=92.8]  
Finetuning:  68%|██████▊   | 340/500 [05:10<02:26,  1.09it/s, ae_loss=60.4, atten_entropy=0.727, ccindex_val=0.958, cox_loss=1.51, loss=2.14]


Early stopping with best validation ccindex: 0.9688.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9867
testing...
c-index: 0.9972
fold 1, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:34<00:00,  2.12it/s, ae_loss=122]   
Finetuning:  42%|████▏     | 211/500 [03:10<04:20,  1.11it/s, ae_loss=94, atten_entropy=0.746, ccindex_val=0.964, cox_loss=1.79, loss=2.77]  


Early stopping with best validation ccindex: 0.9724.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9893
testing...
c-index: 0.9883
fold 2, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.13it/s, ae_loss=103]   
Finetuning:  49%|████▉     | 245/500 [03:41<03:50,  1.11it/s, ae_loss=70.7, atten_entropy=0.749, ccindex_val=0.959, cox_loss=1.57, loss=2.33]


Early stopping with best validation ccindex: 0.9711.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9861
testing...
c-index: 0.9719
fold 3, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:35<00:00,  2.10it/s, ae_loss=102]   
Finetuning:  40%|███▉      | 198/500 [02:53<04:25,  1.14it/s, ae_loss=74.8, atten_entropy=0.739, ccindex_val=0.968, cox_loss=1.64, loss=2.43]


Early stopping with best validation ccindex: 0.9726.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9887
testing...
c-index: 0.9610
fold 4, train_size: 80, test_size: 20
Validation mode is enabled, will split 20% of the data for validation.


Pretraining: 100%|██████████| 200/200 [01:33<00:00,  2.15it/s, ae_loss=128]   
Finetuning:  38%|███▊      | 189/500 [02:45<04:31,  1.14it/s, ae_loss=103, atten_entropy=0.75, ccindex_val=0.96, cox_loss=1.67, loss=2.75]  


Early stopping with best validation ccindex: 0.9713.


  model.scaler = None


Added hazard and attention to adata.obs.
Added patient_hazards to surv.
train c-index: 0.9889
testing...
c-index: 0.9939
mean c-index: 0.9825 ± 0.0138
