In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import os
import seaborn as sns
from DeepScence.api import DeepScence
from tqdm import tqdm
import random

import squidpy as sq
import spatialdata as sd
# from spatialdata_io import xenium

from SenCID.api import SenCID
from SenCID.Pred import GetFeatures
from dca.api import dca
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Run from the project root so the relative "./data/..." and "./code/..."
# paths used in the cells below resolve. Set the AGING_PROJECT_DIR environment
# variable to point at another checkout; otherwise the original local path is used.
os.chdir(
    os.environ.get(
        "AGING_PROJECT_DIR",
        "/Users/lele/Library/Mobile Documents/com~apple~CloudDocs/Research/Aging",
    )
)

我困得要死


## Sensitivity analysis: expanding a simulated Xenium gene panel

In [9]:
# Load the scored dataset and replace X with the matrix stored in `.raw`
# (presumably the unnormalised counts -- TODO confirm).
adata = sc.read_h5ad("./data/in_vitro/scored_h5ad/subsets/hayflick_dca.h5ad")
adata.X = adata.raw.X

# Base panel: genes listed in the Xenium panel metadata that are also present
# in the dataset. `xdata` is the dataset restricted to those genes.
panel = np.intersect1d(
    pd.read_csv('./data/Xenium/Xenium_hMulti_v1_metadata.csv')["Gene"].values,
    adata.var_names,
)
xdata = adata[:, panel].copy()

# SenCID feature list: one entry per line, surrounding whitespace removed.
with open("./code/SenCID/SenCID/resource/seneset.txt", "r") as f:
    CID_features = list(map(str.strip, f))

# Core gene set: index labels of the rows whose "occurrence" is at least 5.
core_gs = list(
    pd.read_csv("./data/coreGS_v2.csv", index_col=0)
    .loc[lambda table: table["occurrence"] >= 5]
    .index
)


#### Check library sizes

In [None]:
# Per-cell total over the panel genes. When X is a scipy sparse matrix,
# `X.sum(axis=1)` is a 2-D (n_cells, 1) np.matrix; flatten it so that a plain
# 1-D column is stored in `obs` (a no-op when X is a dense ndarray).
xdata.obs['n_counts'] = np.asarray(xdata.X.sum(axis=1)).ravel()
plt.hist(xdata.obs["n_counts"], bins=100)
plt.title("simulated n_counts")
plt.xlabel("n_counts per cell")
plt.ylabel("number of cells")
plt.show()

#### Analysis

In [10]:
# ---------------------------------------------------------------------------
# Sensitivity analysis: grow the base panel step by step and record the AUROC
# of SenCID and DeepScence against the "SnC" labels at every step.
# ---------------------------------------------------------------------------

# Configuration
step_size = 20    # nominal number of genes added to each panel per step
max_len = 100     # nominal number of genes added by the final step
n_runs = 5        # number of repetitions, each with its own seed
master_seed = 0   # fixes the per-run seeds so that a re-run reproduces the CSV
output_path = "./data/Xenium/results/WI38_results.csv"
args = {
    "binarize": False,
    "verbose": False,
    "denoise": False,
    "lambda_ortho": 1,
    "random_state": 0,
}

# Candidate genes are present in the full dataset but not on the base panel.
# They do not depend on the run, so they are computed once. `dict.fromkeys`
# drops repeated entries (keeping order) so a gene can never be added twice.
panel_genes = set(panel)
core_candidate = [
    gene for gene in dict.fromkeys(core_gs)
    if gene not in panel_genes and gene in adata.var_names
]
CID_candidate_pool = [
    gene for gene in dict.fromkeys(CID_features)
    if gene not in panel_genes and gene in adata.var_names
]

# Distinct per-run seeds in [0, 1000], derived deterministically from master_seed.
run_seeds = random.Random(master_seed).sample(range(1001), n_runs)

records = []
for run, seed in enumerate(run_seeds):
    # Seed the global RNG as the original code did (in case library code draws
    # from it), but sample genes from a dedicated generator so the selection
    # depends only on `seed`.
    random.seed(seed)
    rng = random.Random(seed)
    run_args = {**args, "random_state": seed}

    CID_features_candidate = list(CID_candidate_pool)
    panel_DS = np.array(panel, copy=True)
    panel_CID = np.array(panel, copy=True)

    for offset in range(0, max_len, step_size):
        genes_added = offset + step_size

        # DeepScence panel: append the next `step_size` core genes in order.
        # Once `core_candidate` is exhausted the panel stops growing even though
        # `genes_added` keeps increasing; `panel_size` below records the real size.
        next_core_genes = core_candidate[offset:offset + step_size]
        if next_core_genes:
            panel_DS = np.append(panel_DS, next_core_genes)

        # SenCID panel: append randomly chosen, not-yet-used SenCID features.
        num_genes_to_add_CID = min(step_size, len(CID_features_candidate))
        if num_genes_to_add_CID > 0:
            selected_genes = rng.sample(CID_features_candidate, num_genes_to_add_CID)
            panel_CID = np.append(panel_CID, selected_genes)
            already_selected = set(selected_genes)
            CID_features_candidate = [
                gene for gene in CID_features_candidate
                if gene not in already_selected
            ]

        print(
            f"run {run} (seed {seed}) | +{genes_added} genes | "
            f"DS panel: {len(panel_DS)} | CID panel: {len(panel_CID)}"
        )

        # Construct the simulated Xenium datasets and drop cells with no
        # detected gene on the respective panel.
        xdata_DS = adata[:, panel_DS].copy()
        xdata_CID = adata[:, panel_CID].copy()
        sc.pp.filter_cells(xdata_CID, min_genes=1)
        sc.pp.filter_cells(xdata_DS, min_genes=1)

        # Run SenCID.
        pred_dict, recSID, tmpfiles = SenCID(
            adata=xdata_CID,
            sidnums=[1, 2, 3, 4, 5, 6],
            denoising=False,
            binarize=True,
            threads=1,
            savetmp=True,
        )
        # For each row of recSID, take the SID_Score at the same position from
        # the prediction table of the recommended SID model.
        scores = [
            pred_dict[rec]["SID_Score"].iloc[pos]
            for pos, rec in enumerate(recSID["RecSID"])
        ]
        # Align the scores to the cells of xdata_CID; cells without a score
        # get the neutral value 0.5.
        scores_CID = (
            pd.Series(scores, index=recSID.index, dtype=float)
            .reindex(xdata_CID.obs_names)
            .fillna(0.5)
            .values
        )

        # Run DeepScence.
        xdata_DS = DeepScence(xdata_DS, **run_args)

        # Calculate AUROC against the "SnC" labels.
        auroc_DS = roc_auc_score(xdata_DS.obs["SnC"].values, xdata_DS.obs["ds"].values)
        auroc_CID = roc_auc_score(xdata_CID.obs["SnC"].values, scores_CID)
        records.append({
            "method": "SenCID",
            "genes_added": genes_added,
            "AUC": auroc_CID,
            "run": run,
            "seed": seed,
            "panel_size": len(panel_CID),
        })
        records.append({
            "method": "DeepScence",
            "genes_added": genes_added,
            "AUC": auroc_DS,
            "run": run,
            "seed": seed,
            "panel_size": len(panel_DS),
        })

final_results = pd.DataFrame(records)
# Make sure the target directory exists so the results are not lost at the end.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final_results.to_csv(output_path)

163
163
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:19] Input is preprocessed, preprocessed 163 genes and 2968 cells.


Finished. Giving SID scores and SID Recommendation...


[2024-10-02 20:19] Using 26 genes in the gene set for scoring
[2024-10-02 20:19] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:19] Training on 2672 cells, validate on 296 cells.


171
183
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:19] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:19] Using 34 genes in the gene set for scoring
[2024-10-02 20:19] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:19] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
203
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:20] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:20] Using 34 genes in the gene set for scoring
[2024-10-02 20:20] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:20] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
223
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:20] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:20] Using 34 genes in the gene set for scoring
[2024-10-02 20:20] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:20] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
243
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:20] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:20] Using 34 genes in the gene set for scoring
[2024-10-02 20:20] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:20] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
163
163
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:21] Input is preprocessed, preprocessed 163 genes and 2968 cells.
[2024-10-02 20:21] Using 26 genes in the gene set for scoring
[2024-10-02 20:21] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:21] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
183
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:21] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:21] Using 34 genes in the gene set for scoring
[2024-10-02 20:21] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:21] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
203
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:21] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:21] Using 34 genes in the gene set for scoring
[2024-10-02 20:21] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:21] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
223
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:21] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:21] Using 34 genes in the gene set for scoring
[2024-10-02 20:21] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:21] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
243
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:22] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:22] Using 34 genes in the gene set for scoring
[2024-10-02 20:22] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:22] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
163
163
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:22] Input is preprocessed, preprocessed 163 genes and 2968 cells.
[2024-10-02 20:22] Using 26 genes in the gene set for scoring
[2024-10-02 20:22] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:22] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
183
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:22] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:22] Using 34 genes in the gene set for scoring
[2024-10-02 20:22] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:22] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
203
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:23] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:23] Using 34 genes in the gene set for scoring
[2024-10-02 20:23] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:23] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
223
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:23] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:23] Using 34 genes in the gene set for scoring
[2024-10-02 20:23] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:23] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
243
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:23] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:23] Using 34 genes in the gene set for scoring
[2024-10-02 20:23] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:23] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
163
163
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:23] Input is preprocessed, preprocessed 163 genes and 2968 cells.
[2024-10-02 20:23] Using 26 genes in the gene set for scoring
[2024-10-02 20:23] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:23] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
183
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:24] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:24] Using 34 genes in the gene set for scoring
[2024-10-02 20:24] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:24] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
203
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:24] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:24] Using 34 genes in the gene set for scoring
[2024-10-02 20:24] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:24] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
223
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:24] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:24] Using 34 genes in the gene set for scoring
[2024-10-02 20:24] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:24] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
243
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:24] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:24] Using 34 genes in the gene set for scoring
[2024-10-02 20:24] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:24] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
163
163
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:25] Input is preprocessed, preprocessed 163 genes and 2968 cells.
[2024-10-02 20:25] Using 26 genes in the gene set for scoring
[2024-10-02 20:25] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:25] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
183
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:25] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:25] Using 34 genes in the gene set for scoring
[2024-10-02 20:25] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:25] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
203
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:25] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:25] Using 34 genes in the gene set for scoring
[2024-10-02 20:25] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:25] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
223
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:26] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:26] Using 34 genes in the gene set for scoring
[2024-10-02 20:26] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:26] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
171
243
Scaling data...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


[2024-10-02 20:26] Input is preprocessed, preprocessed 171 genes and 2968 cells.
[2024-10-02 20:26] Using 34 genes in the gene set for scoring
[2024-10-02 20:26] Lambda provided, capturing scores in 2 neurons.
[2024-10-02 20:26] Training on 2672 cells, validate on 296 cells.


Finished. Giving SID scores and SID Recommendation...
