In [1]:
import celltypist
from celltypist import models
import scanpy as sc
import pandas as pd 
import numpy as np
import anndata
import re
import h5py
import scipy.sparse as scs
import concurrent.futures
import scanpy.external as sce
import gc
from concurrent.futures import ThreadPoolExecutor

In [13]:
def run_leiden(adata, resolution, key_added):
    """Run Leiden clustering on a copy of ``adata`` and return that copy's ``obs``.

    Parameters
    ----------
    adata
        Object handed to ``sc.tl.leiden``. It is not modified because the
        call is made with ``copy=True``.
    resolution
        Value forwarded as the ``resolution`` argument of ``sc.tl.leiden``.
    key_added
        Value forwarded as the ``key_added`` argument of ``sc.tl.leiden``.

    Returns
    -------
    The ``obs`` attribute of the object returned by ``sc.tl.leiden``.
    """
    leiden_kwargs = {
        "resolution": resolution,
        "key_added": key_added,
        # Work on a copy so the caller's object is left untouched.
        "copy": True,
    }
    clustered_copy = sc.tl.leiden(adata, **leiden_kwargs)
    return clustered_copy.obs

def run_leiden_parallel(adata_subset, tasks, max_workers=3):
    """Run several Leiden clusterings concurrently and store the labels on ``adata_subset``.

    Each task is submitted to a thread pool as a ``run_leiden`` call. Once all
    calls have finished, the column named ``key_added`` is copied from each
    result into ``adata_subset.obs``.

    Parameters
    ----------
    adata_subset
        Object passed to every ``run_leiden`` call; its ``obs`` receives one
        new (or overwritten) column per task.
    tasks
        Iterable of ``(resolution, key_added)`` pairs. Any iterable is
        accepted, including a generator.
    max_workers
        Upper bound on concurrently running threads. Defaults to 3, the
        previously hard-coded value.

    Returns
    -------
    The same ``adata_subset`` object that was passed in.

    Raises
    ------
    Any exception raised inside a ``run_leiden`` call is re-raised here by
    ``Future.result()``.
    """
    # `tasks` is walked twice (submit, then write-back). Materialise it so a
    # one-shot iterable is not exhausted before the results are assigned.
    tasks = list(tasks)
    if not tasks:
        return adata_subset

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(run_leiden, adata_subset, resolution, key_added)
            for resolution, key_added in tasks
        ]
        # Collect in submission order so results line up with `tasks`.
        results = [future.result() for future in futures]

    # Assign the results back to the original AnnData object
    for result, (_, key_added) in zip(results, tasks):
        adata_subset.obs[key_added] = result[key_added]

    return adata_subset

In [2]:
# Load the processed NK-cell AnnData file (path is relative to the working directory).
adata_subset=sc.read_h5ad('NK/NKcells_processed_20231107.h5ad')

In [3]:
# Replace the working object with the AnnData rebuilt from its `.raw` slot.
# NOTE(review): presumably `.raw` holds the un-normalized, all-gene matrix,
# since normalization and log1p are applied again below — confirm upstream.
adata_subset=adata_subset.raw.to_adata()

In [4]:
# Drop the cells of the excluded Leiden (resolution 1.5) clusters.
# Cluster labels are strings, so every ID in the list must be a string too:
# a bare integer 15 never matches the label '15' and that cluster would be kept.
# NOTE(review): the reason these four clusters are excluded is not recorded here.
excluded_clusters = ['9', '11', '14', '15']
# astype(str) makes the comparison independent of how the labels were stored.
keep_cells = ~adata_subset.obs['leiden_resolution_1.5'].astype(str).isin(excluded_clusters)
# .copy() turns the view into a real object, because later cells modify it in place.
adata_subset = adata_subset[keep_cells].copy()

In [6]:
# Normalize each cell to a total of 1e4 counts; adata_subset is modified in place.
# NOTE(review): assumes X holds raw counts at this point — confirm.
sc.pp.normalize_total(adata_subset, target_sum=1e4)

In [7]:
# Log-transform the normalized values, then flag highly variable genes
# (scanpy defaults) in adata_subset.var['highly_variable'].
sc.pp.log1p(adata_subset)
sc.pp.highly_variable_genes(adata_subset)
# Keep only the flagged genes. Index with the boolean mask directly, and
# .copy() so the result is a real AnnData rather than a view, since the
# following cells (scale, PCA, ...) write into it in place.
adata_subset = adata_subset[:, adata_subset.var['highly_variable']].copy()



In [8]:
# Scale the retained genes in place with scanpy's default settings
# (no max_value clipping is requested).
sc.pp.scale(adata_subset)

In [9]:
# PCA using the ARPACK solver; the number of components is left at scanpy's default.
sc.tl.pca(adata_subset, svd_solver='arpack')

In [10]:
# Harmony batch correction of the PCA embedding, using the obs column
# 'cohort.cohortGuid' as the batch key and at most 30 Harmony iterations.
sce.pp.harmony_integrate(
    adata_subset,
    key='cohort.cohortGuid',
    max_iter_harmony=30,
)

2023-11-08 20:11:51,331 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
Computing initial centroids with sklearn.KMeans...
2023-11-08 20:13:06,961 - harmonypy - INFO - sklearn.KMeans initialization complete.
sklearn.KMeans initialization complete.
2023-11-08 20:13:07,741 - harmonypy - INFO - Iteration 1 of 30
Iteration 1 of 30
2023-11-08 20:14:30,603 - harmonypy - INFO - Iteration 2 of 30
Iteration 2 of 30
2023-11-08 20:15:57,346 - harmonypy - INFO - Iteration 3 of 30
Iteration 3 of 30
2023-11-08 20:17:21,962 - harmonypy - INFO - Iteration 4 of 30
Iteration 4 of 30
2023-11-08 20:18:18,061 - harmonypy - INFO - Converged after 4 iterations
Converged after 4 iterations


In [11]:
# Build the neighbor graph on the Harmony-corrected embedding:
# 50 neighbors per cell, computed from 30 components of 'X_pca_harmony'.
sc.pp.neighbors(
    adata_subset,
    use_rep='X_pca_harmony',
    n_pcs=30,
    n_neighbors=50,
)

In [12]:
# Compute the UMAP embedding with scanpy's default parameters.
sc.tl.umap(adata_subset)

In [None]:
# One (resolution, obs column name) pair per Leiden run: resolutions 1, 1.5 and 2,
# written to 'leiden_resolution_1', 'leiden_resolution_1.5' and 'leiden_resolution_2'.
tasks = [(res, f"leiden_resolution_{res}") for res in (1, 1.5, 2)]
# Run the clusterings concurrently and attach the label columns to adata_subset.obs.
adata_subset = run_leiden_parallel(adata_subset, tasks)