In [1]:
import numpy as np
import SPECTRA as spc
import spectra_util as util
import json 
import scanpy as sc
import K_est
from collections import OrderedDict
import scipy 

## Load gene sets

In [2]:
# Read the gene-set annotation dictionary from its JSON export.
with open("global0.1_delta_0.001_beta_0.0_pseudo_0.0gene_names_dict.json", "rb") as handle:
    annotations = json.loads(handle.read())

In [3]:
# Show the top-level keys of the loaded annotation dictionary.
annotations.keys()

dict_keys(['global', 'B_memory', 'CD4_T', 'CD8_T', 'DC', 'MDC', 'Treg', 'mast', 'pDC'])

## Load data & create a new cell type label 

In [4]:
# Load the full single-cell dataset from disk.
adata = sc.read_h5ad(filename="all_scseq_results.h5ad")

Subset the dataset to the following immune cell types:

In [5]:
# Broad-cluster labels that define the immune compartment.
# Every label listed here must also be a key of `type_transfer_dict`, because
# each retained cell is relabelled through that dictionary afterwards. The
# variant spelling "T_cell" is deliberately not listed: it has no entry in
# `type_transfer_dict`, so any cell carrying it would pass the filter and then
# raise a KeyError during relabelling.
immune_types = ["Monocyte/Macrophage", "NK cell", "T cell", "Plasma/B cell", "Neutrophil"]

In [6]:
# Boolean mask over cells: True where the cell's broad cluster is an immune type.
is_immune = adata.obs["broad_clusters"].isin(immune_types).to_numpy(dtype=bool)

# Take an explicit copy instead of a view. A new `.obs` column is added to this
# object later; assigning to `.obs` of a view makes AnnData copy it implicitly
# and emit "Trying to set attribute `.obs` of view, copying.".
adata_immune = adata[is_immune, :].copy()

Create a coarser cell-type label that merges related populations (e.g. T cells and NK cells both become `TNK`).

In [7]:
# Broad-cluster label -> coarse cell-type label used as the SPECTRA cell type.
# Insertion order is kept as-is because later cells iterate over the values.
type_transfer_dict = dict(
    [
        ("T cell", "TNK"),
        ("Monocyte/Macrophage", "M"),
        ("NK cell", "TNK"),
        ("Plasma/B cell", "B"),
        ("Neutrophil", "Neutrophil"),
    ]
)

In [8]:
# Translate each cell's broad cluster into its coarse label. Plain dictionary
# indexing is used on purpose so that an unmapped label raises a KeyError.
ct_labels = adata_immune.obs["broad_clusters"].apply(type_transfer_dict.__getitem__)

In [9]:
# Store the coarse labels as the `cell_type` column; this is the column name
# later passed to SPECTRA as `cell_type_key`.
# NOTE: when `adata_immune` is a view of `adata`, AnnData copies it on this
# assignment, which is what the warning printed below reports.
adata_immune.obs["cell_type"] = ct_labels

Trying to set attribute `.obs` of view, copying.


## Create a dictionary for SPECTRA input

In [10]:
# Re-display the annotation keys for reference while building the SPECTRA input.
annotations.keys()

dict_keys(['global', 'B_memory', 'CD4_T', 'CD8_T', 'DC', 'MDC', 'Treg', 'mast', 'pDC'])

In [11]:
# Gene-set dictionary for this dataset, filled in by the following cells.
annotations_pdac = dict()

In [12]:

# Global gene sets are reused unchanged.
annotations_pdac["global"] = annotations["global"]

# Start every coarse cell type with its own empty gene-set dictionary.
# `dict.fromkeys` drops the repeated "TNK" value while keeping the order.
for ct in dict.fromkeys(type_transfer_dict.values()):
    annotations_pdac[ct] = {}

In [13]:

# Copy the reference gene sets into the coarse cell types of this dataset.
# B cells take the memory-B gene sets.
annotations_pdac["B"].update(annotations["B_memory"])

# TNK takes the CD4 T, CD8 T and Treg gene sets, in that order; a gene-set name
# that appears in more than one source keeps the entry from the last source.
for source in ("CD4_T", "CD8_T", "Treg"):
    annotations_pdac["TNK"].update(annotations[source])

# M takes the MDC gene sets.
annotations_pdac["M"].update(annotations["MDC"])

## K estimation

In [14]:
# Dense expression matrix restricted to the highly variable genes.
# `.toarray()` returns an ndarray directly; the previous
# `np.array(X.todense())` form first built an intermediate `np.matrix` and then
# copied it, doubling peak memory for this large matrix.
# Like the original `.todense()` call, this assumes `.X` is a scipy sparse matrix.
X = adata_immune[:, adata_immune.var["highly_variable"]].X.toarray()

In [15]:
# Dimensions of the dense matrix built in the previous cell.
X.shape

(117077, 1810)

In [16]:
# Do not rerun this estimation unless necessary; its result is hard-coded as `L` in the next cell.
#L_est = K_est.estimate_L(adata_immune, attribute = "cell_type", highly_variable = True)

In [17]:
# Saved result: number of factors for "global" and for each coarse cell type
# (presumably the output of the commented-out `K_est.estimate_L` call above).
L = {
    "global": 20,
    "B": 31,
    "M": 15,
    "Neutrophil": 3,
    "TNK": 13,
}

In [18]:
# Display the factor counts that will be passed to SPECTRA.
L

{'global': 20, 'B': 31, 'M': 15, 'Neutrophil': 3, 'TNK': 13}

## Fit SPECTRA model 

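> Do not rerun this cell unless necessary: fitting is very slow (the logged run below had completed 2301 of 10000 iterations after about 6 hours).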

In [None]:
# Fit SPECTRA on the immune subset with the coarse cell types, the per-type
# factor counts in `L` and the gene sets assembled in `annotations_pdac`.
model = spc.est_spectra(
    adata=adata_immune,
    L=L,
    gene_set_dictionary=annotations_pdac,
    use_highly_variable=True,
    cell_type_key="cell_type",
    use_weights=True,
    lam=0.1,
    delta=0.001,
    kappa=0.00001,
    rho=0.00001,
    use_cell_types=True,
    n_top_vals=25,
)

 23%|██▎       | 2301/10000 [6:06:03<20:07:31,  9.41s/it]

In [None]:
import torch

# There is no dedicated save function yet, so write the internal model's
# state dict to disk directly.
torch.save(obj=model.internal_model.state_dict(), f="pdac_model")