In [None]:
import numpy as np
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
import pickle
import json
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from dataset import Dataset as TraceData

In [None]:
baseFolder='./'
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only point this at a pickle file obtained from a trusted source.
# The context manager guarantees the file handle is closed once loading is
# done (the bare open() call previously left it open for the kernel's lifetime).
with open(baseFolder+"post_adata_mouseCD45neg.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [None]:
# Wrap the unpickled payload in an AnnData object; every later cell works on `adata`.
# NOTE(review): the pickle file name ("post_adata_...") suggests `data` may
# already be an AnnData instance -- confirm whether this re-wrapping is needed.
adata = anndata.AnnData(data)

In [None]:
# Metadata features of interest are stored as categoricals.
# Original author's notes: sampleID is the same as sampleName (sample are
# integers), and annotID is the same as annot.
for categorical_col in ('sampleName', 'louvain', 'type', 'annot'):
    adata.obs[categorical_col] = adata.obs[categorical_col].astype('category')

# Drop the duplicate ID columns, but only when they are actually present.
for redundant_col in ('annotID', 'sampleID'):
    if redundant_col in adata.obs_keys():
        del adata.obs[redundant_col]

# Promote the "cell" column to the (unnamed) index of the obs dataframe,
# then remove the now-duplicated column.
adata.obs.index = adata.obs['cell']
adata.obs.index.name = None
del adata.obs['cell']

# Remove every obs column whose name contains "adt". The matching keys are
# collected first so that nothing is deleted while the key list is being scanned.
adt_columns = list(filter(lambda obs_key: "adt" in obs_key, adata.obs_keys()))
for adt_col in adt_columns:
    del adata.obs[adt_col]

In [None]:
# Two-component PCA of the totalVI latent representation. The result is
# reused in later cells as the initial layout for UMAP (init_pos) and for the
# t-SNE series (init), and is also registered as an embedding of its own.
pca_emb = sc.pp.pca(
    adata.obsm["X_totalVI"],
    n_comps=2,
    zero_center=True,
)

In [None]:
# Neighbor graph on the totalVI latent space (correlation metric, 30 neighbors).
sc.pp.neighbors(
    adata,
    use_rep="X_totalVI",
    n_neighbors=30,
    metric="correlation",
)

# UMAP seeded with the PCA layout; the fixed random_state keeps reruns comparable.
sc.tl.umap(
    adata,
    min_dist=0.2,
    init_pos=pca_emb,
    random_state=42,
)

# Keep the UMAP coordinates in a standalone variable and remove them from
# `adata.obsm`, so the AnnData object no longer carries an "X_umap" entry.
umap_emb = adata.obsm["X_umap"]
del adata.obsm["X_umap"]

In [None]:
# Center each cell's totalVI latent vector by subtracting that row's own mean
# (axis=1 averages across the latent dimensions of a row, not across cells).
# Cosine distance between row-centered vectors equals correlation distance,
# so the hd_metric="cosine" steps in later cells stay consistent with the
# metric="correlation" neighbor graph computed earlier -- presumably the
# intent; confirm before changing the axis.
row_means = np.mean(adata.obsm['X_totalVI'], axis=1, keepdims=True)
adata.obsm['X_totalVI'] = adata.obsm['X_totalVI'] - row_means

In [None]:
# Compute a series of t-SNE embeddings of the (row-centered) totalVI latent
# space with the project helper `compute_tsne_series` (imported from
# ../../backend/tsne), starting from the PCA layout.
# The result is consumed later via `.items()`, one embedding per key.
# NOTE(review): the (value, count) tuples look like (exaggeration, iterations)
# pairs, and `fpath_prefix` looks like an output/cache path prefix -- confirm
# against the helper's implementation, which is not visible here.
tsne_embs = compute_tsne_series(
    data=adata.obsm["X_totalVI"],
    init=pca_emb,
    hd_metric="cosine",
    coarse_exag_iter=[(12, 200)],
    fine_exag_iter=[(10, 200), (5, 200), (3, 200), (1, 200)],
    fpath_prefix="./mouseCD45neg", 
    sampling_frac=0.1,
    smoothing_perplexity=30,
)

# Embedding Quality and HD Neighbors

In [None]:
# Wrap `adata` in the project's Dataset class (imported as TraceData from
# ../../backend/dataset). The high-dimensional reference data is the
# "X_totalVI" entry of `adata.obsm`, compared with the cosine metric -- the
# same metric passed to the t-SNE series.
# NOTE(review): what the constructor precomputes (e.g. HD neighbors) is
# defined in the backend and not visible here -- confirm there.
trace_data = TraceData(
    name="Mouse CD45neg",
    adata=adata,
    hd_metric="cosine",
    hd_data_key="X_totalVI",
    verbose=True
)

In [None]:
# Register the two single-layout embeddings; for these the embedding name
# and its category are the same string.
for label, coords in (("PCA", pca_emb), ("UMAP", umap_emb)):
    trace_data.add_embedding(
        name=label,
        embedding=coords,
        category=label,
    )

# Register every member of the t-SNE series under the shared "tSNE" category,
# naming each one after its key in `tsne_embs`.
for series_key, series_emb in tsne_embs.items():
    trace_data.add_embedding(
        name=f"tSNE_{series_key}",
        embedding=series_emb,
        category="tSNE",
    )

In [None]:
# Compute quality measures for the embeddings registered on `trace_data`,
# then print them.
# NOTE(review): which measures are computed is defined in
# ../../backend/dataset and is not visible here -- confirm there.
trace_data.compute_quality()
trace_data.print_quality()