In [None]:
import numpy as np
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
import json
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from utils import normalizeEmbedding

In [None]:
### Load adata
import pickle
baseFolder='./'
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# The context manager guarantees the file handle is closed once loading is
# done (the previous bare open() left it open until garbage collection).
with open(baseFolder+"post_adata_mouseCD45neg.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [None]:
# Wrap the unpickled object in an AnnData container; every later cell works on `adata`.
# NOTE(review): later cells read adata.obs[...] and adata.obsm["X_totalVI"], so `data`
# presumably already carries obs/obsm annotations (e.g. is itself an AnnData) — confirm.
adata = anndata.AnnData(data)

In [None]:
# Metadata columns of interest, cast to pandas categoricals.
# Notes carried over from the data description:
#   - sampleID duplicates sampleName; `sample` is integer-valued
#   - annotID duplicates annot
for obs_column in ("sampleName", "louvain", "type", "annot"):
    adata.obs[obs_column] = adata.obs[obs_column].astype("category")

In [None]:
# Reduce the totalVI latent representation to two principal components and
# keep the result under obsm["PCA"].
adata.obsm["PCA"] = sc.pp.pca(adata.obsm["X_totalVI"], n_comps=2, zero_center=True)

# Quick visual check: PC1 vs PC2, colored by the integer code of each `annot` category.
pca_xy = adata.obsm["PCA"]
plt.scatter(pca_xy[:, 0], pca_xy[:, 1], c=adata.obs["annot"].cat.codes, cmap="tab20")

In [None]:
# Neighbor graph on the totalVI latent space (30 neighbors, correlation distance).
sc.pp.neighbors(
    adata,
    n_neighbors=30,
    use_rep="X_totalVI",
    metric="correlation",
)

# UMAP layout started from the 2D PCA coordinates; the seed is fixed for reproducibility.
sc.tl.umap(
    adata,
    init_pos=adata.obsm["PCA"],
    min_dist=0.2,
    random_state=42,
)

In [None]:
# Scatter of the UMAP coordinates, colored by the integer code of each `annot` category.
umap_xy = adata.obsm["X_umap"]
plt.scatter(umap_xy[:, 0], umap_xy[:, 1], c=adata.obs["annot"].cat.codes, cmap="tab20")

# Adding Embeddings

In [None]:
# Registry of embedding methods: maps a method label (e.g. "PCA", "UMAP", "tSNE")
# to the list of obsm keys holding that method's embeddings. Filled in by the cells below.
adata.uns["methods"] = {}

In [None]:
# Register PCA and UMAP: store the normalized coordinates under the method label,
# create an empty per-embedding uns entry, and list the label in the methods registry.
# (label, obsm key the raw coordinates are read from)
for method_label, source_key in (("PCA", "PCA"), ("UMAP", "X_umap")):
    adata.obsm[method_label] = normalizeEmbedding(adata.obsm[source_key])
    adata.uns[method_label] = {}
    adata.uns["methods"][method_label] = [method_label]

# t-SNE

In [None]:
# Center the totalVI embeddings row-wise: axis=1 subtracts each observation's own
# mean across the latent dimensions (not the per-dimension mean across observations).
# Cosine distance on row-centered vectors equals correlation distance, so a later
# "cosine" metric on this array matches the "correlation" metric used for the
# neighbor graph. NOTE(review): presumably that is the intent — confirm.
latent = adata.obsm['X_totalVI']
adata.obsm['X_totalVI'] = latent - np.mean(latent, axis=1, keepdims=True)

In [None]:
# Run the project's t-SNE series helper on the (row-centered) totalVI latent space.
# The result is consumed below via .items(), i.e. a mapping of name -> embedding.
# `init` is obsm["PCA"], which at this point holds the normalizeEmbedding output.
# NOTE(review): the exact meaning of max_exaggeration / fpath_prefix is defined in
# backend/tsne.py, not visible here — presumably an exaggeration sweep whose results
# are cached under the given path prefix; confirm.
tsne_embs = compute_tsne_series(
    adata.obsm["X_totalVI"],
    max_exaggeration=5,
    fpath_prefix="./mouseCD45neg_tsne",
    hd_metric="cosine",
    init=adata.obsm["PCA"],
)

# Store embeddings in anndata

In [None]:
# Register every t-SNE embedding: normalized coordinates go to obsm under the
# embedding's own name, an empty uns entry is created for it, and the name is
# recorded (in iteration order) in the "tSNE" list of the methods registry.
adata.uns["methods"]["tSNE"] = []
for tsne_key, tsne_coords in tsne_embs.items():
    adata.obsm[tsne_key] = normalizeEmbedding(tsne_coords)
    adata.uns[tsne_key] = {}
    adata.uns["methods"]["tSNE"].append(tsne_key)

In [None]:
# Drop the raw scanpy embeddings if present; the normalized copies stored under
# the method labels are the ones kept.
for stale_key in ("X_pca", "X_umap"):
    if stale_key in adata.obsm_keys():
        del adata.obsm[stale_key]

# Remove every obs column whose name contains "adt".
for adt_column in [col for col in adata.obs_keys() if "adt" in col]:
    del adata.obs[adt_column]

In [None]:
# Display the AnnData summary to inspect its final contents before writing to disk.
adata

In [None]:
# Persist the annotated object, with all registered embeddings, as a gzip-compressed h5ad file.
output_path = "./CD45_PCA_init.h5ad"
adata.write(filename=output_path, compression="gzip")