In [None]:
import numpy as np
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
import json
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from utils import normalizeEmbedding

In [None]:
# Prerequisite: fetch the Mammoth data set of Wang et al. (2021) and place it
# next to this notebook:
# https://github.com/YingfanWang/PaCMAP/blob/master/data/mammoth_3d_50k.json
#
# The JSON payload is parsed and converted to a NumPy array in one step, so
# `data` is only ever bound to the array.
# NOTE(review): presumably one row per point with 3 coordinates (per the file
# name) — confirm against the downloaded file.
with open('./mammoth_3d_50k.json') as f:
    data = np.array(json.load(f))

In [None]:
# Wrap the raw array in an AnnData container; every embedding computed below
# is attached to this object.
adata = anndata.AnnData(data)

# Nearest-neighbour graph computed directly on adata.X (use_rep='X') with
# Euclidean distances and 30 neighbours per point.
sc.pp.neighbors(
    adata,
    use_rep='X',
    metric="euclidean",
    n_neighbors=30,
)

# Louvain clustering at a low resolution (0.1).
sc.tl.louvain(adata, resolution=0.1)

# PCA

In [None]:
# Reduce adata.X to two principal components (mean-centred).
sc.pp.pca(adata, n_comps=2, zero_center=True)

# Negate both components, i.e. rotate the 2-D layout by 180 degrees.
# This flipped layout is also what the UMAP and t-SNE cells use as their
# initialisation.
# NOTE(review): presumably done only to orient the figure — confirm.
adata.obsm["X_pca"] = -1 * adata.obsm["X_pca"]

# Diagnostic plot. Uses the explicit Axes interface and labels the figure so
# it can be read on its own; the trailing ';' suppresses the return-value repr.
pca_coords = adata.obsm["X_pca"]
fig, ax = plt.subplots()
ax.scatter(pca_coords[:, 0], pca_coords[:, 1])
ax.set(title="PCA", xlabel="PC 1", ylabel="PC 2");

# UMAP

In [None]:
# UMAP layout, seeded from the coordinates stored in adata.obsm["X_pca"]
# instead of scanpy's default initialisation.
sc.tl.umap(
    adata,
    init_pos="X_pca",
    min_dist=0.2,
)

In [None]:
# Diagnostic plot of the UMAP layout. Uses the explicit Axes interface and
# labels the figure so it can be read on its own; the trailing ';' suppresses
# the return-value repr.
umap_coords = adata.obsm["X_umap"]
fig, ax = plt.subplots()
ax.scatter(umap_coords[:, 0], umap_coords[:, 1])
ax.set(title="UMAP", xlabel="UMAP 1", ylabel="UMAP 2");

In [None]:
%%capture
tsne_embs = compute_tsne_series(adata.X, max_exaggeration=5, 
                                 fpath_prefix="./mammoth_tsne", 
                                 hd_metric="euclidean", 
                                 init=adata.obsm["X_pca"])

# Store embeddings in anndata

In [None]:
# t-SNE: attach every embedding of the series to the AnnData object under its
# own name, after passing it through the project's `normalizeEmbedding`
# helper, and create an (initially empty) metadata slot with the same name.
for tsne_name, tsne_coords in tsne_embs.items():
    normalized_coords = normalizeEmbedding(tsne_coords)
    adata.obsm[tsne_name] = normalized_coords
    adata.uns[tsne_name] = {}

In [None]:
# Store normalised copies of the PCA and UMAP layouts under their display
# names ("PCA", "UMAP"); only the first two PCA columns are kept.
pca_first_two = adata.obsm["X_pca"][:, :2]
adata.obsm["PCA"] = normalizeEmbedding(pca_first_two)
adata.obsm["UMAP"] = normalizeEmbedding(adata.obsm["X_umap"])

# Empty metadata slots; the next cell fills in "params" where available.
for method_name in ("UMAP", "PCA"):
    adata.uns[method_name] = {}

# Index of available embeddings: method -> list of obsm keys for that method.
adata.uns["methods"] = dict(
    UMAP=["UMAP"],
    PCA=["PCA"],
    tSNE=[*tsne_embs.keys()],
)

In [None]:
# Move the parameter records stored under the lowercase "pca"/"umap" uns
# entries into the matching display-name slot, then drop the lowercase entry
# (anything else stored under it is discarded).
for source_key, method_name in (("pca", "PCA"), ("umap", "UMAP")):
    if source_key in adata.uns_keys():
        adata.uns[method_name]["params"] = adata.uns[source_key]["params"]
        del adata.uns[source_key]

In [None]:
# The un-normalised layouts are no longer needed once their normalised copies
# are stored under "PCA"/"UMAP"; remove them (if present) before saving.
for redundant_key in ("X_pca", "X_umap"):
    if redundant_key in adata.obsm_keys():
        del adata.obsm[redundant_key]

# Persist the annotated object as a gzip-compressed .h5ad file.
adata.write(filename="./mammoth.h5ad", compression="gzip")