In [None]:
import numpy as np
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
import json
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from dataset import Dataset as TraceData

In [None]:
# Input: the Mammoth data from Wang et al. (2021). It is not fetched
# automatically -- download it next to this notebook before running the cell:
# https://github.com/YingfanWang/PaCMAP/blob/master/data/mammoth_3d_50k.json
with open('./mammoth_3d_50k.json', 'r') as mammoth_file:
    # Parse the JSON payload and convert it to a numpy array in one step.
    data = np.array(json.load(mammoth_file))

In [None]:
# Wrap the raw array in an AnnData object so the scanpy routines can use it.
adata = anndata.AnnData(data)

# Neighbor graph computed directly on the data matrix (use_rep='X').
sc.pp.neighbors(
    adata,
    use_rep='X',
    metric="euclidean",
    n_neighbors=30,
)

# Louvain clustering on that graph; the return value is not used here,
# `adata.obs` is what gets handed to the trace dataset later on.
sc.tl.louvain(adata, resolution=0.1)

# Embeddings

In [None]:
# Two-component PCA of the zero-centered data, with the sign of every
# coordinate flipped.
pca_emb = -sc.pp.pca(data, n_comps=2, zero_center=True)

# Scatter plot of the embedding with equal axis scaling.
fig, ax = plt.subplots()
ax.scatter(pca_emb[:, 0], pca_emb[:, 1])
ax.set_aspect('equal')

In [None]:
# Schedules passed to compute_tsne_series.
# NOTE(review): each tuple is presumably (exaggeration, n_iterations) --
# confirm against backend/tsne.py.
coarse_schedule = [(12, 200), (10, 200)]
fine_schedule = [(10, 200), (5, 200), (3, 200), (1, 200)]

# The series is initialised with the PCA embedding; the result is iterated
# with `.items()` further down, so it behaves like a mapping.
tsne_embs = compute_tsne_series(
    data=data,
    init=pca_emb,
    hd_metric="euclidean",
    coarse_exag_iter=coarse_schedule,
    fine_exag_iter=fine_schedule,
    sampling_frac=0.1,
    smoothing_perplexity=30,
    negative_gradient_method="fft",
    fpath_prefix="./mammoth",
)

# Quality measures and high-dimensional (HD) neighbors

In [None]:
# Dataset object (backend `Dataset`, imported as TraceData) built from the
# same array and metric that were used for the t-SNE series.
trace_data = TraceData(
    name="Mammoth",
    hd_data=data,
    hd_metric="euclidean",
    verbose=True,
)

In [None]:
# Hand the per-observation table of `adata` to the trace dataset.
# NOTE(review): presumably this carries the Louvain labels computed above --
# confirm which columns add_metadata expects.
trace_data.add_metadata(adata.obs)

In [None]:
# (name, coordinates, category) for every embedding to register:
# the PCA embedding first, then one entry per item of the t-SNE series,
# named after its key.
embedding_specs = [("PCA", pca_emb, "PCA")]
embedding_specs += [
    (f"tSNE_{exag}", emb, "tSNE") for exag, emb in tsne_embs.items()
]

for emb_name, emb_coords, emb_category in embedding_specs:
    trace_data.add_embedding(
        name=emb_name,
        embedding=emb_coords,
        category=emb_category,
    )

In [None]:
# Neighborhood sizes to evaluate; the HD neighbor precomputation is run with
# the largest of them (200) as maxK.
neighborhood_sizes = [200, 100, 50]

trace_data.precompute_HD_neighbors(maxK=max(neighborhood_sizes))
trace_data.compute_neighborhood_preservation(
    neighborhood_sizes=neighborhood_sizes
)
trace_data.compute_global_distance_correlation(
    LD_landmark_neighbors=True,
    max_landmarks=1000,
)

In [None]:
# Report the quality measures computed above.
trace_data.print_quality()

# Align the embeddings using the PCA embedding as reference, then write the
# result to disk. The alignment runs before saving, as in the original order.
trace_data.align_embeddings(reference_embedding="PCA")
output_path = "./mammoth.h5ad"
trace_data.save_adata(filename=output_path)