In [None]:
import sys
import seaborn as sns
import numpy as np
import anndata as ad
import os
import pandas as pd

import json
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from dataset import Dataset as TraceData

### Load human immune data

In [None]:
# Read the immune AnnData file that sits next to this notebook.
baseFolder = './'
filepath = 'immune_hvg.h5ad'
immune_h5ad_path = os.path.join(baseFolder, filepath)
adataImmune = ad.read_h5ad(immune_h5ad_path)

In [None]:
# Expose the annotation under the name 'celltype' and drop the stored 'pca'
# entry from `uns`.
#
# The cell is written to be idempotent: re-running it after the column has
# already been renamed (or after `uns['pca']` has already been removed) is a
# no-op instead of a KeyError, so the notebook survives repeated execution.
if 'final_annotation' in adataImmune.obs.columns:
    adataImmune.obs['celltype'] = adataImmune.obs['final_annotation'].copy()
    del adataImmune.obs['final_annotation']
elif 'celltype' not in adataImmune.obs.columns:
    # Neither the source nor the target column exists: the input file is not
    # what this notebook expects, so fail loudly rather than continue.
    raise KeyError(
        "adataImmune.obs has neither 'final_annotation' nor 'celltype'"
    )

# Remove the 'pca' entry from `uns` if it is (still) present.
adataImmune.uns.pop('pca', None)

In [None]:
# Bare last expression: display the object's summary so its current contents can be inspected.
adataImmune

In [None]:
# Hex colour for every batch label.
batch_colors = {
    "10X": "#EBAC23",
    "Oetjen_U": "#B80058",
    "Freytag": "#008CF9",
    "Oetjen_P": "#006E00",
    "Oetjen_A": "#00BBAD",
    "Sun_sample4_TC": "#D163E6",
    "Sun_sample3_TB": "#B24502",
    "Sun_sample2_KC": "#FF9287",
    "Sun_sample1_CS": "#5954D6",
    "Villani": "#00C6F8",
}

# Hex colour for every cell-type label.
cell_colors = {
    "CD4+ T cells": "#1f77b4",
    "CD14+ Monocytes": "#ff7f0e",
    "CD20+ B cells": "#279e68",
    "NKT cells": "#d62728",
    "NK cells": "#aa40fc",
    "CD8+ T cells": "#8c564b",
    "Erythrocytes": "#e377c2",
    "Monocyte-derived dendritic cells": "#b5bd61",
    "CD16+ Monocytes": "#17becf",
    "HSPCs": "#aec7e8",
    "Erythroid progenitors": "#ffbb78",
    "Plasmacytoid dendritic cells": "#98df8a",
    "Monocyte progenitors": "#ff9896",
    "Megakaryocyte progenitors": "#c5b0d5",
    "CD10+ B cells": "#c49c94",
    "Plasma cells": "#f7b6d2",
}

# Store each palette in `uns` as a plain list of hex strings, in the
# insertion order of the dictionaries above.
# NOTE(review): only the colour values are stored, so the lists presumably
# have to line up with the category order of the matching `obs` columns --
# confirm against the consumer of these entries.
for uns_key, palette in (
    ("celltype_colors", cell_colors),
    ("batch_colors", batch_colors),
):
    adataImmune.uns[uns_key] = list(palette.values())

# Embeddings

In [None]:
# Compute the series of t-SNE embeddings from the stored PCA representation.
# The first two PCA columns double as the initial layout.
# NOTE(review): the *_exag_iter lists look like (exaggeration, iterations)
# pairs -- confirm against `compute_tsne_series`.
pca_coords = adataImmune.obsm["X_pca"]
tsne_embs = compute_tsne_series(
    data=pca_coords,
    coarse_exag_iter=[(12, 200), (10, 200)],
    fine_exag_iter=[(10, 200), (5, 200), (3, 200), (1, 200)],
    fpath_prefix="./immune",
    hd_metric="euclidean",
    init=pca_coords[:, :2],
    sampling_frac=0.1,
    smoothing_perplexity=30,
    negative_gradient_method="fft",
)

In [None]:
# Wrap the in-memory AnnData object in a TraceData dataset that uses the
# "X_pca" entry as its high-dimensional data with a Euclidean metric.
trace_data = TraceData(
    name="Human Immune",
    adata=adataImmune,
    hd_data_key="X_pca",
    hd_metric="euclidean",
    verbose=True,
)

In [None]:
# Register the 2-D PCA projection first ...
pca_2d = adataImmune.obsm["X_pca"][:, :2]
trace_data.add_embedding(name="PCA", embedding=pca_2d, category="PCA")

# ... followed by every t-SNE result, named after its key in `tsne_embs`.
for exag, emb in tsne_embs.items():
    trace_data.add_embedding(name=f"tSNE_{exag}", embedding=emb, category="tSNE")

In [None]:
# Neighbourhood sizes to evaluate; the high-dimensional neighbour search is
# precomputed up to the largest of them (200).
neighborhood_sizes = [200, 100, 50]
trace_data.precompute_HD_neighbors(maxK=max(neighborhood_sizes))
trace_data.compute_neighborhood_preservation(neighborhood_sizes=neighborhood_sizes)

# Global distance correlation, using at most 1000 landmarks.
trace_data.compute_global_distance_correlation(
    LD_landmark_neighbors=True,
    max_landmarks=1000,
)

In [None]:
# Report the quality measures, align the embeddings to the PCA reference,
# then write the result to disk.
immune_out_path = "./immune.h5ad"
trace_data.print_quality()
trace_data.align_embeddings(reference_embedding="PCA")
trace_data.save_adata(filename=immune_out_path)

# Adding Triplet Accuracy

In [None]:
# Reload the saved dataset from disk, add the random-triplet accuracy and
# write the result to a separate file.
saved_path = "./immune.h5ad"
triplet_path = "./immune_with_triplets.h5ad"

trace_data = TraceData(
    name="Human Immune",
    filepath=saved_path,
    hd_data_key="X_pca",
    hd_metric="euclidean",
    verbose=True,
)
trace_data.compute_random_triplet_accuracy(num_triplets=25)
trace_data.print_quality()
trace_data.save_adata(filename=triplet_path)