In [None]:
import sys
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
# from datetime import datetime

import numpy as np
import anndata as ad
import os
# import numpy.random as random
import pandas as pd
import scanpy as sc
# import louvain
import json
# import torch

# # Control UMAP numba warnings
# import warnings; warnings.simplefilter('ignore')

# %matplotlib inline

### Load human immune data

In [None]:
# Read the processed immune AnnData file, located relative to `baseFolder`.
baseFolder = "./"
filepath = "immune_processed.h5ad"
immune_h5ad_path = os.path.join(baseFolder, filepath)
adataImmune = ad.read_h5ad(immune_h5ad_path)

In [None]:
# Move the 'final_annotation' column of obs to the key 'celltype'
# (pop removes the old column and returns its values in one step).
adataImmune.obs['celltype'] = adataImmune.obs.pop('final_annotation')
# Drop the 'pca' entry from uns.
del adataImmune.uns['pca']

In [None]:
# Bare last expression: the notebook renders the AnnData summary for inspection.
adataImmune

In [None]:
# Row indices of varm["PCs"], ordered from the largest to the smallest value
# found in column 3 (zero-based) of that matrix.
# Disabled alternative kept for reference:
#adataImmune.var_names[np.argsort(np.var(adataImmune.X[[0,1,2,3], :], axis=0))]
np.flip(np.argsort(adataImmune.varm["PCs"][:, 3]))

In [None]:
# Hex colour assigned to each batch label.
batch_colors = {
    "10X": "#EBAC23",
    "Oetjen_U": "#B80058",
    "Freytag": "#008CF9",
    "Oetjen_P": "#006E00",
    "Oetjen_A": "#00BBAD",
    "Sun_sample4_TC": "#D163E6",
    "Sun_sample3_TB": "#B24502",
    "Sun_sample2_KC": "#FF9287",
    "Sun_sample1_CS": "#5954D6",
    "Villani": "#00C6F8",
}

# Hex colour assigned to each cell-type label.
cell_colors = {
    "CD4+ T cells": "#1f77b4",
    "CD14+ Monocytes": "#ff7f0e",
    "CD20+ B cells": "#279e68",
    "NKT cells": "#d62728",
    "NK cells": "#aa40fc",
    "CD8+ T cells": "#8c564b",
    "Erythrocytes": "#e377c2",
    "Monocyte-derived dendritic cells": "#b5bd61",
    "CD16+ Monocytes": "#17becf",
    "HSPCs": "#aec7e8",
    "Erythroid progenitors": "#ffbb78",
    "Plasmacytoid dendritic cells": "#98df8a",
    "Monocyte progenitors": "#ff9896",
    "Megakaryocyte progenitors": "#c5b0d5",
    "CD10+ B cells": "#c49c94",
    "Plasma cells": "#f7b6d2",
}
# Only the colour values are stored, in dict insertion order; the labels are dropped.
# NOTE(review): plotting tools presumably match the i-th entry of `<key>_colors`
# to the i-th category of obs[<key>] -- confirm that the insertion order of both
# dicts matches the category order of the corresponding obs columns.
adataImmune.uns["celltype_colors"] = list(cell_colors.values())
adataImmune.uns["batch_colors"] = list(batch_colors.values())

In [None]:
# Registry mapping each method name to the list of embedding keys loaded for it.
# The comprehension creates a separate empty list per method.
adataImmune.uns['methods'] = {
    method_name: []
    for method_name in ('PCA', 'UMAP', 'tSNE', 'tSNE_skrodzi', 'PaCMAP')
}

In [None]:
def normalizeEmbedding(arr):
    """Rescale an array linearly so its global minimum maps to -1 and its maximum to 1.

    The minimum and maximum are taken over the whole array (not per column),
    so all axes are scaled by the same factor.

    Parameters
    ----------
    arr : array-like
        Numeric values to rescale.

    Returns
    -------
    numpy.ndarray
        A new array of the same shape with values in [-1, 1]. When every
        value of ``arr`` is identical the range is zero; an all-zero array is
        returned in that case instead of dividing by zero (which would yield
        NaNs).

    Raises
    ------
    ValueError
        Raised by ``np.min`` when ``arr`` is empty.
    """
    arr = np.asarray(arr)
    lo = np.min(arr)  # named `lo` so the built-in `min` is not shadowed
    span = np.max(arr) - lo
    if span == 0:
        # Degenerate input: every point coincides, so place it at the centre
        # of the [-1, 1] range rather than producing NaNs.
        return np.zeros_like(arr, dtype=np.result_type(arr, np.float32))
    return (2 * ((arr - lo) / span)) - 1

In [None]:
def addEmbedding(method, fileHandle, embedding_key, adata=None):
    """Load one embedding (with quality curves) from JSON and attach it to an AnnData object.

    The JSON document read from ``fileHandle`` must contain the keys ``"x"``,
    ``"y"``, ``"qnx@50"`` and ``"qnx@200"``.

    Parameters
    ----------
    method : str
        Name of the method group under ``uns["methods"]`` to register the
        embedding in. The group must already exist (``KeyError`` otherwise).
    fileHandle : file-like
        Open, readable handle to the JSON document. It is not closed here.
    embedding_key : str
        Key under which the results are stored in ``obsm`` and ``uns``.
    adata : optional
        Object to modify. Defaults to the notebook-level ``adataImmune``.

    Side effects
    ------------
    * ``adata.uns[embedding_key]`` receives the two ``qnx`` arrays (float32).
    * ``adata.obsm[embedding_key]`` receives the x/y coordinates as an
      ``(n, 2)`` float32 array, rescaled by ``normalizeEmbedding``.
    * ``embedding_key`` is added to ``adata.uns["methods"][method]`` unless it
      is already listed, so re-running a loader cell does not create
      duplicate entries.
    """
    if adata is None:
        # Keep the original behaviour of writing to the notebook-level object.
        adata = adataImmune

    embedding = json.load(fileHandle)
    adata.uns[embedding_key] = {
        'neighborhood preservation k=50': np.asarray(embedding['qnx@50'], dtype=np.float32),
        'neighborhood preservation k=200': np.asarray(embedding['qnx@200'], dtype=np.float32),
    }

    # Stack x and y as rows, then transpose to one (x, y) row per point.
    embedding_arr = np.asarray([embedding["x"], embedding["y"]], dtype=np.float32).T
    adata.obsm[embedding_key] = normalizeEmbedding(embedding_arr)

    # Build a new list rather than mutating the stored one, and skip keys
    # that were already registered by an earlier run of the same cell.
    registered = list(adata.uns["methods"][method])
    if embedding_key not in registered:
        registered.append(embedding_key)
    adata.uns["methods"][method] = registered

In [None]:
# PCA
# Open the file in a `with` block so the handle is closed once the embedding
# has been loaded (it was previously left open).
with open(os.path.join(baseFolder, 'embeddings', 'pca_embedding_quality.json')) as pca_f:
    addEmbedding('PCA', pca_f, 'PCA')

In [None]:
# T-SNE
# Each entry pairs the embedding key with the JSON file it is loaded from.
tsne_fnames = [
    ("tSNE (exag 5, perp 335)", "multiscaletsne_embedding_exg_5_perp_335_quality.json"),
    ("tSNE (exag 4, perp 259)", "multiscaletsne_embedding_exg_4_perp_259_quality.json"),
    ("tSNE (exag 3, perp 183)", "multiscaletsne_embedding_exg_3_perp_183_quality.json"),
    ("tSNE (exag 2, perp 107)", "multiscaletsne_embedding_exg_2_perp_107_quality.json"),
    ("tSNE (exag 1, perp 30)", "multiscaletsne_embedding_exg_1_perp_30_quality.json"),
    ("denSNE (exag 1, perp 30)", "multiscaletsne_embedding_exg_1_perp_30_densne_quality.json"),
]

# Register every file above under the "tSNE" method group.
for name, filename in tsne_fnames:
    tsne_path = os.path.join(baseFolder, "embeddings", filename)
    with open(tsne_path) as f:
        addEmbedding("tSNE", f, name)

In [None]:
# UMAP
# Load umap_0 ... umap_5 and register them as 'UMAP 0' ... 'UMAP 5'.
for i in range(6):
    umap_path = os.path.join(baseFolder, 'embeddings', f'umap_{i}_quality.json')
    with open(umap_path) as f:
        addEmbedding('UMAP', f, f'UMAP {i}')

In [None]:
# PaCMAP
# Files are registered as 'PaCMAP 0', 'PaCMAP 1', ... in the order listed here.
pacmap_fnames = [
    'pacmap_embedding_quality.json',
    'pacmap_harmony_0_quality.json',
    'pacmap_harmony_cd20bcell_refinement_quality.json',
]
for i, fname in enumerate(pacmap_fnames):
    pacmap_path = os.path.join(baseFolder, 'embeddings', fname)
    with open(pacmap_path) as f:
        addEmbedding('PaCMAP', f, f'PaCMAP {i}')

In [None]:
# t-SNE Skrodzi
# These embeddings come from CSV files (first two columns are the coordinates);
# no quality data is loaded for them, so their uns entries are empty dicts.
adataImmune.uns['tSNE_skrodzi'] = {}

for i in range(5):
    skrodzi_key = f"tSNE_skrodzi_{i}"
    # Only the read needs the file handle; close it before further processing.
    with open(os.path.join(baseFolder, 'embeddings', f'tsne_skrodzi_{i}.csv')) as f:
        embedding = np.loadtxt(f, delimiter=",")

    adataImmune.uns[skrodzi_key] = {}
    embedding_arr = np.asarray([embedding[:, 0], embedding[:, 1]], dtype=np.float32).T
    adataImmune.obsm[skrodzi_key] = normalizeEmbedding(embedding_arr)

    # Register the key once; re-running this cell must not duplicate entries.
    registered = list(adataImmune.uns["methods"]["tSNE_skrodzi"])
    if skrodzi_key not in registered:
        registered.append(skrodzi_key)
    adataImmune.uns["methods"]["tSNE_skrodzi"] = registered

In [None]:
# Bare last expression: render the AnnData summary again, now with the added obsm/uns entries.
adataImmune

In [None]:
# Save the annotated object next to the input file, gzip-compressed.
output_h5ad_path = os.path.join(baseFolder, 'immune_with_embeddings_new.h5ad')
adataImmune.write_h5ad(output_h5ad_path, compression='gzip')