In [None]:
import numpy as np
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from utils import normalizeEmbedding

In [None]:
# this function is adapted from Böhm et al. (2022)
# https://github.com/berenslab/ne-spectrum/blob/56e7204710258d541fb716033d3542a4fca2705e/jnb_msc/generator/gauss_line.py#L72
def gauss_clusters(
    n_clusters=10, dim=50, pts_cluster=100, random_state=None, cov=1, stepsize=6,
):
    """Sample isotropic Gaussian clusters whose means are evenly spaced on a line.

    Parameters
    ----------
    n_clusters : int
        Number of clusters to generate.
    dim : int
        Dimensionality of the sampled points.
    pts_cluster : int
        Number of points drawn for each cluster.
    random_state : None, int, or generator object
        ``None`` creates a fresh, unseeded ``np.random.RandomState``. An
        integer is used as the seed of a new ``np.random.RandomState``. Any
        other object is used as-is and must provide ``multivariate_normal``.
    cov : float
        Scalar multiplied onto the identity matrix to form the covariance
        shared by all clusters.
    stepsize : float
        Euclidean distance between the means of two consecutive clusters.

    Returns
    -------
    X : np.ndarray of shape (n_clusters * pts_cluster, dim)
        The sampled points, ordered cluster by cluster.
    y : np.ndarray of shape (n_clusters * pts_cluster,)
        Cluster index (``0 .. n_clusters - 1``) of each row in ``X``.
    """
    if random_state is None:
        rng = np.random.RandomState()
    elif isinstance(random_state, (int, np.integer)):
        # An integer is a seed, not a generator; wrap it so that
        # ``rng.multivariate_normal`` below is available.
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    # Per-coordinate step chosen so that consecutive means are `stepsize`
    # apart in Euclidean distance: s * sqrt(dim) == stepsize.
    s = stepsize / np.sqrt(dim)
    means = np.linspace(np.zeros(dim), n_clusters * s, num=n_clusters, endpoint=False)
    cov = np.eye(dim) * cov

    clusters = np.array(
        [rng.multivariate_normal(m, cov, size=pts_cluster) for m in means]
    )

    # Flatten (n_clusters, pts_cluster, dim) into one row per sample.
    X = np.reshape(clusters, (-1, dim))
    y = np.repeat(np.arange(n_clusters), pts_cluster)
    return X, y

In [None]:
# Draw the dataset from a seeded generator so that the data, and therefore
# every embedding and the file written at the end of the notebook, is
# reproducible across kernel restarts.
data, labels = gauss_clusters(random_state=np.random.RandomState(42))
print(data.shape)

In [None]:
# Wrap the samples in an AnnData object and keep each point's cluster
# index as the per-observation annotation "labels".
obs_annotations = {"labels": labels.astype(int)}
adata = anndata.AnnData(X=data, obs=obs_annotations)

# PCA

In [None]:
# Two-component PCA; scanpy stores the result in adata.obsm['X_pca'].
sc.pp.pca(adata, zero_center=True, n_comps=2)

# Scatter the two components, colored by cluster label.
pca_coords = adata.obsm['X_pca']
plt.scatter(pca_coords[:, 0], pca_coords[:, 1], c=labels)

# UMAP

In [None]:
# Build the neighbor graph that UMAP runs on.
neighbor_kwargs = {"n_neighbors": 30, "metric": "euclidean"}
sc.pp.neighbors(adata, **neighbor_kwargs)

# UMAP, initialized from the PCA coordinates stored in adata.obsm["X_pca"].
sc.tl.umap(adata, init_pos="X_pca", min_dist=0.2)

In [None]:
# Scatter the UMAP embedding, colored by cluster label.
umap_coords = adata.obsm['X_umap']
plt.scatter(umap_coords[:, 0], umap_coords[:, 1], c=labels)

In [None]:
%%capture
tsne_embs = compute_tsne_series(adata.X, max_exaggeration=5, 
                                 fpath_prefix="./tSNE", 
                                 hd_metric="euclidean", init=adata.obsm["X_pca"])

# Store embeddings in anndata

In [None]:
# tSNE: store every embedding of the series under its own name,
# after passing it through normalizeEmbedding.
for emb_name in tsne_embs:
    adata.obsm[emb_name] = normalizeEmbedding(tsne_embs[emb_name])

In [None]:
# Store the PCA (first two components) and UMAP layouts under the keys
# "PCA" and "UMAP", each passed through normalizeEmbedding.
pca_2d = adata.obsm["X_pca"][:, :2]
adata.obsm["PCA"] = normalizeEmbedding(pca_2d)
adata.obsm["UMAP"] = normalizeEmbedding(adata.obsm["X_umap"])

# Map each method name to the obsm keys holding its embedding(s).
methods = {"UMAP": ["UMAP"], "PCA": ["PCA"]}
methods["tSNE"] = list(tsne_embs.keys())
adata.uns["methods"] = methods

In [None]:
# Remove scanpy's own embedding entries before saving; the versions stored
# under "PCA" and "UMAP" are kept.
for scanpy_key in ("X_pca", "X_umap"):
    if scanpy_key in adata.obsm_keys():
        del adata.obsm[scanpy_key]

# Write the annotated dataset to disk, gzip-compressed.
adata.write(filename="./gauss_line.h5ad", compression="gzip")