In [None]:
import numpy as np
import scanpy as sc
import sys 
import matplotlib.pyplot as plt
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
from dataset import Dataset as TraceData

In [None]:
# this function is copied from Böhm et al. (2022)
# https://github.com/berenslab/ne-spectrum/blob/56e7204710258d541fb716033d3542a4fca2705e/jnb_msc/generator/gauss_line.py#L72
def gauss_clusters(
    n_clusters=10, dim=20, pts_cluster=100, random_state=None, cov=1, stepsize=6,
):
    """Sample isotropic Gaussian clusters whose centers lie on a straight line.

    Cluster ``k`` is centered at ``k * stepsize / sqrt(dim)`` in every
    coordinate, so neighbouring centers are exactly ``stepsize`` apart in
    Euclidean distance regardless of ``dim``.

    Parameters
    ----------
    n_clusters : int
        Number of clusters to generate.
    dim : int
        Dimensionality of the ambient space.
    pts_cluster : int
        Number of points drawn per cluster.
    random_state : None, int, or random generator object
        ``None`` uses a freshly (non-deterministically) seeded
        ``np.random.RandomState``. An integer is used as the seed of a new
        ``np.random.RandomState``. Any other object is used as-is and must
        provide a ``multivariate_normal(mean, cov, size=...)`` method.
    cov : float
        Scalar multiplied onto the identity matrix to form each cluster's
        covariance.
    stepsize : float
        Euclidean distance between the centers of adjacent clusters.

    Returns
    -------
    X : np.ndarray, shape (n_clusters * pts_cluster, dim)
        The sampled points, ordered cluster by cluster.
    y : np.ndarray, shape (n_clusters * pts_cluster,)
        Cluster index (``0 .. n_clusters - 1``) of each row of ``X``.
    """
    if random_state is None or isinstance(random_state, (int, np.integer)):
        # Integer seeds used to crash with AttributeError because the int
        # itself was treated as the generator; build a generator from it.
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    # Per-coordinate step such that the full diagonal step has length `stepsize`.
    s = stepsize / np.sqrt(dim)
    # One mean per row; endpoint=False puts the k-th mean at k * s in every coordinate.
    means = np.linspace(np.zeros(dim), n_clusters * s, num=n_clusters, endpoint=False)
    cov = np.eye(dim) * cov

    clusters = np.array(
        [rng.multivariate_normal(m, cov, size=(pts_cluster)) for m in means]
    )

    # Flatten (n_clusters, pts_cluster, dim) -> (n_clusters * pts_cluster, dim).
    X = np.reshape(clusters, (-1, dim))
    y = np.repeat(np.arange(n_clusters), pts_cluster)
    return X, y

In [None]:
# Seed the generator explicitly so the synthetic dataset (and everything
# derived from it below) is identical on every Restart & Run All.
data, labels = gauss_clusters(random_state=np.random.RandomState(42))
print(data.shape)


# Embeddings

In [None]:
# Project the data onto its first two principal components (mean-centered).
pca_emb = sc.pp.pca(data, n_comps=2, zero_center=True)

# Scatter the 2-D projection, colored by cluster label, with equal axis scaling
# so distances are not visually distorted.
fig, ax = plt.subplots()
ax.scatter(pca_emb[:, 0], pca_emb[:, 1], c=labels)
ax.set_aspect('equal')

In [None]:
# Schedule handed to compute_tsne_series as `fine_exag_iter`
# (presumably (exaggeration, n_iterations) pairs — confirm in backend/tsne.py).
exag_schedule = [(10, 200), (5, 200), (3, 200), (1, 200)]

tsne_embs = compute_tsne_series(
    data=data,
    fine_exag_iter=exag_schedule,
    fpath_prefix="gaussian_line",
    hd_metric="euclidean",
    init=pca_emb,  # initialize from the 2-D PCA embedding
    sampling_frac=1,  # no need to subsample for this small dataset
    smoothing_perplexity=30,
    random_state=42,
)

# Quality Measures and HD Neighbors

In [None]:
# Wrap the high-dimensional data in the backend Dataset container
# (imported above under the alias TraceData).
trace_data = TraceData(
    hd_data=data,
    hd_metric="euclidean",
    name="gauss line",
    verbose=True,
)

# Attach the cluster assignments, cast to int, as metadata under the key "labels".
cluster_metadata = {"labels": labels.astype(int)}
trace_data.add_metadata(cluster_metadata)

In [None]:
# Collect (name, category, coordinates) for every embedding to register:
# the PCA projection first, then one entry per item of the t-SNE series.
pending_embeddings = [("PCA", "PCA", pca_emb)]
for exag, emb in tsne_embs.items():
    pending_embeddings.append((f"tSNE_{exag}", "tSNE", emb))

# Register them on the dataset in that order.
for emb_name, emb_category, emb_coords in pending_embeddings:
    trace_data.add_embedding(
        name=emb_name,
        embedding=emb_coords,
        category=emb_category,
    )

In [None]:
# Run the dataset's quality computation. Presumably this evaluates the
# embeddings registered above against the high-dimensional data — see
# Dataset.compute_quality in the backend to confirm what is measured.
trace_data.compute_quality()

In [None]:
# Print the dataset's quality report (presumably the results produced by
# compute_quality() — confirm against the backend Dataset class).
trace_data.print_quality()