In [None]:
import numpy as np
import anndata
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import sys  
import ast
import math
import time
import loompy as lp # to install loompy and scikit-misc (pip)

sys.path.insert(1, '../../backend/')
import dataset
from tsne import prolongate_embedding
import openTSNE

In [None]:
# Open the Fly Atlas AnnData file. "r" is passed as read_h5ad's `backed`
# argument (its second positional parameter), spelled out here by keyword.
adata = anndata.read_h5ad("./Fly_Atlas.h5ad", backed="r")

In [None]:
# --- Run configuration (these names are read by the cells below) ---
random_state = 42
hd_metric = "euclidean"

# Landmark (coarse) stage
sampling_frac = 0.01   # fraction of the points drawn as landmarks
early_exag_iter = 250  # iterations of the first coarse optimisation pass
n_iter = 500           # iterations of the second coarse optimisation pass

# Full-data (smoothing) stage
smoothing_perplexity = 30
smoothing_iter = 250   # iterations run for each exaggeration value
exags = [10, 5, 4, 3, 2, 1]  # exaggeration schedule; exags[0] is also used for the coarse pass

# The first two columns of the stored "X_pca" matrix serve as the 2-D starting layout.
init = adata.obsm["X_pca"][:, :2]

In [None]:
# Matrix used for every neighbour search below: the stored "X_pca" array as float32.
data = np.asarray(adata.obsm["X_pca"], dtype=np.float32)
data_size = data.shape[0]

# Draw the landmark subset without replacement. The draw uses a generator
# seeded with `random_state`, so a fresh "Restart & Run All" selects the same
# landmarks every time (the global np.random state was previously left unseeded).
sampling_size = math.ceil(data_size * sampling_frac)
landmark_rng = np.random.default_rng(random_state)
sample_ind = landmark_rng.choice(data_size, size=sampling_size, replace=False)

# Coarse-stage perplexity: one hundredth of the (unrounded) landmark count, rounded up.
coarse_perp = math.ceil((data_size * sampling_frac) / 100)

In [None]:
# Nearest-neighbour index over the landmark rows only, asking for
# 3 * coarse_perp neighbours per point.
# The metric and seed come from the configuration cell: this index is what the
# coarse affinities are built from, so hard-coding "euclidean" / None here
# would bypass `hd_metric` and `random_state`.
landmark_knn_index = openTSNE.affinity.get_knn_index(
    data[sample_ind, :],
    "annoy",
    int(3 * coarse_perp),
    hd_metric,
    n_jobs=8,
    random_state=random_state,
    verbose=True,
)

In [None]:
print(f"Computing affinities with perplexity {coarse_perp}...")

# Timer for the coarse stage; it is read again after the coarse embedding
# has been optimised to report the stage total.
start_aff = time.time()

# Perplexity-based affinities between the landmarks, built on the
# pre-constructed landmark kNN index.
coarse_affinity_kwargs = dict(
    perplexity=coarse_perp,
    knn_index=landmark_knn_index,
    method="annoy",
    metric=hd_metric,
    random_state=random_state,
    n_jobs=8,
    verbose=True,
)
aff_coarse = openTSNE.affinity.PerplexityBasedNN(**coarse_affinity_kwargs)

print("openTSNE: Coarse NN search", time.time() - start_aff, flush=True)

In [None]:
# Initial layout for the landmarks.
# The result is stored under its own name (`coarse_init`) instead of
# overwriting `init`: `init` has one row per data point, and re-running this
# cell after it had been replaced by its subsample would index the subsample
# with `sample_ind` again.
if init is None:
    print("Computing PCA initialization...")
    coarse_init = openTSNE.initialization.pca(data[sample_ind, :])
else:
    coarse_init = openTSNE.initialization.rescale(init[sample_ind, :])

# Embedding of the landmarks only, driven by the coarse affinities.
coarse_embedding = openTSNE.TSNEEmbedding(
    embedding=coarse_init,
    affinities=aff_coarse,
    n_jobs=8,
    verbose=True,
    random_state=random_state,
    negative_gradient_method="fft",
)

# Two in-place optimisation passes: first with exaggeration 12, then with the
# first value of the exaggeration schedule.
coarse_embedding.optimize(early_exag_iter, exaggeration=12, inplace=True)
coarse_embedding.optimize(n_iter=n_iter, exaggeration=exags[0], inplace=True)

# Elapsed time since `start_aff`, i.e. coarse affinities plus coarse optimisation.
print("openTSNE: Coarse embedding total", time.time() - start_aff, flush=True)

In [None]:
# Affinities for the whole dataset, used by the smoothing stage.
print(f"Computing affinities for whole dataset with perplexity {smoothing_perplexity}...")

# Nearest-neighbour index over all rows, asking for 3 * smoothing_perplexity
# neighbours per point. The metric is taken from `hd_metric` rather than a
# literal, because this index (not the `metric=` argument below) is what the
# affinities are built from.
fine_knn_index = openTSNE.affinity.get_knn_index(
    data,
    "annoy",
    int(3 * smoothing_perplexity),
    hd_metric,
    n_jobs=8,
    random_state=random_state,
    verbose=True,
)

aff_fine_start = time.time()
aff_fine = openTSNE.affinity.PerplexityBasedNN(
    perplexity=smoothing_perplexity,
    n_jobs=8,
    random_state=random_state,
    metric=hd_metric,
    method="annoy",
    knn_index=fine_knn_index,
)
print("openTSNE: Fine NN search", time.time() - aff_fine_start, flush=True)

In [None]:
# Extend the landmark layout to every data point with the project helper
# `prolongate_embedding`, which receives the full data, the coarse embedding,
# the landmark indices and the coarse kNN index.
prolonged_init = prolongate_embedding(
    data,
    coarse_embedding,
    sample_ind,
    aff_coarse.knn_index,
)

# Open question carried over from the original notebook: is rescaling the
# prolongated embedding actually necessary?
fine_init = openTSNE.initialization.rescale(prolonged_init)

# Full-size embedding that the smoothing loop optimises in place.
smooth_embedding = openTSNE.TSNEEmbedding(
    embedding=fine_init,
    affinities=aff_fine,
    negative_gradient_method="fft",
    random_state=random_state,
    n_jobs=8,
    verbose=True,
)

In [None]:
# Project-level container (backend `dataset.Dataset`) wrapping the loaded
# AnnData object; embeddings produced below are registered on it.
# The metric is taken from the configuration cell so it cannot drift from the
# one used for the neighbour searches.
trace_data = dataset.Dataset(
    adata=adata,
    name="Fly",
    hd_metric=hd_metric,
    hd_data_key="X_pca",
)

In [None]:
# Walk through the exaggeration schedule, continuing from the previous state
# each time. After every stage the layout is written to a CSV-style text file
# and registered on `trace_data`.
for e in exags:
    smooth_embedding.optimize(smoothing_iter, exaggeration=e, inplace=True)

    fname = f"./tsne_skrodzki_exag_{e}.txt"
    np.savetxt(fname, X=smooth_embedding, delimiter=",")

    # `smooth_embedding` is optimised in place, and np.asarray on an ndarray
    # subclass returns a view of the same buffer. Take an explicit copy so the
    # layout registered for this stage cannot be altered by later iterations
    # (whether `add_embedding` copies its input is not visible from here).
    snapshot = np.array(smooth_embedding)
    trace_data.add_embedding(
        snapshot,
        name=f"tSNE Skrodzki (exag {e})",
        category="PCA 200",
        meta_info={"params": {"exaggeration": e}},
    )

In [None]:
# Align the stored embeddings against the one named "HVG PCA".
# NOTE(review): no embedding with this name is added anywhere in this
# notebook; presumably it already exists in the loaded file. Confirm.
reference_name = "HVG PCA"
trace_data.align_embeddings(reference_embedding=reference_name)

In [None]:
# Persist the result through the project helper `save_adata`.
output_path = "./fly_atlas_tsne.h5ad"
trace_data.save_adata(filename=output_path)