In [None]:
import numpy as np
import anndata
import scanpy as sc
import sys  
sys.path.insert(1, '../../backend/')
from tsne import compute_tsne_series
import dataset

In [None]:
### Load adata
import pickle
baseFolder='./'
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file, so
# only load pickles that come from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() left it open until garbage collection).
with open(baseFolder+"post_adata_mouseFibroblasts.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [None]:
# Wrap the unpickled object in an AnnData container; the bare expression on
# the last line makes the notebook render its summary.
adata = anndata.AnnData(
    data,
)
adata

In [None]:
# these are the interesting metadata features
# sampleID is the same as sampleName, sample are integers
# annotID is the same as annot

# Cast the metadata columns that are kept to categoricals.
# "sampleName" is deliberately not cast any more: it is dropped further down,
# so converting it was dead work and made this cell fail with a KeyError when
# it was executed a second time.
# NOTE(review): if "sampleName" was actually meant to be kept, remove it from
# the drop list below instead - confirm with the author.
for column in ("louvain", "type", "annot"):
    adata.obs[column] = adata.obs[column].astype("category")

# Give the two fibroblast populations descriptive labels. As before, the
# replacement is applied to the whole obs table, one label at a time.
# NOTE(review): recent pandas releases deprecate replace() on categorical
# columns in favour of .cat.rename_categories - verify against the pandas
# version in use.
for old_label, new_label in (
    ("Fibroblast 1", "CV Fibroblasts"),
    ("Fibroblast 2", "Bile-duct Fibroblasts"),
):
    adata.obs = adata.obs.replace(old_label, new_label)

# clean up metadata
# The membership checks keep the cell idempotent: re-running it after the
# entries are already gone is a no-op instead of a KeyError.
for column in ("annotID", "sampleID", "sampleName"):
    if column in adata.obs:
        del adata.obs[column]
for key in ("annotID_colors", "umap", "neighbors"):
    if key in adata.uns:
        del adata.uns[key]


# Compute Embeddings

In [None]:
# Two-component PCA of the totalVI latent representation. The result is shown
# as its own embedding and also passed as the initial layout to UMAP and t-SNE
# further down.
pca_emb = sc.pp.pca(
    adata.obsm["X_totalVI"],
    zero_center=True,
    n_comps=2,
)

In [None]:
%%capture
sc.pp.neighbors(adata, use_rep="X_totalVI", n_neighbors=30, metric="correlation")
sc.tl.umap(adata, min_dist=0.2, init_pos=pca_emb, random_state=42)

umap_emb = adata.obsm["X_umap"]
del adata.obsm["X_umap"]

In [None]:
# Centre the totalVI embedding row-wise: from every row, subtract that row's
# own mean taken across its columns (axis=1).
# NOTE(review): cosine distance on row-centred vectors equals correlation
# distance, which presumably is why rows (not columns) are centred before the
# cosine-based steps below - confirm.
latent_row_means = np.mean(adata.obsm['X_totalVI'], axis=1, keepdims=True)
adata.obsm['X_totalVI'] = adata.obsm['X_totalVI'] - latent_row_means

In [None]:
# Series of t-SNE embeddings of the (centred) totalVI representation, started
# from the PCA layout.
# NOTE(review): each schedule entry is presumably (exaggeration, n_iterations);
# verify against backend/tsne.py.
exaggeration_schedule = [(12, 200), (5, 200), (3, 200), (1, 200)]

# Optional argument that is currently disabled:
#     fpath_prefix="./mouseCD45neg"
tsne_embs = compute_tsne_series(
    data=adata.obsm["X_totalVI"],
    init=pca_emb,
    hd_metric="cosine",
    sampling_frac=1,
    fine_exag_iter=exaggeration_schedule,
)

# Precompute HD Neighbors and Quality Measures

In [None]:
# Build the TRACE dataset wrapper around adata, using the totalVI
# representation as the high-dimensional data with the cosine metric.
trace_data = dataset.Dataset(
    adata=adata,
    name="Mouse Fibroblasts",
    hd_data_key="X_totalVI",
    hd_metric="cosine",
    verbose=True,
)

In [None]:
# Register the single-layout embeddings first (PCA, then UMAP). Only UMAP
# carries a meta_info record of the parameters it was run with.
single_layouts = [
    dict(embedding=pca_emb, name="PCA", category="PCA"),
    dict(
        embedding=umap_emb,
        name="UMAP",
        category="UMAP",
        meta_info={"parameters": {'min_dist': 0.2, 'init_pos': 'PCA', 'random_state': 42}},
    ),
]
for layout_kwargs in single_layouts:
    trace_data.add_embedding(**layout_kwargs)

# Then one t-SNE embedding per entry of the series, named after its key.
for exaggeration, tsne_layout in tsne_embs.items():
    trace_data.add_embedding(
        embedding=tsne_layout,
        name=f"tSNE_exag_{exaggeration}",
        category="tSNE",
    )

In [None]:
# Compute the quality measures for the registered embeddings, then print them.
# NOTE(review): `filename` is presumably where the processed dataset is
# written - confirm in backend/dataset.py.
quality_output_path = "./Fibroblasts.h5ad"
trace_data.compute_quality(filename=quality_output_path)
trace_data.print_quality()