In [None]:
import pandas as pd
import numpy as np
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns

from anticor_features.anticor_features import get_anti_cor_genes
import topo as tp
from topo.tpgraph.intrinsic_dim import IntrinsicDim

In [None]:
import datetime

# Record when this notebook was executed (local time, ISO 8601 format).
print(datetime.datetime.now().isoformat(sep="T"))

2025-02-26T14:37:11.086553


In [None]:
# Read the tab-separated table from the gzipped file; the first column is used
# as the row index.
# NOTE(review): presumably features x cells, since it is transposed before the
# AnnData object is built -- confirm against the data source.
raw = pd.read_csv(
    './data/schmidtea/dge.txt.gz',
    index_col=0,
    sep='\t',
)

In [None]:
# AnnData keeps observations as rows, so the table is transposed on the way in.
adata = sc.AnnData(raw.transpose())

In [None]:
# Annotation table that is joined onto `adata.obs`; the first CSV column becomes
# the index (the join key).
meta = pd.read_csv(
    './data/schmidtea/Planaria_Seurat_annot.csv',
    index_col=0,
)

In [None]:
# Attach the annotation columns to the observations. `join` is a left join on the
# index, so every row of `adata.obs` is kept and rows without a match in `meta`
# get NaN. Columns that are already present in `adata.obs` are skipped, so
# re-running this cell no longer fails with a "columns overlap" error; on a
# fresh `adata` all columns of `meta` are joined, exactly as before.
adata.obs = adata.obs.join(meta.loc[:, ~meta.columns.isin(adata.obs.columns)])

In [None]:
# Persist the annotated AnnData object to disk in h5ad format.
adata.write_h5ad(filename='./data/schmidtea/schmidtea.h5ad')

In [None]:
# PFlogPF normalisation: total-count scaling, log1p, then total-count scaling
# again. With target_sum=None (the default) scanpy scales every cell to the
# median total count over all cells.
# All three calls modify `adata` in place, so run this cell once per freshly
# built `adata`.
sc.pp.normalize_total(adata, target_sum=None)
sc.pp.log1p(adata)
sc.pp.normalize_total(adata, target_sum=None)

In [None]:
# Run the anti-correlation feature selection. The matrix is passed transposed
# (variables as rows) together with the matching variable names; no species is
# given and no pathways are pre-removed.
anti_cor_table = get_anti_cor_genes(
    adata.X.T,
    adata.var_names.tolist(),
    species="",
    pre_remove_pathways=[],
)

# Keep only the rows flagged as selected. Comparing against True (rather than
# using the column directly as a mask) also maps missing values to "not selected".
selected_table = anti_cor_table.loc[anti_cor_table["selected"].eq(True)]

In [None]:
# Merge the anti-correlation results into the per-variable annotations. Columns
# left over from an earlier run of this cell are dropped first, so re-running it
# does not create duplicate column labels (which would turn
# `adata.var["selected"]` into a DataFrame instead of a Series).
adata.var = pd.concat(
    [
        adata.var.drop(columns=adata.var.columns.intersection(anti_cor_table.columns)),
        anti_cor_table,
    ],
    axis=1,
)

# do some patching to the anti-correlated genes table:
# entries without a "selected" value count as not selected, and the flag is cast
# to a real boolean dtype so it is safe to use as a mask (a column containing
# missing values would otherwise stay object-typed).
adata.var["highly_variable"] = adata.var["selected"].fillna(False).astype(bool)

# Store the current AnnData state under `.raw`.
adata.raw = adata

In [None]:
# Number of principal components fed into the neighbour graph below.
adata.uns["proposed_dimensionality"] = 25

# Scale the data (values clipped at 10), then run PCA restricted to the
# variables flagged in adata.var["highly_variable"].
sc.pp.scale(adata, max_value=10)
sc.pp.pca(adata, use_highly_variable=True)

# Neighbour graph on the PCA representation with cosine distance, using
# `proposed_dimensionality` components, followed by the UMAP layout.
sc.pp.neighbors(
    adata,
    n_neighbors=15,
    n_pcs=adata.uns["proposed_dimensionality"],
    use_rep="X_pca",
    metric="cosine",
)
sc.tl.umap(adata, min_dist=0.1)

In [None]:
# Display the per-observation annotation table for a visual check.
adata.obs

In [None]:
# UMAP embedding coloured by the "final_Id" annotation, with the category
# labels drawn directly on the data.
fig, ax = plt.subplots(figsize=(15, 10))
sc.pl.embedding(
    adata,
    basis="X_umap",
    color="final_Id",
    legend_loc="on data",
    s=30,
    ax=ax,
)