In [None]:
import scanpy as sc
import hotspot

import numpy as np
# import mplscience
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [None]:
# Read the AnnData object this notebook works on.
# Alternative input file (currently disabled):
#   /home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_PFV8_annotated_v5.h5ad
h5ad_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.h5ad"
adata = sc.read_h5ad(h5ad_path)

In [None]:
# Show the first distinct value found in obs['dataset_id'].
adata.obs['dataset_id'].unique()[0]

In [None]:
# Keep only the cells of one dataset; `.copy()` turns the view into an
# independent AnnData object so later edits do not touch `adata`.
dataset_mask = adata.obs['dataset_id'] == "SUBTH_UCI5224_salk"
adata_sub = adata[dataset_mask].copy()
adata_sub

In [None]:
# Use the raw counts as the working matrix. Take a copy: assigning the layer
# object directly would make `.X` and `layers['counts']` share one matrix, so
# any later in-place transform of `.X` (normalization, log1p, ...) could
# silently overwrite the raw counts as well. Copying also makes this cell safe
# to re-run after `.X` has been transformed.
adata_sub.X = adata_sub.layers['counts'].copy()

# Per-cell total of the raw counts, flattened to a 1-D array (the sum of a
# sparse matrix comes back 2-D). Stored in obs for use as the UMI-count column.
adata_sub.obs['total_counts'] = np.asarray(adata_sub.X.sum(axis=1)).ravel()

# Column-compressed version of the raw counts, built from the untouched layer.
# `copy=True` guarantees an independent matrix even when the counts are already
# stored as CSC, where a plain `tocsc()` may hand back the very same object.
adata_sub.layers['csc_counts'] = adata_sub.layers['counts'].tocsc(copy=True)

In [None]:
# Normalize with a per-cell target sum of 1e4, then apply log1p. Neither return
# value is kept, so both steps rely on scanpy updating `adata_sub` itself.
sc.pp.normalize_total(adata_sub, target_sum=1e4)
sc.pp.log1p(adata_sub)

In [None]:
# Hotspot configuration: read counts from the 'csc_counts' layer, use the
# 'danb' model, take cell positions from obsm['spatial'] as the latent space,
# and read per-cell UMI totals from obs['total_counts'].
hotspot_options = {
    "layer_key": "csc_counts",
    "model": "danb",
    "latent_obsm_key": "spatial",
    "umi_counts_obs_key": "total_counts",
}
hs = hotspot.Hotspot(adata_sub, **hotspot_options)

In [None]:
# Build Hotspot's k-nearest-neighbor graph: 300 neighbors per cell, unweighted.
hs.create_knn_graph(weighted_graph=False, n_neighbors=300)

In [None]:
# Run Hotspot's autocorrelation computation with 16 parallel jobs, then preview
# the first rows of the resulting table.
hs_results = hs.compute_autocorrelations(jobs=16)
hs_results.head()

In [None]:
# Genes whose autocorrelation passes FDR < 0.05.
is_significant = hs_results.FDR < 0.05
hs_genes = hs_results.loc[is_significant].index
print(hs_genes.size)

# Pairwise local correlations among the selected genes (16 parallel jobs).
lcz = hs.compute_local_correlations(hs_genes, jobs=16)

In [None]:
# Group genes into modules with Hotspot (min_gene_threshold=20,
# fdr_threshold=0.05, core_only=False), then tally how many entries carry each
# module label -- presumably genes per module; confirm against Hotspot's docs.
modules = hs.create_modules(
    min_gene_threshold=20, core_only=False, fdr_threshold=0.05
)
modules.value_counts()

In [None]:
# Draw Hotspot's built-in plot of the local correlations, using default options.
hs.plot_local_correlations()

In [None]:
# modules

In [None]:
# Inspect one module: attach module labels to the per-gene results, keep the
# genes of the chosen module, and show the ten with the largest Z.
mod = 2
res = hs.results.join(hs.modules)
res = res[res["Module"] == mod]
res.sort_values(by="Z", ascending=False).iloc[:10]

In [None]:
# Two-stop colormap running from light gray (#DDDDDD) to black (#000000).
cmap = mcolors.LinearSegmentedColormap.from_list('grays', ['#DDDDDD', '#000000'])

# Six genes with the largest Z among those assigned to the chosen module.
module = 1
results = hs.results.join(hs.modules)
results = results[results['Module'] == module]
genes = results.sort_values(by='Z', ascending=False).index[:6]

# One spatial panel per gene; color limits are passed as 'p0' / 'p95'.
spatial_kwargs = dict(
    cmap=cmap,
    frameon=False,
    vmin='p0',
    vmax='p95',
    spot_size=30,
)
sc.pl.spatial(adata_sub, color=genes, **spatial_kwargs)


In [None]:
# Compute Hotspot's module scores and preview the first rows of the result.
module_scores = hs.calculate_module_scores()
module_scores.head()

In [None]:
# Copy every module-score column into obs under the name "Module <id>_score";
# `module_cols` keeps the new obs column names in the same order.
module_cols = [f"Module {c}_score" for c in module_scores.columns]
for c, key in zip(module_scores.columns, module_cols):
    adata_sub.obs[key] = module_scores[c]

In [None]:
# Label each cell with the name of the score column holding its largest module
# score (row-wise idxmax over the columns listed in `module_cols`).
adata_sub.obs['module_assignment'] = adata_sub.obs[module_cols].idxmax(axis=1)

In [None]:
# Number of cells assigned to each module.
adata_sub.obs.groupby('module_assignment').size()

In [None]:
# For every module assignment, count how many cells fall into each `Group`,
# returned as a flat table with columns: module_assignment, Group, count.
#
# `reset_index(name='count')` names the count column explicitly instead of
# relying on the default name of the value_counts() result. That default is
# 'count' only from pandas 2.0 on; older versions name the series after the
# counted column ('Group'), which collides with the index level of the same
# name when the index is reset.
df_module_assignments = (
    adata_sub.obs
    .groupby('module_assignment')['Group']
    .value_counts()
    .reset_index(name='count')
)

In [None]:
# For each module-score column, display the five `Group` values with the
# highest cell counts among cells assigned to that module.
for module in module_cols:
    assigned_here = df_module_assignments['module_assignment'] == module
    df_sub = df_module_assignments[assigned_here]
    display(df_sub.sort_values(by='count', ascending=False).iloc[:5])

In [None]:
# One spatial panel per module-score column; color limits are passed as
# 'p0' / 'p95'.
sc.pl.spatial(adata_sub, color=module_cols, frameon=False, spot_size=30, vmin="p0", vmax="p95")

In [None]:
# Light-gray (#DDDDDD) to black (#000000) colormap for the expression panels.
gray_stops = ['#DDDDDD', '#000000']
cmap = mcolors.LinearSegmentedColormap.from_list('grays', gray_stops)

# Select the chosen module's genes and keep the six with the largest Z.
module = 6
results = hs.results.join(hs.modules)
results = results[results['Module'] == module]
genes = results.sort_values(by='Z', ascending=False).iloc[:6].index

# One spatial panel per selected gene; color limits are passed as 'p0' / 'p95'.
sc.pl.spatial(
    adata_sub,
    color=genes,
    cmap=cmap,
    spot_size=30,
    vmin='p0',
    vmax='p95',
    frameon=False,
)
