In [None]:
import re
import os 
import sys 

import numpy as np
import matplotlib.pyplot as plt
import skimage
from skimage import io

from pathlib import Path
from tqdm.notebook import trange, tqdm
from joblib import Parallel, delayed
from skimage import exposure
import h5py
import pandas as pd
import scanpy as sc
import squidpy as sq
sc.settings.verbosity = 3

from matplotlib.pyplot import rc_context
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from functools import reduce
from matplotlib import cm, colors
import scanorama
import seaborn as sns 
import anndata as ad
from PIL import Image

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before each cell
# is executed, so edits to the project modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Import path
# Make the parent of the working directory and its "src" sub-directory importable,
# skipping any entry that is already present on sys.path.
for module_path in map(str, (Path.cwd().parents[0], Path.cwd().parents[0] / "src")):
    if module_path not in sys.path:
        sys.path.append(module_path)
    

In [None]:
# Project-local modules. `config` is expected to provide names that are used below
# without being defined in this notebook (e.g. `data_dir`) — TODO confirm.
from config import *
# NOTE: this rebinds `io`, replacing the `skimage.io` module imported in the first cell.
import my_io as io

In [None]:
# Folder with the ROI-level data. `data_dir` is not defined in this notebook and
# presumably comes from `from config import *` — TODO confirm.
data_ROI = data_dir / 'ROI_new'

# Clustering

In [None]:
import multiprocessing

# CPU count reported by the system; passed as n_jobs to the t-SNE computation further down.
n_cpu = multiprocessing.cpu_count()

In [None]:
# Location of the AnnData file that is loaded (and later overwritten) by this notebook.
adata_path = data_ROI / "raw.h5ad"

In [None]:
# Load the expression AnnData. Fail loudly when the file is missing: every later cell
# needs `adata`, so merely printing a message would either end in a NameError a few
# cells down or silently reuse a stale `adata` left over in the kernel.
if not os.path.exists(adata_path):
    raise FileNotFoundError(f"Expression file not found: {adata_path}")
adata = ad.read_h5ad(adata_path)

In [None]:
# Standardise every variable in place (scanpy default: zero mean, unit variance) and
# clip the scaled values at 2.5.
# NOTE(review): `adata` is written back to `adata_path` further down, i.e. to the file it
# was loaded from, so a second full run would scale already-scaled data — confirm intended.
sc.pp.scale(adata, max_value=2.5)

In [None]:
# PCA with the ARPACK solver, followed by the per-component variance ratio on a log scale.
# The cells below use the first 15 components (n_pcs=15).
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Build the neighbourhood graph on the first 15 PCs, then compute a UMAP embedding
# from it with min_dist=1.0 and spread=2.0.
sc.pp.neighbors(adata, n_pcs=15)
sc.tl.umap(adata, min_dist=1.0, spread=2.0)

In [None]:
# t-SNE embedding on the same 15 PCs, using n_cpu parallel jobs.
sc.tl.tsne(adata, n_pcs=15, n_jobs=n_cpu)

In [None]:
# Persist the scaled data together with the PCA / neighbours / UMAP / t-SNE results.
# NOTE(review): this overwrites the file that was loaded at the top of the notebook, so the
# original contents of `adata_path` are lost and a re-run starts from scaled data. It also
# runs before the Leiden clustering below, so cluster labels are not part of the saved file.
# Consider writing to a separate output path — confirm with downstream consumers first.
adata.write(adata_path)

In [None]:
# Leiden community detection on the neighbourhood graph (resolution 0.5); with scanpy's
# default key the labels are stored in adata.obs['leiden'], which later cells read.
sc.tl.leiden(adata, resolution=0.5)

In [None]:
sc.set_figure_params(format='png')

# t-SNE embedding coloured by Leiden cluster, with the cluster labels drawn on the data.
with rc_context({'figure.figsize': (7, 7), 'figure.dpi': 100}):
    fig = sc.pl.tsne(
        adata,
        color=['leiden'],
        size=5,
        legend_loc='on data',
        legend_fontsize=12,
        legend_fontoutline=2,
        add_outline=True,
        frameon=False,
        title='clustering of cells',
    )

In [None]:
sc.set_figure_params(format='png')

# Same t-SNE embedding, this time coloured by the 'ROI' annotation.
# NOTE(review): the title still reads 'clustering of cells' although the colouring is by
# ROI — consider a title that matches the plot.
with rc_context({'figure.figsize': (7, 7), 'figure.dpi': 100}):
    fig = sc.pl.tsne(
        adata,
        color=['ROI'],
        size=5,
        legend_loc='on data',
        legend_fontsize=12,
        legend_fontoutline=2,
        add_outline=True,
        frameon=False,
        title='clustering of cells',
    )

In [None]:
# Hierarchical clustering of the Leiden groups; used by the matrixplot below (dendrogram=True).
sc.tl.dendrogram(adata, groupby='leiden')

In [None]:
# Mean scaled expression of every variable per Leiden cluster, ordered by the dendrogram.
sc.pl.matrixplot(
    adata,
    var_names=adata.var_names.tolist(),
    groupby='leiden',
    dendrogram=True,
    cmap='RdBu_r',
    vmin=-2,
    vmax=2,
    colorbar_title='mean z-score',
)

In [None]:
# remap some clusters
# Start from an identity mapping over the observed Leiden labels, then fold cluster 15
# into cluster 1.
cluster_join_dict = {
    label: label for label in map(str, adata.obs['leiden'].unique().tolist())
}
cluster_join_dict.update({'15': '1'})

In [None]:
# Translate Leiden labels into merged cluster labels; a label without an entry in
# cluster_join_dict is kept unchanged.
adata.obs['clusters'] = adata.obs["leiden"].map(
    lambda label: cluster_join_dict.get(label, label)
).astype("category")

# Give the merged clusters the first N Leiden colours, N being the number of distinct
# merged labels.
# NOTE(review): a plain prefix slice only lines up with the Leiden colours when the merged-away
# cluster is the last one; re-check this if a different cluster is remapped.
adata.uns['clusters_colors'] = adata.uns['leiden_colors'][
    :len(np.unique(adata.obs['clusters']))
]
# Cluster labels in numeric (not lexicographic) order.
clusters = sorted(adata.obs['clusters'].unique().tolist(), key=int)

In [None]:
# Put the categories of 'clusters' into the numeric order held in `clusters`, then
# recompute the dendrogram for the merged clusters (used by the plots below).
adata.obs['clusters'] = adata.obs['clusters'].cat.reorder_categories(clusters)
sc.tl.dendrogram(adata, groupby='clusters')

In [None]:
# Mean scaled expression of every variable per merged cluster, ordered by the dendrogram.
sc.pl.matrixplot(
    adata,
    var_names=adata.var_names.tolist(),
    groupby='clusters',
    dendrogram=True,
    cmap='RdBu_r',
    vmin=-2,
    vmax=2,
    colorbar_title='mean z-score',
)

In [None]:
# Per-cell heatmap of all variables, cells grouped by merged cluster.
sc.pl.heatmap(
    adata,
    var_names=adata.var_names.tolist(),
    groupby='clusters',
    dendrogram=True,
    cmap='RdBu_r',
    vmin=-2,
    vmax=2,
    figsize=(7, 10),
)

# Viz

In [None]:
import matplotlib 

def create_clustered_cell(mask, label2cell, my_cmap, **kwargs):
    """Paint a label mask with one colour per cluster.

    Parameters
    ----------
    mask : array
        Label image. Only ``shape[0]`` and ``shape[1]`` are read, and the lookup
        must yield exactly two index arrays, so a 2-D array is expected.
    label2cell : dict
        Maps a cluster label (anything ``int()`` accepts) to the collection of
        mask values belonging to that cluster.
    my_cmap : sequence
        Indexed with ``int(label)``; each entry is written into the three
        channels of the matching pixels.
    **kwargs
        Accepted but not used.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(mask.shape[0], mask.shape[1], 3)``; pixels
        whose mask value belongs to no listed cell stay zero.
    """
    height, width = mask.shape[0], mask.shape[1]
    canvas = np.zeros((height, width, 3), dtype=np.uint8)

    progress = tqdm(label2cell.items(), total=len(label2cell))
    for label, cell_ids in progress:
        # Pixel coordinates whose mask value is one of this cluster's cells.
        row_idx, col_idx = np.nonzero(np.isin(mask, cell_ids))
        canvas[row_idx, col_idx, :] = my_cmap[int(label)]

    return canvas

In [None]:
# One spatial scatter plot per ROI, coloured by merged cluster.
with rc_context({'figure.figsize': (10, 10)}):
    for batch in adata.obs['ROI'].unique():
        adata_subset = adata[adata.obs['ROI'] == batch, :]
        sc.pl.spatial(adata_subset, color="clusters", spot_size=10)

In [None]:
# Leiden colours (hex strings) and the distinct Leiden labels.
# NOTE(review): `colors` rebinds the name imported by `from matplotlib import cm, colors`
# in the first cell; the conversion below therefore goes through `matplotlib.colors`.
colors = adata.uns['leiden_colors']
labels = np.unique(adata.obs.leiden.tolist())

# Hex -> RGB floats -> 0..255 uint8, one row per Leiden colour.
colors_rgb = (
    np.array([matplotlib.colors.hex2color(color) for color in colors]) * 255
).astype(np.uint8)

In [None]:
# Get label2cell dict
# Leiden label -> list of the 'Cell' values of all cells carrying that label.
label2cell = {
    l: adata.obs.loc[adata.obs.leiden == l, 'Cell'].to_list()
    for l in labels
}

# NOTE(review): `mask` is not defined anywhere in the visible notebook — it must come from
# `from config import *` or from earlier kernel state; confirm before a fresh Run All.
img = create_clustered_cell(mask, label2cell, colors_rgb)

In [None]:
# Leiden clusters selected under the name `macrophage`; only these groups are
# highlighted in the spatial plot.
macrophage = ['1', '9']
with rc_context({'figure.figsize': (10, 10)}):
    sc.pl.spatial(
        adata,
        color="leiden",
        groups=macrophage,
        spot_size=10,
    )

In [None]:
# Get label2cell dict
# Restricted to the clusters listed in `macrophage`; colours are still looked up by
# int(label) in the full Leiden colour table.
label2cell = {
    l: adata.obs.loc[adata.obs.leiden == l, 'Cell'].to_list()
    for l in macrophage
}

# NOTE(review): `mask` is not defined anywhere in the visible notebook — confirm its origin.
img = create_clustered_cell(mask, label2cell, colors_rgb)

In [None]:
# Rebuild the coloured mask for the current label2cell and wrap it in a PIL image so the
# notebook renders it as the cell output.
img = Image.fromarray(create_clustered_cell(mask, label2cell, colors_rgb))
img

In [None]:
# import napari 

# viewer = napari.view_image(np.stack(imgs_filtered), channel_axis=0, name=markers)
# viewer.add_labels(mask)