In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import scanpy as sc
from hv_cancer_modules import DotPlot, UMAPPlot
import holoviews as hv 
hv.extension('bokeh')

In [None]:
# Marker genes per cell type, passed to the dot plot further down.
# Each "Note" comment applies to the entry directly below it and flags genes
# that are listed as *negative* markers (expected to be absent/low).
# Every gene appears at most once within a given cell type's list.
marker_genes = {
    "CD14+ Mono": ["FCN1", "CD14"],
    "CD16+ Mono": ["TCF7L2", "FCGR3A", "LYN"],
    # Note: DMXL2 is a negative marker for cDC2
    "cDC2": ["CST3", "COTL1", "LYZ", "DMXL2", "CLEC10A", "FCER1A"],
    "Erythroblast": ["MKI67", "HBA1", "HBB"],
    # Note: HBM and GYPA are negative markers for Proerythroblast
    "Proerythroblast": ["CDK6", "SYNGR1", "HBM", "GYPA"],
    "NK": ["GNLY", "NKG7", "CD247", "FCER1G", "TYROBP", "KLRG1", "FCGR3A"],
    "ILC": ["ID2", "PLCG2", "GNLY", "SYNE1"],
    "Naive CD20+ B": ["MS4A1", "IL4R", "IGHD", "FCRL1", "IGHM"],
    # Note: IGHD and IGHM are negative markers for B cells
    "B cells": [
        "MS4A1",
        "ITGB1",
        "COL4A4",
        "PRDM1",
        "IRF4",
        "PAX5",
        "BCL11A",
        "BLK",
        "IGHD",
        "IGHM",
    ],
    "Plasma cells": ["MZB1", "HSP90B1", "FNDC3B", "PRDM1", "IGKC", "JCHAIN"],
    # Note: PAX5 is a negative marker for Plasmablast
    "Plasmablast": ["XBP1", "PRDM1", "PAX5"],
    "CD4+ T": ["CD4", "IL7R", "TRBC2"],
    # GZMA was previously listed twice here; the duplicate has been dropped.
    "CD8+ T": ["CD8A", "CD8B", "GZMK", "GZMA", "CCL5", "GZMB", "GZMH"],
    "T naive": ["LEF1", "CCR7", "TCF7"],
    "pDC": ["GZMB", "IL3RA", "COBLL1", "TCF4"],
}

In [None]:
# Load the annotated dataset from a path relative to the notebook's working
# directory. Plots below group this dataset with groupby='cell_type'.
adata = sc.read_h5ad('data/adata-annotated.h5ad')

# Alternative dataset (disabled); group with groupby='louvain' instead:
# adata = sc.datasets.pbmc3k_processed()

# Alternative dataset (disabled); the three calls after the load run
# PCA, neighbors and UMAP on it:
# adata = sc.datasets.paul15()
# sc.pp.pca(adata)
# sc.pp.neighbors(adata)
# sc.tl.umap(adata)

In [None]:
# Bare expression: show the loaded object's repr as the cell output.
adata

### Test-data plotting with the AnnData interface

In [None]:
# adata = sc.datasets.paul15()
# sc.pp.pca(adata)
# sc.pp.neighbors(adata)
# sc.tl.umap(adata)

# scatter = (hv.Scatter(adata, "obsm.X_umap.0", ["obsm.X_umap.1", "obs.paul15_clusters"])
#     .opts(color="obs.paul15_clusters", cmap="Category20",
#           width=500, height=300, legend_position='left', legend_cols=10)
#     .hist()
# )
# scatter

## `DotPlot` from `hv_cancer_modules`

In [None]:
# Build the marker-gene dot plot for the loaded dataset, one group per
# value of the 'cell_type' annotation.
dot_plot = DotPlot(adata=adata, marker_genes=marker_genes, groupby='cell_type', max_dot_size=10)

# Colour the dots by the 'mean_expression_norm' value; as the last expression
# in the cell, the styled plot is what gets displayed.
# Optional extra styling: fontsize={"labels": 10, "ticks": 8}
dot_plot.opts(color='mean_expression_norm')

## hv mod UMAPPlot

In [None]:
import pandas as pd
# UMAP coordinates as a two-column frame (default positional row index).
umap_df = pd.DataFrame(
    data=adata.obsm["X_umap"],
    columns=["UMAP1", "UMAP2"],
)

# Re-label the coordinate rows with the obs index so the join aligns row for
# row, then attach the coordinates to the per-observation annotations.
obs_df = adata.obs.join(
    umap_df.set_index(adata.obs.index)
)

In [None]:
# Plot options shared by the UMAP plots below (the plain Points plot and the
# datashaded version both unpack this mapping into `.opts(...)`).
umap_opts = {
    "legend_position": 'right',
    "responsive": True,
    "height": 400,
    "tools": ["box_select", "lasso_select"],
    "xlabel": "UMAP1",
    "ylabel": "UMAP2",
}

In [None]:
# Points element over the two UMAP columns of `obs_df`, coloured by the
# 'cell_type' column using the Category20 palette plus the shared options.
umap_plot = hv.Points(
    obs_df,
    kdims=["UMAP1", "UMAP2"],
).opts(
    **umap_opts,
    color="cell_type",
    cmap='Category20',
)

In [None]:
import holoviews.operation.datashader as hd
import datashader as ds

def create_umap_plot(umap_element, labels=False, category='cell_type'):
    """Datashade a UMAP scatter by category, optionally labelling each category.

    Parameters
    ----------
    umap_element : holoviews element (e.g. ``hv.Points``)
        Element to rasterize. Its ``.data`` must be a pandas DataFrame that
        contains the element's first two key dimensions and the ``category``
        column. ``ds.count_cat`` is expected to need a categorical dtype for
        that column -- TODO confirm against the datashader version in use.
    labels : bool, default False
        When true, overlay a text label for every category at the median
        position of its points.
    category : str, default 'cell_type'
        Column used both for the categorical aggregation and for the labels.

    Returns
    -------
    The dynspread-enhanced datashaded plot, overlaid with ``hv.Labels`` when
    ``labels`` is true.

    Notes
    -----
    Reads the module-level ``umap_opts`` mapping for the plot options.
    """
    # Imported locally so the function only relies on holoviews itself.
    from holoviews.plotting.util import process_cmap

    data = umap_element.data
    x_dim, y_dim = (dim.name for dim in umap_element.kdims[:2])

    # Categorical aggregates are coloured through `color_key`; `cmap` only
    # applies to single-layer aggregates, so the palette has to be expanded
    # into an explicit list with one colour per category.
    column = data[category]
    cat_accessor = getattr(column, 'cat', None)
    if cat_accessor is not None:
        n_colors = len(cat_accessor.categories)
    else:
        n_colors = column.nunique()
    color_key = process_cmap('Category20', ncolors=n_colors, categorical=True)

    umap_shaded = hd.datashade(
        umap_element,
        aggregator=ds.count_cat(category),
        color_key=color_key,
        min_alpha=40,  # lower bound on the alpha of non-empty pixels
    ).opts(**umap_opts)

    # Spread sparse pixels so isolated points stay visible.
    umap_spread = hd.dynspread(umap_shaded, threshold=0.5, max_px=4)

    if not labels:
        return umap_spread

    # Median position per category. `observed=True` skips categories with no
    # points, which would otherwise produce NaN label coordinates.
    label_df = (
        data.groupby(category, observed=True)[[x_dim, y_dim]]
        .median()
        .reset_index()
    )
    # Separate name so the boolean `labels` argument is not overwritten.
    label_layer = hv.Labels(label_df, kdims=[x_dim, y_dim], vdims=[category]).opts(
        text_font_size='8pt',
        text_color='black',
        text_alpha=0.8,
        text_font_style='bold',
    )
    return umap_spread * label_layer


In [None]:
# Datashaded version of `umap_plot` with per-category labels overlaid;
# the bare name on the last line displays it as the cell output.
ds_umap_plot = create_umap_plot(umap_plot, labels=True)
ds_umap_plot