In [None]:
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import tifffile
from skimage.segmentation import find_boundaries
from skimage.transform import resize

In [None]:
%load_ext autoreload

In [None]:
%autoreload 2

In [None]:
import mip.utils as utils
from mip.gating import gate_region

## load in data

In [None]:
# Sample to annotate; the commented-out IDs are alternative samples.
# sample = 'HT323B1-H3'
sample = 'HT339B1-H4A4'
# sample = 'HT397B1-H3A1'
# sample = 'HT206B1-H1'
# Absolute path to this sample's level_4 spatial-features AnnData file.
fp = f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/spatial_features.h5ad'

In [None]:
# Load the per-cell feature table and display its summary.
adata = sc.read_h5ad(fp)
adata

In [None]:
# Plot the 20 variables with the highest values (called before the log transform below).
sc.pl.highest_expr_genes(adata, n_top=20, )

In [None]:
# Log-transform adata in place (the return value is not used).
# NOTE(review): re-running this cell without reloading adata would transform the data twice.
sc.pp.log1p(adata)

In [None]:
# List the variable (marker) names available in this object.
adata.var.index.to_list()

In [None]:
# Keep the full log-transformed matrix in .raw: adata is later restricted to the annotation
# markers, while the gating/calling cells read every channel through adata.raw.
adata.raw = adata

In [None]:
# Markers used for clustering, grouped by the broad cell type they identify.
# Commented-out entries are alternatives that are currently disabled.
annotation_marker_map = {
    'Endothelial': ['CD31'],
    'Epithelial': ['E-cadherin', 'Pan-CK'],
    'B cell': ['CD20'],
    'Macrophage': ['CD68', 'CD163'],
#     'DC': ['CD11c'],
#     'T cell': ['CD3e', 'CD4', 'CD8', 'CD45RO'],
    'T cell': ['CD3e', 'CD4', 'CD8'],
    'Fibroblast/Myo': ['Podoplanin', 'SMA', 'Vimentin'],
}
# Flatten to a single marker list, preserving the dict's insertion order.
annotation_markers = [
    marker
    for markers in annotation_marker_map.values()
    for marker in markers
]

In [None]:
# I've found it helpful to limit to markers only used in annotation
# This helps keep the clusters cleaner
# .copy() turns the slice into an independent AnnData: slicing alone returns a view of the
# parent object, and the neighbors/UMAP/leiden cells below write their results into adata.
adata = adata[:, annotation_markers].copy()
adata

In [None]:
# Build the neighbor graph (15 neighbors) on the marker-restricted adata.
sc.pp.neighbors(adata, n_neighbors=15)

In [None]:
# Compute the UMAP embedding used by the plots below.
sc.tl.umap(adata)

In [None]:
# Leiden clustering at resolution 1.0; the labels are read from adata.obs['leiden'] below.
sc.tl.leiden(adata, resolution=1.)

In [None]:
# UMAP coloured by leiden cluster, with cluster labels drawn on the embedding.
sc.pl.umap(adata, color=['leiden'], legend_loc='on data')

In [None]:
# One UMAP panel per annotation marker, used to decide which clusters express what.
sc.pl.umap(adata, color=annotation_markers)

In [None]:
# Manual assignment of leiden clusters to broad compartments.
# These IDs are specific to one clustering run and must be revisited whenever the
# clustering above changes.
cluster_map = {
    'Immune': [17, 7, 21, 8, 1, 16, 13],
    'Stroma': [6, 0, 12, 18, 4, 9, 15, 14],
    'Epithelial': [10, 3, 2, 19, 5, 11]
}

# Invert to {leiden label: compartment}; the ids are stringified to match the leiden labels.
leiden_to_general = {
    str(cluster_id): general
    for general, cluster_ids in cluster_map.items()
    for cluster_id in cluster_ids
}

# Clusters absent from cluster_map keep their leiden label as their "cell type" and are
# therefore never gated later on. Report them instead of letting that pass silently.
unmapped = sorted(set(adata.obs['leiden']) - set(leiden_to_general), key=str)
if unmapped:
    print(f'leiden clusters not assigned in cluster_map (label left as-is): {unmapped}')

adata.obs['cell_type_general'] = [leiden_to_general.get(label, label)
                                  for label in adata.obs['leiden']]

In [None]:
# UMAP coloured by the broad compartment assigned above.
sc.pl.umap(adata, color=['cell_type_general'], ncols=1)

In [None]:
# Negate the row coordinate so the scatter plot's y axis runs in the opposite direction
# to the stored centroid_row values, then plot cell positions coloured by compartment.
adata.obs['centroid_row_inverted'] = [-x for x in adata.obs['centroid_row']]
sc.pl.scatter(adata, x='centroid_col', y='centroid_row_inverted', color='cell_type_general')

In [None]:
# Checkpoint: save the compartment-level annotation for this sample.
adata.write_h5ad(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/cell_annotation_macro.h5ad')

#### gating

In [None]:
# Reload the checkpoint written above so the gating section can be run on its own.
adata = sc.read_h5ad(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/cell_annotation_macro.h5ad')
adata

In [None]:
# Load the sample's OME-TIFF via mip.utils; the result is indexed by channel name in the
# gating cells below. Display the available channel names.
channel_to_img = utils.extract_ome_tiff(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_2/{sample}.ome.tiff')
channel_to_img.keys()

In [None]:
# Read the cell segmentation image and derive a boundary image from it; the boundary
# image is passed to every gate_region call below.
seg_img = tifffile.imread(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_3/segmentation/cell_segmentation.tif')
boundary_img = find_boundaries(seg_img)
seg_img.shape

In [None]:
# Read the level_2 pseudo.tiff image (used for crops and as the low-res spatial image later).
pseudo = tifffile.imread(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_2/pseudo.tiff')
pseudo.shape

In [None]:
## defaults
# Structure: {compartment: {cell type: {marker or tuple of markers: threshold}}}.
# A tuple key is gated on the mean of its markers (see bin_cells); a cell type with several
# keys must pass every one of them. The per-cell-type cells below overwrite these defaults
# with sample-specific values.
gating_map = {
    'Immune': {
        'CD8_T_cell': {'CD8': 8.38, 'CD3e': 6.66},
        'CD4_T_cell': {'CD4': 7.17, 'CD3e': 6.66},
        'Macrophage': {'CD68': 7.53},
        'B_cell': {'CD20': 7.69},
#         'DC': {('CD11c', 'HLA-DR'): 6.52},
        'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
    },
    'Stroma': {
        'Endothelial': {'CD31': 7.35},
        'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
    },
    'Epithelial': {
        'Epithelial': {('Pan-CK', 'E-cadherin'): 6.05},
        'Myoepithelial': {('Podoplanin', 'SMA'): 7.88},
        'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
    }
}

In [None]:
# # in case of different marker names
# gating_map = {
#     'Immune': {
#         'CD8_T_cell': {'CD8': 8.38, 'CD3': 6.66},
#         'CD4_T_cell': {'CD4': 7.17, 'CD3': 6.66},
#         'Macrophage': {'CD68': 7.53},
#         'B_cell': {'CD20': 7.69},
#         'DC': {('CD11c', 'HLA-DR'): 6.52},
#         'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
#     },
#     'Stroma': {
#         'Endothelial': {'CD31': 7.35},
#         'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
#     },
#     'Epithelial': {
#         'Epithelial': {('Pan-CK', 'E-cadherin'): 6.05},
#         'Myoepithelial': {('Podoplanin', 'SMA'): 7.88},
#         'Fibroblast': {('Vimentin', 'Podoplanin', 'SMA'): 5.76}
#     }
# }

###### CD8 T cell

In [None]:
# Call gate_region (from mip.gating, not shown here) for CD8 on the 'Immune' compartment,
# seeded with the current CD8_T_cell threshold; radius is overridden to 500.
channel = 'CD8'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='Immune', cell_type_col='cell_type_general',
            default_value=gating_map['Immune']['CD8_T_cell'][channel], radius=500)

In [None]:
# CD8 gate check on leiden cluster '15' (selected via cell_type_col='leiden').
channel = 'CD8'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='15', cell_type_col='leiden',
            default_value=gating_map['Immune']['CD8_T_cell'][channel])

In [None]:
# CD3e gate check on the 'Immune' compartment, seeded with the CD8_T_cell CD3e threshold.
channel = 'CD3e'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='Immune', cell_type_col='cell_type_general',
            default_value=gating_map['Immune']['CD8_T_cell'][channel])

In [None]:
# Thresholds kept for CD8 T cells in this sample: CD8 is lowered from the 8.38 default,
# CD3e stays at its default.
gating_map['Immune']['CD8_T_cell'] = {
    'CD8': 6.88,
    'CD3e': 6.66
}

###### CD4 T cell

In [None]:
# CD4 gate check on the 'Immune' compartment, seeded with the current CD4_T_cell threshold.
channel = 'CD4'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='Immune', cell_type_col='cell_type_general',
            default_value=gating_map['Immune']['CD4_T_cell'][channel])

In [None]:
# CD4 gate check on leiden cluster '11'. Relies on `channel` still being 'CD4' from the
# previous cell.
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='11', cell_type_col='leiden',
            default_value=gating_map['Immune']['CD4_T_cell'][channel])

In [None]:
# CD3e gate check on the 'Immune' compartment, seeded with the CD4_T_cell CD3e threshold.
channel = 'CD3e'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='Immune', cell_type_col='cell_type_general',
            default_value=gating_map['Immune']['CD4_T_cell'][channel])

In [None]:
# Thresholds kept for CD4 T cells in this sample: CD4 is lowered from the 7.17 default,
# CD3e stays at its default.
gating_map['Immune']['CD4_T_cell'] = {
    'CD4': 6.13,
    'CD3e': 6.66
}

###### macrophage

In [None]:
# CD68 gate check on the 'Immune' compartment, seeded with the current Macrophage threshold.
channel = 'CD68'
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='Immune', cell_type_col='cell_type_general',
            default_value=gating_map['Immune']['Macrophage'][channel])

In [None]:
# CD68 gate check on leiden cluster '8' with radius=500. Relies on `channel` still being
# 'CD68' from the previous cell.
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='8', cell_type_col='leiden',
            default_value=gating_map['Immune']['Macrophage'][channel], radius=500)

In [None]:
# Macrophage threshold kept for this sample (same value as the default).
gating_map['Immune']['Macrophage'] = {
    'CD68': 7.53
}

###### B cell

In [None]:
# CD20 gate check on the 'Immune' compartment. The four returned values are kept as
# r1, r2, c1, c2 and are used elsewhere in the notebook as row/column crop bounds.
channel = 'CD20'
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='Immune', cell_type_col='cell_type_general',
    default_value=gating_map['Immune']['B_cell'][channel]
)

In [None]:
# CD20 gate check on leiden cluster '13'. Relies on `channel` still being 'CD20' from the
# previous cell.
gate_region(adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
            cell_type='13', cell_type_col='leiden',
            default_value=gating_map['Immune']['B_cell'][channel])

In [None]:
# B cell threshold kept for this sample (lowered from the 7.69 default).
gating_map['Immune']['B_cell'] = {
    'CD20': 6.68
}

In [None]:
# plt.imshow(pseudo[r1:r2, c1:c2])

###### DC

In [None]:
# DC gate check on leiden cluster '16', using the summed CD11c + HLA-DR image.
channel = ('CD11c', 'HLA-DR')
img = channel_to_img['CD11c'] + channel_to_img['HLA-DR']
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='16', cell_type_col='leiden',
    # The default gating_map has no 'DC' entry (it is commented out), so look it up
    # defensively and fall back to 0. instead of raising KeyError on a fresh run.
    default_value=gating_map['Immune'].get('DC', {}).get(channel, 0.)
)

In [None]:
# DC gate check on leiden cluster '3'. Relies on `channel` and `img` from the previous cell.
# 'DC' is looked up defensively: the default gating_map has no such entry until the DC
# threshold cell has been run, so indexing it directly raises KeyError on a fresh run.
gate_region(adata, channel, channel_img=img, boundary_img=boundary_img,
            cell_type='3', cell_type_col='leiden',
            default_value=gating_map['Immune'].get('DC', {}).get(channel, 0.))

In [None]:
# Add the DC gate (absent from the defaults): mean of CD11c and HLA-DR must reach 6.52.
gating_map['Immune']['DC'] = {
    ('CD11c', 'HLA-DR'): 6.52
}

In [None]:
# Display r1 and c1 from the most recent gate_region call whose result was assigned.
r1, c1

###### Endothelial

In [None]:
# CD31 gate check on the 'Stroma' compartment, seeded with the current Endothelial threshold.
channel = 'CD31'
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='Stroma', cell_type_col='cell_type_general',
    default_value=gating_map['Stroma']['Endothelial'][channel]
)

In [None]:
# CD31 gate check on leiden cluster '4' with radius=500. Relies on `channel` still being
# 'CD31' from the previous cell.
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='4', cell_type_col='leiden',
    default_value=gating_map['Stroma']['Endothelial'][channel], radius=500
)

In [None]:
# Endothelial threshold kept for this sample (lowered from the 7.35 default).
gating_map['Stroma']['Endothelial'] = {
    'CD31': 6.17
}

###### Fibroblast

In [None]:
# Fibroblast gate check on the 'Stroma' compartment. The gate key is the marker tuple and
# the image shown is the sum of the three channel images.
channel = ('Vimentin', 'Podoplanin', 'SMA')
img = channel_to_img['Vimentin'] + channel_to_img['Podoplanin'] + channel_to_img['SMA']
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='Stroma', cell_type_col='cell_type_general',
    default_value=gating_map['Stroma']['Fibroblast'][channel]
)

In [None]:
# Fibroblast gate check on leiden cluster '6'. Relies on `channel` and `img` from the
# previous cell.
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='6', cell_type_col='leiden',
    default_value=gating_map['Stroma']['Fibroblast'][channel]
)

In [None]:
# Apply the same fibroblast threshold (lowered from the 5.76 default) in all three
# compartments so fibroblasts are called consistently wherever they were clustered.
gating_map['Stroma']['Fibroblast'] = {
    ('Vimentin', 'Podoplanin', 'SMA'): 5.07
}
gating_map['Immune']['Fibroblast'] = {
    ('Vimentin', 'Podoplanin', 'SMA'): 5.07
}
gating_map['Epithelial']['Fibroblast'] = {
    ('Vimentin', 'Podoplanin', 'SMA'): 5.07
}

###### Epithelial

In [None]:
# Epithelial gate check on the 'Epithelial' compartment, using the summed
# Pan-CK + E-cadherin image. Falls back to 0. if the tuple key is missing from the gate.
channel = ('Pan-CK', 'E-cadherin')
img = channel_to_img['Pan-CK'] + channel_to_img['E-cadherin']
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='Epithelial', cell_type_col='cell_type_general',
    default_value=gating_map['Epithelial']['Epithelial'].get(channel, 0.)
)

In [None]:
# Epithelial gate check on leiden cluster '3'. Relies on `channel` and `img` from the
# previous cell; unlike that cell, a missing tuple key raises KeyError here.
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='3', cell_type_col='leiden',
    default_value=gating_map['Epithelial']['Epithelial'][channel]
)

In [None]:
# Epithelial threshold kept for this sample (same value as the default).
gating_map['Epithelial']['Epithelial'] = {
    ('Pan-CK', 'E-cadherin'): 6.05
}

In [None]:
# plt.imshow(pseudo[r1:r2, c1:c2])

###### Myoepithelial

In [None]:
# Myoepithelial gate check on the 'Epithelial' compartment, using the summed
# Podoplanin + SMA image.
channel = ('Podoplanin', 'SMA')
img = channel_to_img['Podoplanin'] + channel_to_img['SMA']
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='Epithelial', cell_type_col='cell_type_general',
    default_value=gating_map['Epithelial']['Myoepithelial'][channel]
)

In [None]:
# Myoepithelial gate check on leiden cluster '2' with radius=500. Relies on `channel` and
# `img` from the previous cell.
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=img, boundary_img=boundary_img,
    cell_type='2', cell_type_col='leiden',
    default_value=gating_map['Epithelial']['Myoepithelial'][channel],
    radius=500
)

In [None]:
# Show the `pseudo` image cropped to the r1:r2, c1:c2 bounds returned by the last gate_region call.
plt.imshow(pseudo[r1:r2, c1:c2])

In [None]:
# Myoepithelial threshold kept for this sample (lowered from the 7.88 default).
gating_map['Epithelial']['Myoepithelial'] = {
    ('Podoplanin', 'SMA'): 6.38
}

###### call cell types

In [None]:
# Display the final thresholds that the cell-type calling below will use.
gating_map

In [None]:
def bin_cells(adata, marker_dict, valid_mask=None):
    """Flag the cells that pass every gate in ``marker_dict``.

    Parameters
    ----------
    adata
        Object whose ``adata.raw[:, key].X`` returns a cells-by-markers intensity matrix
        for a marker name or a tuple of marker names.
    marker_dict : dict
        Maps a marker name, or a tuple of marker names, to a threshold. For a tuple the
        per-cell mean over its markers is compared with the threshold.
    valid_mask : boolean array-like, optional
        If given, it is AND-ed with the result so only cells it marks True can pass.

    Returns
    -------
    Boolean mask with one entry per cell; True where every gate is met (``>=`` threshold)
    and, when provided, ``valid_mask`` is True.
    """
    passes = []
    for markers, threshold in marker_dict.items():
        intensities = adata.raw[:, markers].X
        # mean(axis=1) is 1-D for an ndarray but an (n, 1) matrix for matrix-like X;
        # asarray + ravel gives one value per cell in both cases.
        per_cell = np.asarray(intensities.mean(axis=1)).ravel()
        passes.append(per_cell >= threshold)
    # A cell is kept only if it passes every gate.
    bools = np.all(passes, axis=0)

    if valid_mask is not None:
        bools = bools & valid_mask

    return bools

In [None]:
# Gates are applied in this order within each compartment; a later entry overwrites an
# earlier one for cells that pass both.
order_of_operations = [
    'Fibroblast',
    'Endothelial',
    'CD4_T_cell',
    'CD8_T_cell',
    'Macrophage',
    'B_cell',
    'DC',
    'Epithelial',
    'Myoepithelial'
]
# Every cell starts as 'Other' and keeps that label if it passes no gate.
cell_type = np.full(adata.shape[0], 'Other', dtype=object)
for general, d1 in gating_map.items():
    # Gates only apply to cells that were assigned to this compartment.
    in_compartment = adata.obs['cell_type_general'] == general
    for ct in order_of_operations:
        if ct not in d1:
            continue
        bools = bin_cells(adata, d1[ct], valid_mask=in_compartment)
        cell_type[bools] = ct
adata.obs['cell_type'] = cell_type.tolist()

In [None]:
# UMAP coloured by the gated cell type.
sc.pl.umap(adata, color=['cell_type'], ncols=1)

In [None]:
# Same embedding coloured by compartment, for comparison with the gated cell types.
sc.pl.umap(adata, color=['cell_type_general'], ncols=1)

In [None]:
# Cell positions coloured by the gated cell type.
sc.pl.scatter(adata, x='centroid_col', y='centroid_row_inverted', color='cell_type')

#### other cell types

In [None]:
# Start the finer-grained annotation as a copy of cell_type; it is overwritten by the
# macrophage M1/M2 call further down.
adata.obs['cell_type_specific'] = adata.obs['cell_type'].to_list()

###### proliferating cells

In [None]:
# Ki67 threshold used by the proliferation cells below; the commented value is an alternative.
# Note that `default_value` is reassigned in the macrophage section further down.
default_value = 8.73
# default_value = 7.84

In [None]:
# Ki67 gate check on the 'Epithelial' compartment with radius=500.
channel = 'Ki67'
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='Epithelial', cell_type_col='cell_type_general',
    default_value=default_value, radius=500
)

In [None]:
# Ki67 gate check on the 'Immune' compartment.
channel = 'Ki67'
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='Immune', cell_type_col='cell_type_general',
    default_value=default_value
)

In [None]:
# Ki67 gate check on leiden cluster '0'. Relies on `channel` still being 'Ki67' from the
# previous cell.
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='0', cell_type_col='leiden',
    default_value=default_value
)

In [None]:
# Flag proliferating cells: Ki67 above the threshold -> 'Yes', otherwise 'No'.
# The channel is named explicitly rather than read from the notebook-level `channel`
# variable, which other cells reassign; labels are a consistent Yes/No pair.
adata.obs['is_proliferating'] = ['Yes' if m > default_value else 'No'
                                 for m in adata.raw[:, 'Ki67'].X.flatten()]

In [None]:
# Recompute the negated row coordinate (same expression as in the macro-annotation section)
# and plot cell positions coloured by the proliferation flag.
adata.obs['centroid_row_inverted'] = [-x for x in adata.obs['centroid_row']]
sc.pl.scatter(adata, x='centroid_col', y='centroid_row_inverted', color='is_proliferating')

In [None]:
# Record the Ki67 threshold alongside the cell-type gates. Unlike the compartment entries,
# this top-level entry maps a marker directly to a threshold.
gating_map['is_proliferating'] = {'Ki67': default_value}

###### M1 vs M2 macrophage

In [None]:
# CD163 threshold for the M1/M2 split; the commented value is an alternative.
# NOTE: this rebinds `default_value`, which held the Ki67 threshold until here, so
# re-running the proliferation cells after this one would use the CD163 value.
default_value = 7.17
# default_value = 5.81

In [None]:
# CD163 gate check on cells already called 'Macrophage' (cell_type_col='cell_type'),
# with radius=500.
channel = 'CD163'
r1, r2, c1, c2 = gate_region(
    adata, channel, channel_img=channel_to_img[channel], boundary_img=boundary_img,
    cell_type='Macrophage', cell_type_col='cell_type',
    default_value=default_value, radius=500
)

In [None]:
def call_cell(ct, x, threshold=None):
    """Refine a macrophage call into M1/M2 from its CD163 intensity.

    Parameters
    ----------
    ct : str
        Cell type already assigned to the cell.
    x : float
        The cell's CD163 intensity.
    threshold : float, optional
        CD163 gate. Defaults to the notebook-level ``default_value`` at call time.

    Returns
    -------
    str
        'Macrophage - M2' when a macrophage's CD163 reaches the threshold,
        'Macrophage - M1' when it does not, and ``ct`` unchanged for any other cell type.
    """
    if ct != 'Macrophage':
        return ct
    if threshold is None:
        threshold = default_value
    # CD163 is a marker of M2 (alternatively activated) macrophages, so CD163-high cells
    # are M2 and CD163-low cells are M1.
    return 'Macrophage - M2' if x >= threshold else 'Macrophage - M1'
    
# Apply call_cell to every cell, pairing its gated cell type with its intensity for the
# current `channel` ('CD163' when the notebook is run top to bottom).
adata.obs['cell_type_specific'] = [call_cell(ct, x)
                                   for ct, x in zip(adata.obs['cell_type'], adata.raw[:, channel].X.flatten())]

In [None]:
# Cell positions coloured by the refined cell type (macrophages split into M1/M2).
sc.pl.scatter(adata, x='centroid_col', y='centroid_row_inverted', color='cell_type_specific')

In [None]:
# Record the CD163 threshold used for the M1/M2 split.
# NOTE(review): the key reads 'M1_v2_M2' (probably meant 'M1_vs_M2'); it is left as-is
# because it is stored in the saved object and may be read elsewhere.
gating_map['M1_v2_M2_Macrophage'] = {'CD163': default_value}

#### add low-res spatial tifs

In [None]:
# Downscale the `pseudo` image to 20% per axis for use as the low-res spatial image.
scale_factor = .2
# output_shape must be whole pixels: shape * 0.2 is a float and generally not integral,
# which some scikit-image versions reject, so round it explicitly.
pseudo_small = resize(
    pseudo,
    (int(round(pseudo.shape[0] * scale_factor)), int(round(pseudo.shape[1] * scale_factor))),
    anti_aliasing=False)
pseudo_small.shape

In [None]:
# Register spatial coordinates and low-res images in the layout sc.pl.spatial reads:
# adata.obsm['spatial'] plus adata.uns['spatial'][library_id]['images' / 'scalefactors'].
adata.obsm['spatial'] = adata.obs[['centroid_col', 'centroid_row']].values
spatial_key = "spatial"
library_id = sample
adata.uns[spatial_key] = {library_id: {}}
adata.uns[spatial_key][library_id]["images"] = {"hires": pseudo_small}
adata.uns[spatial_key][library_id]["scalefactors"] = {
    "tissue_hires_scalef": scale_factor, "spot_diameter_fullres": 10}

# Add one downscaled image per channel. The loop variables are deliberately NOT named
# `channel`/`img`: those notebook-level names are read by the gating and calling cells,
# and overwriting them here would change what those cells do when re-run.
for channel_name, channel_img in channel_to_img.items():
    print(channel_name)
    # Whole-pixel output shape (shape * scale_factor is a float).
    small_shape = (int(round(channel_img.shape[0] * scale_factor)),
                   int(round(channel_img.shape[1] * scale_factor)))
    adata.uns[spatial_key][library_id]["images"][channel_name] = resize(
        channel_img, small_shape, anti_aliasing=True)
    adata.uns[spatial_key][library_id]["scalefactors"][f"tissue_{channel_name}_scalef"] = scale_factor


In [None]:
# Spot check: refined cell types over the registered image, cropped to a fixed window.
sc.pl.spatial(adata, color='cell_type_specific', crop_coord=[5000, 6000, 5000, 6000])

#### save object

In [None]:
# this looks horrible
ks = []
for k, d1 in gating_map.items():
    for k2, d2 in d1.items():
        if isinstance(d2, dict):
            for k3 in d2.keys():
                v = d2[k3]
                if isinstance(k3, tuple):
                    d2['_'.join(k3)] = v
                    ks.append(k3)
                    d2.pop(k3)         
    
adata.uns['gates'] = gating_map
adata.uns['cell_typing_order'] = order_of_operations

In [None]:
# Save the fully annotated object for this sample.
adata.write_h5ad(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/cell_annotation_full.h5ad')


In [None]:
# NOTE(review): this rebinds `sample` to a different ID than the one selected at the top of
# the notebook and replaces `adata` with that sample's saved annotation; every cell below
# (and any cell above that is re-run) now operates on this sample.
sample = 'HT323B1-H1A1'
adata = sc.read_h5ad(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/cell_annotation_full.h5ad')


#### QiTissue cluster file

In [None]:
# Build the cluster table: index 'CellID', a numeric 'Cluster' id and its text 'label'.
# .copy() gives an independent frame, so renaming the index/columns and adding columns
# below does not act on a slice of adata.obs.
df = adata.obs[['cell_type']].copy()
df.index.name = 'CellID'
df.columns = ['Cluster']

m = {
    'Epithelial': '1',
    'Endothelial': '2',
    'Immune': '3'
}
# The cell_type column holds more labels than the three fixed ids above (e.g. the gated
# immune/stromal types and 'Other'); give each remaining label the next free id, in sorted
# order, so the lookup below cannot fail with KeyError. The fixed ids are unchanged.
# NOTE(review): the output is named "macro" but is built from 'cell_type' - confirm that
# 'cell_type_general' was not intended.
next_id = max(int(v) for v in m.values()) + 1
for label in sorted(set(df['Cluster']) - set(m), key=str):
    m[label] = str(next_id)
    next_id += 1

df['label'] = df['Cluster'].to_list()
df['Cluster'] = [m[x] for x in df['Cluster']]

df.to_csv(f'/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/{sample}/level_4/cell_annotation_macro.csv')