### Imports

In [1]:
import os
import pandas as pd
import anndata as ad
import scanpy as sc
from SpatialBiologyToolkit import backgating, plotting

### Load AnnData

In [2]:
# Read the saved AnnData object from disk (path is relative to the working directory)
adata_path = 'imc_adata_11Aug.h5ad'
adata = ad.read_h5ad(adata_path)

# Plotting populations or markers over cell masks using `obs_to_mask`

In [3]:
# IPython help lookup: prints the signature of `plotting.obs_to_mask` (IPython-only syntax, not plain Python)
plotting.obs_to_mask?

[1;31mSignature:[0m
[0mplotting[0m[1;33m.[0m[0mobs_to_mask[0m[1;33m([0m[1;33m
[0m    [0madata[0m[1;33m:[0m [0manndata[0m[1;33m.[0m[0m_core[0m[1;33m.[0m[0manndata[0m[1;33m.[0m[0mAnnData[0m[1;33m,[0m[1;33m
[0m    [0mroi[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mroi_obs[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'ROI'[0m[1;33m,[0m[1;33m
[0m    [0mcat_obs[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcat_colour_map[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'tab20'[0m[1;33m,[0m[1;33m
[0m    [0mcat_obs_groups[0m[1;33m:[0m [0mlist[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mquant_obs[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mquant_colour_map[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'viridis'[0m[1;33m,[0m[1;33m
[0m    [0madata_colormap[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mTrue[0m[1;3

## Plotting a population over masks

In [None]:
# Plot the first five ROIs only (remove the `[:5]` slice to plot every ROI in the dataset).
# `.tolist()` is used rather than `.to_list()`: `Series.unique()` returns a NumPy array for
# non-categorical columns, and NumPy arrays have no `.to_list()` method.
for roi in adata.obs['ROI'].unique().tolist()[:5]:

    plotting.obs_to_mask(adata = adata,
                         masks_folder='masks',
                         roi = roi,
                         cat_obs = 'population',
                         cat_colour_map = sc.pl.palettes.zeileis_28, # "tab20" will work fine for <20 pops
                         save_path = f'Population_images/{roi}.png', # using .svg will save as a vector instead
                         masks_ext ='tiff',
                         adata_colormap = True, # You can also use a colour map already defined in the adata
                         label_obs=None, # Ideally we would use 'ObjectNumber' here
                         background_color='black')

## Plotting a marker value over masks

In [None]:
# Plot the first five ROIs only (remove the `[:5]` slice to plot every ROI in the dataset).
# `.tolist()` is used rather than `.to_list()`: `Series.unique()` returns a NumPy array for
# non-categorical columns, and NumPy arrays have no `.to_list()` method.
for roi in adata.obs['ROI'].unique().tolist()[:5]:

    plotting.obs_to_mask(adata = adata,
                         masks_folder='masks',
                         roi = roi,
                         quant_obs = 'Iba1', # Marker whose value colours each cell
                         quant_colour_map='Reds',
                         save_path = f'Marker_images/{roi}.png', # using .svg will save as a vector instead
                         masks_ext ='tiff',
                         label_obs=None # Ideally we would use 'ObjectNumber' here
                         )

# Overview of using `backgating_assessment`

The `backgating_assessment` function allows you to sample and visualize cells from different populations in an AnnData. It integrates with images stored on disk, optionally uses segmentation masks, and can overlay the cells or cell boundaries in thumbnail images. It also saves per-population information about marker usage and intensity ranges.

### Key Steps

1. **Population Mean Expression (Optional)**  
   - The function can compute the mean expression of every marker per population and save it to a “mean expression” CSV (e.g., `markers_mean_expression.csv`).  

2. **Selecting the Highest-Expression Markers**  
   - By default, the function can pick the **top N** markers for each population (where N can be 1, 2, or 3) and automatically assign them to R/G/B channels for easy visualization.
   - You can override any channel with a user-specified marker (e.g., `specify_red='CD3'`).

3. **Backgating Settings For Image Rescaling**  
   - It can create or update a “backgating settings” CSV (e.g., `backgating_settings.csv`) that records which markers are displayed in the Red, Green, and Blue channels, as well as optional intensity range settings for each population. This file can then be edited on disk to adjust how the minimum and maximum values are determined for each channel, for each population. Numeric values (i.e., absolute numbers) correspond to pixel values (i.e., counts for IMC). However, we can also use various quantile settings to automatically calculate values:
   
   - **`"q0.97"`**: Use the *mean* of the 97th-percentile intensities across all ROIs.
   - **`"i0.97"`**: Each ROI is clipped to its own 97th-percentile (so every ROI has potentially different max).
   - **`"m0.97"`**: Use the *minimum* of the 97th-percentile intensities across ROIs.
   - **`"x0.97"`**: Use the *maximum* of the 97th-percentile intensities across ROIs.

   > After clipping, intensities are automatically **rescaled** so the new minimum and maximum become `0` and `1`, respectively.

4. **Sampling Cells**  
   - For each population (in `pop_obs`), you can specify a number of cells to sample (e.g., 50 per population).  
   - The function extracts these cells’ coordinates and uses them to create small “thumbnails” from your raw image data.

5. **Image Creation & Overlay**  
   - Internally, it calls a helper function (e.g., `backgating`) that loads/creates composite images of each ROI.  
   - **Masks (Optional)**: If provided, the function can look for segmentation masks in a user-specified folder (or from a CSV mapping ROI->mask file) and overlay boundary lines around the center cell in each thumbnail.

6. **Final Output**  
   - A set of **PNG images** showing each selected cell (thumbnails).  
   - An **overview** image per ROI with bounding boxes for each cell, if you choose.  
   - Two **CSV files**: one for mean expression (if computed), and another for the final backgating settings (marker assignments, intensity ranges, etc.).  
   - A **`cells_list.csv`** showing which cells were plotted in the thumbnails.

### Usage Modes

- **`mode='full'`**: Compute mean expression, assign top markers or use user-specified channels, and then generate backgating images.  
- **`mode='save_markers'`**: Compute mean expression and save marker assignments to CSV, but **do not** generate images.  
- **`mode='load_markers'`**: Skip mean expression—just load markers from an existing settings CSV and run the backgating.


In [8]:
# IPython help lookup: prints the signature of `backgating.backgating_assessment` (IPython-only syntax)
backgating.backgating_assessment?

[1;31mSignature:[0m
[0mbackgating[0m[1;33m.[0m[0mbackgating_assessment[0m[1;33m([0m[1;33m
[0m    [0madata[0m[1;33m,[0m[1;33m
[0m    [0mimage_folder[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mpop_obs[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mmean_expression_file[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'markers_mean_expression.csv'[0m[1;33m,[0m[1;33m
[0m    [0mbackgating_settings_file[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'backgating_settings.csv'[0m[1;33m,[0m[1;33m
[0m    [0mpops_list[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcells_per_group[0m[1;33m:[0m [0mint[0m [1;33m=[0m [1;36m50[0m[1;33m,[0m[1;33m
[0m    [0mradius[0m[1;33m:[0m [0mint[0m [1;33m=[0m [1;36m15[0m[1;33m,[0m[1;33m
[0m    [0mroi_obs[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'ROI'[0m[1;33m,[0m[1;33m
[0m    [0mx_loc_obs[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'X_loc'[0m[1;33

In [None]:
# Populations to sample; pass `pops_list=None` instead to process every population
populations_to_check = ['PD-L1+ Schwann cells']

backgating.backgating_assessment(
    adata=adata,
    image_folder='images',
    mask_folder='masks',
    pop_obs='population',
    pops_list=populations_to_check,
    cells_per_group=5,
    use_masks=True,
    minimum=0.2,
    max_quantile='q0.99',
    number_top_markers=2,
    specify_blue='DNA1',  # Blue channel is always DNA
    output_folder='Backgating_results',
    show_gallery_titles=False,
)

# Overview of batch making images with `make_images`

This function creates **composite RGB images** from raw channel images stored in subfolders, specifying a strategy to rescale all the markers so that min/max values are consistent (or not). Each **Region of Interest (ROI)** is in its own subfolder. You can map up to seven different color channels (Red, Green, Blue, Magenta, Cyan, Yellow, White) to any marker of interest.

1. **Loading the Images**  
   - For each channel (e.g., Red, Green, etc.), the function looks for the marker name in the filenames of your `.tif` images.
   - Only the ROIs listed in `samples_list` are used.

2. **Intensity Clipping**  
   Before combining channels into an RGB image, `make_images` clips and rescales each marker image, turning raw intensities into a `[0..1]` range.  
   - **`minimum`**: The lower bound for clipping (all values below are set to this).
   - **`max_quantile`**: A user-specified method for determining the upper bound. It can be:
     - A direct numeric value (e.g., `200.0`), or
     - A string prefix that tells the function how to calculate a max from quantiles:
       - **`"q0.97"`**: Use the *mean* of the 97th-percentile intensities across all ROIs.
       - **`"i0.97"`**: Each ROI is clipped to its own 97th-percentile (so every ROI has potentially different max).
       - **`"m0.97"`**: Use the *minimum* of the 97th-percentile intensities across ROIs.
       - **`"x0.97"`**: Use the *maximum* of the 97th-percentile intensities across ROIs.

   > After clipping, intensities are automatically **rescaled** so the new minimum and maximum become `0` and `1`, respectively.

3. **Combining into an RGB Image**  
   Once each marker is rescaled, the function merges them in an “additive” manner:
   - **Red channel** adds any Red, Magenta, Yellow, White channels.
   - **Green channel** adds Green, Cyan, Yellow, White channels.
   - **Blue channel** adds Blue, Magenta, Cyan, White channels.

4. **Output**  
   - A **`<ROI>.png`** file is saved for each ROI, storing the final composite.
   - You can also specify:
     - **`roi_folder_save=True`** to save each ROI’s `.png` in its own subfolder.
     - **`simple_file_names=True`** to output just `<ROI>.png` without channel info in the filename.

In [10]:
# IPython help lookup: prints the signature of `backgating.make_images` (IPython-only syntax)
backgating.make_images?

[1;31mSignature:[0m
[0mbackgating[0m[1;33m.[0m[0mmake_images[0m[1;33m([0m[1;33m
[0m    [0mimage_folder[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0msamples_list[0m[1;33m:[0m [0mList[0m[1;33m[[0m[0mstr[0m[1;33m][0m[1;33m,[0m[1;33m
[0m    [0moutput_folder[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mname_prefix[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m''[0m[1;33m,[0m[1;33m
[0m    [0mminimum[0m[1;33m:[0m [0mfloat[0m [1;33m=[0m [1;36m0.2[0m[1;33m,[0m[1;33m
[0m    [0mmax_quantile[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mfloat[0m[1;33m,[0m [0mstr[0m[1;33m][0m [1;33m=[0m [1;34m'q0.97'[0m[1;33m,[0m[1;33m
[0m    [0mred[0m[1;33m:[0m [0mOptional[0m[1;33m[[0m[0mstr[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mred_range[0m[1;33m:[0m [0mOptional[0m[1;33m[[0m[0mTuple[0m[1;33m[[0m[0mfloat[0m[1;33m,[0m [0mUnion[0m[1;33m[[0m[0mstr[0m[1;33m,

In [None]:
# Collect every ROI name found in the AnnData; a hand-written list of ROI names can be used instead
all_samples = adata.obs['ROI'].unique().tolist()

# Marker assigned to each colour channel of the composite image
channel_markers = {'red': 'Iba1', 'green': 'Cd14', 'blue': 'DNA1'}

backgating.make_images(
    image_folder='images',
    samples_list=all_samples,
    output_folder='Composite_Images',
    minimum=0.2,
    max_quantile='q0.97',
    **channel_markers,
)