# HistoSweep: Full sweep of H&E images to identify good quality super-pixels for downstream ST analysis

##  *** Please enter input parameters ***

In [1]:
# ===== USER-DEFINED INPUT PARAMETERS =====
# Every value in this cell is read by later cells; edit here, then re-run the notebook top to bottom.

# Path prefix to your H&E image folder ('he.jpg' is loaded from here and the output folder is created inside it)
HE_prefix = 'HE/demo/'

# Flag for whether to rescale the image (runs rescale.py when True)
need_scaling_flag = False  # True if image resolution ≠ 0.5µm (or desired size) per pixel

# Flag for whether to preprocess the image (runs preprocess.py when True)
need_preprocessing_flag = False  # True if image dimensions are not divisible by patch_size

# The pixel size (in microns) of the raw H&E image; only passed to rescale.py
pixel_size_raw = 0.5  # Typically provided by the scanner/metadata (e.g., 0.25 µm/pixel for 40x)

# Parameter that determines the amount of density filtering (e.g. of artifacts)
density_thresh = 100 # Typically 100 works well, but may need to be increased if artifacts (e.g. fiducial markers) are not being effectively removed

# Flag for whether to clean the background (i.e. remove isolated debris and small specks outside the tissue)
clean_background_flag = True # Set to False if you want to preserve fibrous regions that are otherwise being incorrectly filtered out

# Parameter used to remove isolated debris and small specks outside the tissue
min_size = 10 # Decrease (e.g. 5) if the tissue has many fibrous areas (e.g. adipose) that you wish to retain; increase (e.g. 50) if there is a lot of, or larger, debris you wish to remove


# ===== ADDITIONAL PARAMETERS (typically do not need to change) =====

# Side length, in pixels, of one square patch (superpixel) used throughout processing
patch_size = 16  # 16x16 pixels → typically 8µm if pixel_size = 0.5

# Target pixel size (in microns)
pixel_size = 0.5  # Final desired resolution; keep as 0.5 µm for standardization


Please store your raw histology image as 'he-raw.jpg', the scaled image as 'he-scaled.jpg', and the final preprocessed image as 'he.jpg'. If you use the scaling and preprocessing scripts provided, the scaled and preprocessed images are saved under these names automatically.

## Load in packages and basic functions

In [2]:
%load_ext autoreload
%autoreload 2

import os
from utils import load_image
from saveParameters import saveParams
from computeMetrics import compute_metrics
from densityFiltering import compute_low_density_mask
from textureAnalysis import run_texture_analysis
from ratioFiltering import run_ratio_filtering
from generateMask import generate_final_mask
from additionalPlots import generate_additionalPlots

## Scale and preprocess H&E image 
Preprocess the image: <br>
(1) Scale so that each pixel is size 0.5 µm (he-scaled.jpg)<br>
(2) Pad the scaled image so its height and width are divisible by patch_size (he.jpg)<br>

In [3]:
# Rescale and preprocess the image.
# Each step runs an external script through the IPython %run magic and is skipped
# when its flag is False. The {name} placeholders are expanded by IPython from the
# notebook variables before the script is launched, so the parameter cell must
# have been executed first.
# NOTE(review): presumably rescale.py writes 'he-scaled.jpg' and preprocess.py
# writes 'he.jpg' under HE_prefix — confirm against the scripts.

if need_scaling_flag:
    %run rescale.py --image --pixelSizeRaw {pixel_size_raw} --pixelSize {pixel_size} --prefix {HE_prefix}

if need_preprocessing_flag: 
    %run preprocess.py --image --patchSize {patch_size} --prefix {HE_prefix}

In [4]:
# Read the preprocessed H&E image from the prefix folder and report its dimensions
he_image_path = os.path.join(HE_prefix, "he.jpg")
image = load_image(he_image_path)
print(image.shape)

Image loaded from HE/demo/he.jpg
(18688, 8448, 3)


## Patchify image into super-pixels and compute metrics

In [5]:
# Folder inside the H&E image folder that receives the HistoSweep outputs.
# os.path.join (rather than an f-string with a hard-coded '/') avoids a doubled
# separator when HE_prefix already ends with '/', and avoids pointing at the
# filesystem root ('/HistoSweep_Output') when HE_prefix is empty.
directory = os.path.join(HE_prefix, "HistoSweep_Output")

# exist_ok=True makes this cell safe to re-run and removes the gap between
# checking for the folder and creating it.
os.makedirs(directory, exist_ok=True)

In [6]:
# Record the settings used for this run (arguments are positional; keep this order)
saveParams(
    HE_prefix,
    need_scaling_flag,
    need_preprocessing_flag,
    pixel_size_raw,
    density_thresh,
    clean_background_flag,
    min_size,
    patch_size,
    pixel_size,
)


✅ Parameters saved to: HE/demo/HistoSweep_Output/HistoSweep_parameters.txt


In [7]:
# Compute the per-superpixel metrics; the six results are unpacked in the order returned
(
    he_std_norm_image_,
    he_std_image_,
    z_v_norm_image_,
    z_v_image_,
    ratio_norm_,
    ratio_norm_image_,
) = compute_metrics(image, patch_size=patch_size)

## Define threshold criteria:

### (1) Low density superpixels 

In [8]:
# Flag the low density superpixels using the user-chosen density threshold
mask1_lowdensity = compute_low_density_mask(
    z_v_image_,
    he_std_image_,
    ratio_norm_,
    density_thresh=density_thresh,
)

In [9]:
# Report how many superpixels the density filter selected
n_low_density = mask1_lowdensity.sum()
print('Total selected for density filtering: ', n_low_density)


Total selected for density filtering:  8079


In [10]:
# Run the GLCM texture analysis on the low density selection
# NOTE(review): the result is presumably a refined version of mask1_lowdensity — confirm in textureAnalysis
mask1_lowdensity_update = run_texture_analysis(
    prefix=HE_prefix,
    image=image,
    tissue_mask=mask1_lowdensity,
    patch_size=patch_size,
    glcm_levels=64,
)

✅ Entropy map saved as 'glcm_entropy_map_colored.png'
✅ Energy map saved as 'glcm_energy_map_colored.png'
✅ Homogeneity map saved as 'glcm_homogeneity_map_colored.png'

=== GLCM Metric Means ===
   homogeneity    energy   entropy
0     0.457087  0.197041  0.700667
1     0.253880  0.055033  0.871688
2     0.357534  0.106133  0.796654
3     0.586861  0.403446  0.552667

=== Cluster Scores ===
Cluster 0: Score = -0.0465
Cluster 1: Score = -0.5628
Cluster 2: Score = -0.3330
Cluster 3: Score = 0.4376

=== Number of Observations per Cluster ===
Cluster 0: 418
Cluster 1: 985
Cluster 2: 610
Cluster 3: 101
Total: 2114

✅ Clustered texture map saved as 'cluster_labels_colored.png'


### (2) Low ratio superpixels

In [11]:
# Flag the low ratio superpixels; the call also returns the threshold it used
mask2_lowratio, otsu_thresh = run_ratio_filtering(
    ratio_norm_,
    mask1_lowdensity_update,
)

In [12]:
# Sanity check: the mask is one-dimensional. For the demo image (18688 x 8448 pixels,
# patch_size 16) its length is (18688 / 16) * (8448 / 16) = 616704, the number of superpixels.
mask2_lowratio.shape

(616704,)

## Generate final selection of superpixels

In [13]:
# Combine both masks into the final superpixel selection and save the results
generate_final_mask(
    prefix=HE_prefix,
    he=image,
    mask1_updated=mask1_lowdensity_update,
    mask2=mask2_lowratio,
    clean_background=clean_background_flag,
    super_pixel_size=patch_size,
    minSize=min_size,
)


✅ Final masks saved in: HistoSweep_Output


## Generate additional plots
These plots are optional and are intended to provide further insight into the filtering process. Generating them takes some additional time, and this step is not required for the core HistoSweep method.