In [1]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import seaborn as sns
import tiffile as tiff
import glob
import re
import time
import random
import itertools
import anndata as ad
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from skimage.measure import shannon_entropy
from skimage.transform import integral_image
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multitest import multipletests

random.seed(0)
np.random.seed(0)

In [None]:
# Path of the AnnData file that holds the SIFT descriptors.
adata_path = '/gladstone/engelhardt/lab/adamw/saft_figuren/analysis/adata_20250225_kmeans.h5ad'

# Read the AnnData object from disk.
adata = ad.read_h5ad(adata_path)

# Replace the obs index with a fresh 0..n-1 index, discarding the old labels.
obs_reindexed = adata.obs.reset_index(drop=True)
adata.obs = obs_reindexed

  utils.warn_names_duplicates("obs")


### Create functions for computing the RFP intensity statistics surrounding each ROI

In [None]:


def compute_roi_boundaries(rfp_image, x, y, scales, octaves):
    """
    Return the clipped integer bounds of the square ROI around each keypoint.

    Every ROI is centred on (x, y) and reaches
      half_width = scales * (2 ** (octaves + 1))
    in each direction. Lower edges are floored, upper edges are ceiled, and
    all four edges are clipped to the image extent.

    Parameters:
      rfp_image : array whose first two dimensions bound the ROIs.
      x, y      : keypoint centre coordinates; x is clipped against
                  rfp_image.shape[0] and y against rfp_image.shape[1].
      scales    : keypoint scales.
      octaves   : keypoint octaves.

    Returns:
      (xmin, xmax, ymin, ymax), each cast to int and clipped to
      [0, image extent] along the matching axis.
    """
    # NOTE(review): x is treated as the axis-0 coordinate here. OpenCV
    # keypoints report x as the column index -- confirm how the 'x'/'y'
    # columns were stored upstream.
    axis0_limit = rfp_image.shape[0]
    axis1_limit = rfp_image.shape[1]
    half_width = scales * (2 ** (octaves + 1))

    def lower_edge(centre, limit):
        # Floor so the ROI never shrinks below the requested extent.
        return np.clip(np.floor(centre - half_width).astype(int), 0, limit)

    def upper_edge(centre, limit):
        # Ceil for the same reason on the far side.
        return np.clip(np.ceil(centre + half_width).astype(int), 0, limit)

    return (
        lower_edge(x, axis0_limit),
        upper_edge(x, axis0_limit),
        lower_edge(y, axis1_limit),
        upper_edge(y, axis1_limit),
    )

def compute_mean_intensities(rfp_image, df):
    """
    Compute mean intensity for each ROI in the DataFrame using an integral image.

    The ROI bounds come from compute_roi_boundaries; the pixel sum of each ROI
    is then obtained from four look-ups in a summed-area table, so the cost
    per ROI does not depend on the ROI size.

    Parameters:
      rfp_image : 2D numpy array for the RFP channel.
      df        : DataFrame containing columns 'x', 'y', 'scales', and 'octaves'.

    Returns:
      A list of mean intensity values (one per ROI, in row order of df).
      ROIs with no pixels (empty or inverted bounds) contribute 0.
    """
    xmin, xmax, ymin, ymax = compute_roi_boundaries(
        rfp_image,
        df['x'].values,
        df['y'].values,
        df['scales'].values,
        df['octaves'].values,
    )

    # Compute the integral image once; ii[r, c] holds the sum of
    # rfp_image[:r + 1, :c + 1].
    ii = integral_image(rfp_image)

    def corner(r, c):
        """Integral-image value at (r, c), or 0 when the index lies before the image."""
        return ii[r, c] if (r >= 0 and c >= 0) else 0

    mean_intensity = []
    for x_min, x_max, y_min, y_max in zip(xmin, xmax, ymin, ymax):
        # Check each axis separately: a product of two negative extents would
        # otherwise look like a valid positive area.
        if x_max <= x_min or y_max <= y_min:
            mean_intensity.append(0)
            continue

        area = (x_max - x_min) * (y_max - y_min)

        # Four look-ups: sum = A - B - C + D.
        A = corner(x_max - 1, y_max - 1)
        B = corner(x_min - 1, y_max - 1)
        C = corner(x_max - 1, y_min - 1)
        D = corner(x_min - 1, y_min - 1)

        # Add before subtracting. For integer images the integral image may be
        # unsigned, and the intermediate A - B - C can be negative, which
        # would wrap around and raise overflow warnings.
        sum_intensity = (A + D) - (B + C)
        mean_intensity.append(sum_intensity / area)
    return mean_intensity

def compute_entropy_for_roi(rfp_image, row):
    """
    Shannon entropy of the pixels inside one keypoint's square ROI.

    The ROI is centred on (row['x'], row['y']) with half-width
    row['scales'] * (2 ** (row['octaves'] + 1)); its edges are floored /
    ceiled and clipped to the image extent before slicing.

    Parameters:
      rfp_image : image array sliced along its first two dimensions.
      row       : mapping with keys 'x', 'y', 'scales', and 'octaves'.

    Returns:
      The value of shannon_entropy() on the ROI slice.
    """
    half_width = row['scales'] * (2 ** (row['octaves'] + 1))

    # One slice per image axis: 'x' indexes axis 0, 'y' indexes axis 1.
    axis_slices = []
    for centre, limit in ((row['x'], rfp_image.shape[0]),
                          (row['y'], rfp_image.shape[1])):
        lower = int(np.clip(np.floor(centre - half_width), 0, limit))
        upper = int(np.clip(np.ceil(centre + half_width), 0, limit))
        axis_slices.append(slice(lower, upper))

    return shannon_entropy(rfp_image[axis_slices[0], axis_slices[1]])

def compute_entropies(rfp_image, df):
    """
    Compute the Shannon entropy of every ROI described by df.

    Each row of df is passed to compute_entropy_for_roi together with
    rfp_image; the per-row results are returned by DataFrame.apply, so they
    carry the same index as df.
    """
    def entropy_of(roi_row):
        # Bind the image so apply() only has to supply the row.
        return compute_entropy_for_roi(rfp_image, roi_row)

    return df.apply(entropy_of, axis=1)

def load_image(row, rfp=False):
    """
    Read the image referenced by row['filename'].

    Parameters:
      row : mapping with a 'filename' entry.
      rfp : when True, 'phase_registered' in the filename is replaced by
            'red_registered' before reading, so the RFP channel image is
            loaded instead.

    Returns:
      Whatever tiff.imread returns for the resolved filename.
    """
    # NOTE(review): if the filename does not contain 'phase_registered', the
    # replacement changes nothing and the original file is read even with
    # rfp=True -- confirm all filenames follow that naming scheme.
    filename = row['filename']
    if rfp:
        filename = filename.replace('phase_registered', 'red_registered')
    return tiff.imread(filename)

# ========================================================
# Main loop: Process each image file (grouped by filename)
# ========================================================
# Number of images to process in this test run; set to None to process all images.
MAX_IMAGES = 5

adata2 = adata.copy()  # Create a copy of the anndata object each time I run this loop for testing purposes
adata2.obs = adata2.obs.reset_index(drop=True)  # need to reset the index of the obs dataframe as there are repeats in the original index

num_loops = 0
# observed=True restricts the groups to filenames that actually occur in obs.
# For a categorical 'filename' column this avoids empty groups (on which
# image_df.iloc[0] would fail) and presumably silences the pandas warning
# previously emitted for this line.
for bf_path, image_df in adata2.obs.groupby('filename', observed=True):
    start_time = time.time()

    # Load the RFP image only once for this group.
    rfp_image = load_image(image_df.iloc[0], rfp=True)
    print('Time to load RFP image: {:.2f} seconds'.format(time.time() - start_time))

    # Compute mean intensities for all ROIs in this image.
    mean_intensities = compute_mean_intensities(rfp_image, image_df)

    # Compute entropies for all ROIs in this image.
    entropies = compute_entropies(rfp_image, image_df)

    # Update the main DataFrame using .loc with the image_df indices.
    adata2.obs.loc[image_df.index, 'roi_mean_rfp_intensity'] = mean_intensities
    adata2.obs.loc[image_df.index, 'roi_rfp_entropy'] = entropies

    end_time = time.time()
    print("Time to compute RFP stats for {} ROIs: {:.2f} seconds".format(image_df.shape[0], end_time - start_time))
    num_loops += 1

    # Stop early while testing; with MAX_IMAGES = None every image is processed.
    if MAX_IMAGES is not None and num_loops >= MAX_IMAGES:
        break


  utils.warn_names_duplicates("obs")
  for bf_path, image_df in adata2.obs.groupby('filename'):


Time to load RFP image: 0.00 seconds
Time to compute RFP stats for 118 ROIs: 0.06 seconds
Time to load RFP image: 0.00 seconds
Time to compute RFP stats for 126 ROIs: 0.03 seconds
Time to load RFP image: 0.00 seconds
Time to compute RFP stats for 128 ROIs: 0.02 seconds
Time to load RFP image: 0.00 seconds
Time to compute RFP stats for 119 ROIs: 0.02 seconds
Time to load RFP image: 0.00 seconds
Time to compute RFP stats for 116 ROIs: 0.02 seconds


In [20]:
# Rough estimate, in hours, of the time needed to process every ROI,
# extrapolated from the first timed image above (118 ROIs in 0.06 seconds).
rois_in_timed_image = 118
seconds_for_timed_image = 0.06
estimated_seconds = (adata2.obs.shape[0] / rois_in_timed_image) * seconds_for_timed_image
(estimated_seconds / 60) / 60

0.4787768361581921

In [None]:
# Preview the first rows of the obs table of `adata` (the main loop writes its
# new ROI columns to the copy `adata2`, not to this object).
adata.obs.head()

In [None]:
# Same extrapolation to hours, using 2.85 seconds per 118 ROIs.
# NOTE(review): the 2.85 s figure is not produced by any output shown in this
# notebook -- presumably a timing from an earlier implementation; confirm.
rois_per_reference_image = 118
seconds_per_reference_image = 2.85
projected_seconds = (adata.obs.shape[0] / rois_per_reference_image) * seconds_per_reference_image
(projected_seconds / 60) / 60