<a href="https://colab.research.google.com/github/bnsreenu/python_for_microscopists/blob/master/352_Automated_Analysis_of_Organoid_Screening_Multi_Well_Datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://youtu.be/KI_zNWASTqg

# Organoid Image Analysis


## Description
This notebook processes time-lapse image stacks from a multi-well plate: it preprocesses each frame,
segments the organoids, extracts per-object areas, and then analyzes and visualizes how organoid area changes over time in each well.

**Some Notes**
- Cellpose is used for image segmentation; the object diameter can either be supplied manually or estimated automatically from a thresholded version of the image
- A one-way ANOVA test is performed for each well to detect significant changes in object area across time points
- Visualization includes violin plots and mean area over time for individual wells
- A heatmap is generated to show the ratio of final to initial average area for all wells,
  providing an overview of growth across the entire plate


In [None]:
!pip install cellpose
!pip install csbdeep

Collecting cellpose
  Downloading cellpose-3.1.0-py3-none-any.whl.metadata (24 kB)
Collecting fastremap (from cellpose)
  Downloading fastremap-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting imagecodecs (from cellpose)
  Downloading imagecodecs-2024.12.30-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting roifile (from cellpose)
  Downloading roifile-2024.9.15-py3-none-any.whl.metadata (5.5 kB)
Downloading cellpose-3.1.0-py3-none-any.whl (215 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.2/215.2 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fastremap-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.4/6.4 MB[0m [31m87.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading imagecodecs-2024.12.30-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.4 MB)
[2K   [90m━━

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats  #we will perform one-way ANOVA using f_oneway method from this library.
from skimage import io, filters, morphology, util, measure
from skimage.measure import regionprops_table
from csbdeep.utils import normalize
from cellpose import models, core  # For object segmentation
import colorsys
import os
import glob
from datetime import datetime

In [None]:
# Ask Cellpose whether a GPU can be used; it speeds up the segmentation step.
use_GPU = core.use_gpu()
print('>>> GPU activated? ' + ("YES" if use_GPU else "NO"))

>>> GPU activated? YES


In [None]:
def create_output_dirs(base_dir):
    """Create one timestamped output folder per result category.

    Every folder has the form ``<base_dir>/<category_folder>/<timestamp>``,
    where the timestamp is the current local time as ``YYYYMMDD_HHMMSS``.
    Folders that already exist are reused without error.

    Args:
        base_dir: Root directory under which the folders are created.

    Returns:
        dict mapping 'processed', 'segmented', 'plots' and 'diameter_plots'
        to the corresponding directory paths.
    """
    run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    category_folders = (
        ('processed', 'processed_images'),
        ('segmented', 'segmented_images'),
        ('plots', 'analysis_plots'),
        ('diameter_plots', 'diameter_plots'),
    )
    dirs = {key: os.path.join(base_dir, folder, run_stamp)
            for key, folder in category_folders}

    for created_path in dirs.values():
        os.makedirs(created_path, exist_ok=True)
        print(f"Created directory: {created_path}")

    return dirs

def save_figure(fig, filename, output_dir):
    """Write a figure to disk at 300 dpi and then close it.

    Args:
        fig: Figure object providing ``savefig``.
        filename: File name (including extension) of the saved image.
        output_dir: Directory the file is written into.
    """
    destination = os.path.join(output_dir, filename)
    fig.savefig(destination, bbox_inches='tight', dpi=300)
    print(f"Saved figure to: {destination}")
    # Closing releases the figure's memory once it has been written.
    plt.close(fig)

def rolling_ball_background_subtraction(image, radius):
    """Subtract a smooth background estimate from the image.

    The background is estimated on an 8-bit copy of the image by applying a
    rank minimum filter followed by a rank maximum filter, both with a disk
    footprint of the given radius. The estimate is divided by 255 and
    subtracted from the original image; negative differences are clipped to 0.

    Args:
        image: Input image. Presumably a float image scaled to [0, 1], since
            the 8-bit background is rescaled by 1/255 before subtraction --
            confirm against the caller.
        radius: Radius of the disk footprint.

    Returns:
        ``image - background`` with values below zero replaced by zero.
    """
    as_uint8 = util.img_as_ubyte(image)
    footprint = morphology.disk(radius)
    # Minimum followed by maximum with the same footprint removes bright
    # structures smaller than the disk, leaving the slowly varying background.
    local_min = filters.rank.minimum(as_uint8, footprint)
    opened = filters.rank.maximum(local_min, footprint)
    background = opened.astype(float) / 255.0
    return np.clip(image - background, 0, None)

def random_colors(N, bright=True):
    """Return N RGB colors with evenly spaced hues, in shuffled order.

    Args:
        N: Number of colors to generate.
        bright: If True the HSV value channel is 1.0, otherwise 0.7.

    Returns:
        list of N ``(r, g, b)`` tuples, shuffled in place with
        ``np.random.shuffle``.
    """
    value = 0.7 if not bright else 1.0
    palette = [colorsys.hsv_to_rgb(step / N, 1, value) for step in range(N)]
    np.random.shuffle(palette)
    return palette

def color_mask(mask):
    """Create an RGB image in which every labeled object has its own color.

    Label 0 is treated as background and stays black. Colors are assigned to
    the label values actually present in the mask (in ascending order), so
    non-contiguous labels and masks without any background pixel are colored
    correctly as well.

    Args:
        mask: Integer label image (0 = background).

    Returns:
        Float array of shape ``mask.shape + (3,)`` holding RGB values.
    """
    labels, inverse = np.unique(mask, return_inverse=True)
    is_object = labels != 0

    # One palette row per distinct label; background rows remain zero (black).
    palette = np.zeros((labels.size, 3))
    object_colors = random_colors(int(is_object.sum()))
    palette[is_object] = np.asarray(object_colors, dtype=float).reshape(-1, 3)

    # `inverse` holds, for every pixel, the index of its label in `labels`;
    # a single lookup colors the whole image. The reshape covers NumPy
    # versions that return `inverse` flattened.
    return palette[inverse.reshape(mask.shape)]

def estimate_diameter(image, manual_thresh=None, min_size=10, scale_factor=1.0, output_dir=None, well_id=None, timepoint=None):
    """Estimate a typical object diameter (in pixels) from a thresholded image.

    The image is binarized, connected regions are labeled, and the median
    equivalent diameter of all regions at least `min_size` wide is scaled by
    `scale_factor` and rounded. A histogram of all region diameters is drawn
    and, when an output location is given, saved to disk.

    Args:
        image: Grayscale image to analyze.
        manual_thresh: Threshold applied to `image`; if None, Otsu's threshold
            is computed from the image.
        min_size: Minimum diameter (pixels) for a region to be considered.
        scale_factor: Multiplier applied to the median diameter.
        output_dir: Directory for the histogram image; falsy means "do not save".
        well_id: Well identifier used in titles, messages and the file name.
        timepoint: Time point used in titles, messages and the file name.

    Returns:
        The rounded, scaled median diameter, or None if no region reaches
        `min_size`.
    """
    thresh = filters.threshold_otsu(image) if manual_thresh is None else manual_thresh
    labeled = measure.label(image > thresh)
    all_diameters = [prop.equivalent_diameter for prop in measure.regionprops(labeled)]

    # Explicit axes keep the drawing independent of pyplot's "current figure".
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(all_diameters, bins=50)
    ax.set_title(f'Distribution of Object Diameters - Well {well_id}, Time {timepoint}')
    ax.set_xlabel('Diameter (pixels)')
    ax.set_ylabel('Count')
    ax.axvline(x=min_size, color='r', linestyle='--', label=f'Min Size ({min_size})')
    ax.legend()

    # Compare against None rather than relying on truthiness, so that a
    # zero-valued well id or time point does not silently skip saving.
    if output_dir and well_id is not None and timepoint is not None:
        save_figure(fig, f'diameter_distribution_{well_id}_t{timepoint}.png', output_dir)

    filtered_diameters = [d for d in all_diameters if d >= min_size]

    if not filtered_diameters:
        print(f"Warning: No objects found with diameter >= {min_size} pixels.")
        return None

    median_diameter = np.median(filtered_diameters)
    estimated_diameter = round(median_diameter * scale_factor)
    print(f"Well {well_id}, Time {timepoint}:")
    print(f"Number of objects considered: {len(filtered_diameters)}")
    print(f"Median diameter before scaling: {median_diameter:.2f}")
    print(f"Estimated diameter after scaling: {estimated_diameter}")

    return estimated_diameter

# Cellpose models already loaded in this session, keyed by (model_type, gpu flag).
_CELLPOSE_MODEL_CACHE = {}


def _get_cellpose_model(model_type='cyto'):
    """Return a Cellpose model for `model_type`, loading it only on first use."""
    cache_key = (model_type, bool(use_GPU))
    if cache_key not in _CELLPOSE_MODEL_CACHE:
        _CELLPOSE_MODEL_CACHE[cache_key] = models.Cellpose(model_type=model_type, gpu=use_GPU)
    return _CELLPOSE_MODEL_CACHE[cache_key]


def segment_and_visualize(img, well_id, timepoint, diameter=None, flow_threshold=0.4,
                         manual_thresh=None, min_size=10, scale_factor=1.0, output_dirs=None):
    """Segment the image using Cellpose and visualize the results.

    Args:
        img: Preprocessed grayscale image to segment.
        well_id: Well identifier used in titles, messages and file names.
        timepoint: Time point used in titles, messages and file names.
        diameter: Object diameter passed to Cellpose. If None it is estimated
            with `estimate_diameter`; if that fails, 30 is used.
        flow_threshold: Flow threshold passed to ``model.eval``.
        manual_thresh, min_size, scale_factor: Forwarded to
            `estimate_diameter` when the diameter has to be estimated.
        output_dirs: dict with the keys 'processed', 'segmented' and
            'diameter_plots' giving the folders figures are saved to. If None,
            nothing is saved and the figures are left open.

    Returns:
        The masks returned by ``model.eval`` for this image.
    """
    if diameter is None:
        # `output_dirs` is optional: only index into it when it was provided.
        diameter_plot_dir = output_dirs['diameter_plots'] if output_dirs else None
        diameter = estimate_diameter(img, manual_thresh=manual_thresh, min_size=min_size,
                                     scale_factor=scale_factor, output_dir=diameter_plot_dir,
                                     well_id=well_id, timepoint=timepoint)
        if diameter is None:
            print("Could not estimate diameter. Using default value.")
            diameter = 30

    # Reuse one model for all images instead of reloading the weights per call.
    model = _get_cellpose_model('cyto')
    masks, flows, styles, diams = model.eval(img, diameter=diameter,
                                             flow_threshold=flow_threshold,
                                             channels=[0, 0])
    colored_masks = color_mask(masks)

    # Create and save processed image
    fig_processed = plt.figure(figsize=(10, 10))
    plt.imshow(img, cmap='gray')
    plt.title(f'Processed Image - Well {well_id}, Time {timepoint}')
    plt.axis('off')
    if output_dirs:
        save_figure(fig_processed, f'processed_{well_id}_t{timepoint}.png',
                    output_dirs['processed'])

    # Create and save segmentation image
    fig_segment = plt.figure(figsize=(10, 10))
    plt.imshow(colored_masks)
    plt.title(f'Segmented Objects - Well {well_id}, Time {timepoint}\nDiameter: {diameter}, Flow Threshold: {flow_threshold}')
    plt.axis('off')
    if output_dirs:
        save_figure(fig_segment, f'segmented_{well_id}_t{timepoint}.png',
                    output_dirs['segmented'])

    # Count the distinct non-zero labels; this stays correct even if the mask
    # happens to contain no background (0) pixel.
    object_count = np.count_nonzero(np.unique(masks))
    print(f"Total objects detected for Well {well_id}, Time {timepoint}: {object_count}")
    return masks

def process_image(file_path, output_dirs):
    """Process one image file and collect the area of every segmented object.

    The file is read as a stack whose first axis is time. Each frame is
    inverted, scaled, background-subtracted, normalized and segmented, and the
    area of each resulting object is recorded.

    Args:
        file_path: Path of the image file. The file name without its
            extension is used as the well id.
        output_dirs: Output folders as returned by `create_output_dirs`.

    Returns:
        list of dicts with the keys 'file_name', 'well_id', 'time' (1-based
        frame index), 'object_id' and 'area'.
    """
    print(f"\nProcessing file: {file_path}")
    img_stack = io.imread(file_path)
    if img_stack.ndim == 2:
        # A single 2-D frame: treat it as a one-frame stack. Iterating it
        # directly would walk over image rows instead of time points.
        img_stack = img_stack[np.newaxis, ...]
    file_name = os.path.basename(file_path)
    well_id = os.path.splitext(file_name)[0]

    all_data = []

    for time, img in enumerate(img_stack):
        print(f"\nProcessing Well {well_id}, Time point {time + 1}")
        # Invert and normalize to [0, 1].
        # NOTE(review): dividing by 255 assumes 8-bit input frames -- confirm.
        img = util.invert(img) / 255.0
        img_bg_subtracted = rolling_ball_background_subtraction(img, radius=30)
        img_norm = normalize(img_bg_subtracted, 1, 99.8)

        segmented_masks = segment_and_visualize(
            img_norm, well_id, time+1, diameter=None,
            flow_threshold=0.5, manual_thresh=0.3,
            min_size=5, scale_factor=1.1,
            output_dirs=output_dirs
        )

        # Only label and area are read, so no intensity image is needed.
        all_data.extend(
            {
                'file_name': file_name,
                'well_id': well_id,
                'time': time + 1,
                'object_id': prop.label,
                'area': prop.area
            }
            for prop in measure.regionprops(segmented_masks)
        )

    return all_data

def plot_area_evolution(df, wells, output_dir=None):
    """Plot the evolution of organoid area over time for each well.

    One panel is drawn per well: a violin plot and a strip plot of the object
    areas at every time point, plus a line through the per-time-point means.

    Args:
        df: DataFrame with the columns 'well_id', 'time' and 'area'.
        wells: Well ids to plot, one panel each.
        output_dir: If given, the figure is saved there as
            'area_evolution.png' (and closed by `save_figure`).

    Note:
        Updates global matplotlib rcParams and the seaborn style/palette.
    """
    plt.rcParams.update({
        'font.size': 12,
        'axes.titlesize': 16,
        'axes.labelsize': 14,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'legend.fontsize': 12,
        'figure.titlesize': 20
    })

    sns.set_style("whitegrid")
    sns.set_palette("husl")

    # Three panels per row. At least two rows (the original 2x3 layout), with
    # additional rows added when more than six wells are plotted.
    n_cols = 3
    n_rows = max(2, -(-len(wells) // n_cols))
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(24, 8 * n_rows),
                             sharex=True, sharey=True, squeeze=False)
    fig.suptitle('Evolution of Organoid Area Over Time by Well', fontsize=24, y=1.02)

    axes = axes.flatten()

    for ax, well in zip(axes, wells):
        well_data = df[df['well_id'] == well]
        # Fix the category order explicitly so violins, points and the mean
        # line are guaranteed to share the same x positions.
        time_order = sorted(well_data['time'].unique())

        sns.violinplot(x='time', y='area', data=well_data, order=time_order,
                       ax=ax, inner="box", cut=0)
        sns.stripplot(x='time', y='area', data=well_data, order=time_order,
                      ax=ax, color='darkblue', alpha=0.5, size=3, jitter=0.3)

        # The categorical plots place the k-th time point at x = k (0-based),
        # so the means are plotted at 0..n-1 rather than at the time values.
        mean_area = well_data.groupby('time')['area'].mean().reindex(time_order)
        ax.plot(range(len(time_order)), mean_area.values, color='red', linewidth=2, label='Mean')

        ax.set_title(f'Well {well}', fontsize=18, pad=20)
        ax.set_xlabel('Time', fontsize=16, labelpad=10)
        # Areas are region pixel counts; no physical calibration is applied.
        ax.set_ylabel('Area (pixels)', fontsize=16, labelpad=10)
        ax.legend(fontsize=12)
        ax.tick_params(axis='both', which='major', labelsize=12)

    plt.tight_layout()
    fig.subplots_adjust(top=0.93)

    if output_dir:
        save_figure(fig, 'area_evolution.png', output_dir)

def perform_anova(df, wells):
    """Run a one-way ANOVA per well to detect changes in area over time.

    For every well, the object areas are grouped by time point and compared
    with ``scipy.stats.f_oneway``. Wells with fewer than two non-empty time
    groups are reported and skipped.

    Args:
        df: DataFrame with the columns 'well_id', 'time' and 'area'.
        wells: Well ids to test.

    Returns:
        DataFrame with the columns 'well', 'f_value' and 'p_value', one row
        per well that was tested. The columns are present even when no well
        could be tested.
    """
    print("\nANOVA results:")
    anova_results = []
    for well in wells:
        well_data = df[df['well_id'] == well]
        groups = [group['area'].dropna().values for _, group in well_data.groupby('time')]
        # Dropping NaNs can leave a time point without any values.
        groups = [group for group in groups if len(group) > 0]

        if len(groups) < 2:
            print(f"\nWell {well} - Not enough groups for ANOVA")
            continue

        try:
            f_value, p_value = stats.f_oneway(*groups)
        except Exception as e:
            # Best effort: report the failure and continue with the next well.
            print(f"\nWell {well} - Error in ANOVA:")
            print(str(e))
            continue

        anova_results.append({
            'well': well,
            'f_value': f_value,
            'p_value': p_value
        })
        print(f"\nWell {well}:")
        print(f"F-value: {f_value}")
        print(f"p-value: {p_value}")

    # Explicit columns give callers a stable schema even for an empty result.
    return pd.DataFrame(anova_results, columns=['well', 'f_value', 'p_value'])

def calculate_area_ratio(group):
    """Return mean area at the last time point divided by mean area at the first.

    Args:
        group: DataFrame with the columns 'time' and 'area'.

    Returns:
        The ratio of the two means, or NaN when the initial mean area is zero.
    """
    times = group['time']
    areas = group['area']
    initial_mean = areas[times == times.min()].mean()
    final_mean = areas[times == times.max()].mean()
    if initial_mean == 0:
        # A zero baseline would make the ratio undefined.
        return np.nan
    return final_mean / initial_mean

def plot_area_ratio_heatmap(df, output_dir=None):
    """Plot a heatmap of organoid area ratios (last timepoint / first timepoint).

    The ratio is computed per well with `calculate_area_ratio` and arranged in
    a plate layout: the first character of the well id is the row, and the
    remaining characters (parsed as an integer) are the column.

    Args:
        df: DataFrame with the columns 'well_id', 'time' and 'area'.
        output_dir: If given, the figure is saved there as
            'area_ratio_heatmap.png' (and closed by `save_figure`).

    Returns:
        Series of area ratios indexed by well id.
    """
    # Select the needed columns before `apply`, so the function is not run on
    # the grouping column (pandas warns about that and is deprecating it).
    area_ratios = df.groupby('well_id')[['time', 'area']].apply(calculate_area_ratio)

    # Name the value column explicitly instead of relying on the implicit
    # column label 0 that `reset_index` gives an unnamed Series.
    heatmap_data = area_ratios.rename('area_ratio').reset_index()
    heatmap_data['row'] = heatmap_data['well_id'].str[0]
    heatmap_data['col'] = heatmap_data['well_id'].str[1:].astype(int)
    heatmap_matrix = heatmap_data.pivot(index='row', columns='col', values='area_ratio')

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(heatmap_matrix, annot=True, fmt='.2f', vmin=0, vmax=10,
                cmap='crest', cbar_kws={'label': 'Area Ratio (Last/First)'}, ax=ax)
    ax.set_title('Heatmap of Organoid Area Ratio (Last Timepoint / First Timepoint)', fontsize=16)
    ax.set_xlabel('Column', fontsize=12)
    ax.set_ylabel('Row', fontsize=12)
    fig.tight_layout()

    if output_dir:
        save_figure(fig, 'area_ratio_heatmap.png', output_dir)

    print("\nArea Ratios (Last/First) for each well:")
    print(area_ratios)
    return area_ratios



In [None]:
# Main execution
if __name__ == "__main__":
    # Set up output directories
    base_output_dir = "/content/drive/MyDrive/ColabNotebooks/data/organoids/output"
    output_dirs = create_output_dirs(base_output_dir)

    # Image processing and data extraction
    directory = "/content/drive/MyDrive/ColabNotebooks/data/organoids/wells/"
    # glob returns files in arbitrary order; sort for a reproducible run order.
    all_files = sorted(glob.glob(os.path.join(directory, "*.tif")))
    print(f"\nFound {len(all_files)} files to process")
    if not all_files:
        # Fail early with a clear message instead of a KeyError further down.
        raise FileNotFoundError(f"No .tif files found in {directory}")

    all_results = []
    for file_path in all_files:
        results = process_image(file_path, output_dirs)
        all_results.extend(results)

    # Create and save the DataFrame
    df = pd.DataFrame(all_results)
    if df.empty:
        # Without any detected object the table has no columns to analyze.
        raise RuntimeError("No objects were detected in any image; nothing to analyze.")
    output_csv = os.path.join(output_dirs['plots'], 'all_object_areas.csv')
    df.to_csv(output_csv, index=False)
    print(f"\nResults saved to {output_csv}")

    # Analysis and visualization
    wells = sorted(df['well_id'].unique())
    print(f"\nAnalyzing data for wells: {wells}")

    # Plot and save area evolution
    print("\nGenerating area evolution plot...")
    plot_area_evolution(df, wells, output_dirs['plots'])

    # Perform ANOVA analysis; keep the table so it can be inspected afterwards
    print("\nPerforming ANOVA analysis...")
    anova_results = perform_anova(df, wells)

    # Generate and save area ratio heatmap
    print("\nGenerating area ratio heatmap...")
    plot_area_ratio_heatmap(df, output_dirs['plots'])

Created directory: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317
Created directory: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317
Created directory: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/analysis_plots/20250103_191317
Created directory: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317

Found 6 files to process

Processing file: /content/drive/MyDrive/ColabNotebooks/data/organoids/wells/A1.tif

Processing Well A1, Time point 1
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_A1_t1.png
Well A1, Time 1:
Number of objects considered: 157
Median diameter before scaling: 6.48
Estimated diameter after scaling: 7


100%|██████████| 25.3M/25.3M [00:01<00:00, 22.1MB/s]
  state_dict = torch.load(filename, map_location=device)
100%|██████████| 5.23k/5.23k [00:00<00:00, 5.00MB/s]


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A1_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A1_t1.png
Total objects detected for Well A1, Time 1: 495

Processing Well A1, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_A1_t2.png
Well A1, Time 2:
Number of objects considered: 203
Median diameter before scaling: 7.48
Estimated diameter after scaling: 8


  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A1_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A1_t2.png
Total objects detected for Well A1, Time 2: 407

Processing Well A1, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_A1_t3.png
Well A1, Time 3:
Number of objects considered: 199
Median diameter before scaling: 8.96
Estimated diameter after scaling: 10
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A1_t3.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A1_t3.png
Total objects detected for Well A1, Time 3: 338

Processing Well A1, Time point 4
Saved figure to: /content/drive/MyDrive/ColabNotebo

  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A2_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A2_t1.png
Total objects detected for Well A2, Time 1: 448

Processing Well A2, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_A2_t2.png
Well A2, Time 2:
Number of objects considered: 184
Median diameter before scaling: 7.05
Estimated diameter after scaling: 8
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A2_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A2_t2.png
Total objects detected for Well A2, Time 2: 398

Processing Well A2, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNoteboo

  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A3_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A3_t1.png
Total objects detected for Well A3, Time 1: 486

Processing Well A3, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_A3_t2.png
Well A3, Time 2:
Number of objects considered: 167
Median diameter before scaling: 7.65
Estimated diameter after scaling: 8
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_A3_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_A3_t2.png
Total objects detected for Well A3, Time 2: 400

Processing Well A3, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNoteboo

  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B1_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B1_t1.png
Total objects detected for Well B1, Time 1: 529

Processing Well B1, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_B1_t2.png
Well B1, Time 2:
Number of objects considered: 169
Median diameter before scaling: 6.48
Estimated diameter after scaling: 7
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B1_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B1_t2.png
Total objects detected for Well B1, Time 2: 455

Processing Well B1, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNoteboo

  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B2_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B2_t1.png
Total objects detected for Well B2, Time 1: 499

Processing Well B2, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_B2_t2.png
Well B2, Time 2:
Number of objects considered: 208
Median diameter before scaling: 7.05
Estimated diameter after scaling: 8
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B2_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B2_t2.png
Total objects detected for Well B2, Time 2: 395

Processing Well B2, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNoteboo

  state_dict = torch.load(filename, map_location=device)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B3_t1.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B3_t1.png
Total objects detected for Well B3, Time 1: 491

Processing Well B3, Time point 2
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/diameter_plots/20250103_191317/diameter_distribution_B3_t2.png
Well B3, Time 2:
Number of objects considered: 149
Median diameter before scaling: 6.58
Estimated diameter after scaling: 7
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/processed_images/20250103_191317/processed_B3_t2.png
Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/segmented_images/20250103_191317/segmented_B3_t2.png
Total objects detected for Well B3, Time 2: 403

Processing Well B3, Time point 3
Saved figure to: /content/drive/MyDrive/ColabNoteboo

  area_ratios = df.groupby('well_id').apply(calculate_area_ratio)


Saved figure to: /content/drive/MyDrive/ColabNotebooks/data/organoids/output/analysis_plots/20250103_191317/area_ratio_heatmap.png

Area Ratios (Last/First) for each well:
well_id
A1    4.702250
A2    3.887858
A3    4.614217
B1    1.633994
B2    4.397924
B3    3.359518
dtype: float64
