# Shape Classifier
### **Introduction**
This script is designed to process imaging data from **MRI** or **CT** scans. It analyzes the **PNG image slices** contained in the group folder and extracts relevant geometric and statistical information about a segmented organ.

The script does the following calculations for each image slice:
- **Center of the Organ ("Center Glob")**:  
  Computes the center-of-mass of the non-black pixels, giving the approximate centroid location of the organ.
- **Minimum and Maximum Radius**:  
  Measures the shortest and longest distances from the center to the outermost edges of the segmented organ.  
- **Intensity Standard Deviation**:  
  Calculates how much pixel intensity varies, which may indicate texture differences or tissue heterogeneity.
- **Organ Area**:  
  Computes the number of non-black pixels and converts it into **mm²** using the known pixel-to-mm scale.  
- **Estimated Mass Calculation (Second Script Only)**:  
  - Uses per-pixel intensity to interpolate an approximate tissue density.
  - Converts the pixel area into **cm³** volume using **CT/MRI-specific slice thickness**.
  - Computes the **total mass** of the segmented organ.
  - Estimates the **confidence interval** for the total mass by propagating measurement uncertainties.

| Min/Max Radius | Sample Area |
|---------------|------------|
| ![Min/Max Radius](https://raw.githubusercontent.com/agadin/QP2_big_data_project_tools/refs/heads/main/img/min_max_radius.png) | ![Sample Area](https://raw.githubusercontent.com/agadin/QP2_big_data_project_tools/refs/heads/main/img/sample_area.png) |


---

### **Instructions**
1. **Run the script and select the root directory**  
   - This should be the folder that contains subfolders with **MRI** or **CT** in their names.
   - Each subfolder should contain **PNG image slices** representing the segmented organ (the script only picks up `*.png` files).

2. **Choose whether to process "MRI" or "CT"**  
   - The script will only process the selected scan type (either MRI or CT).
   - It will look for **four** folders that match the scan type.

3. **Wait for processing to complete**  
   - The script will analyze each image in the selected folders.
   - Results will be saved to a **CSV file** named `image_analysis_output.csv` or `mass_estimation_output.csv` in the selected directory.

4. **Review results**  
   - The CSV file contains **calculated measurements** for each image.
   - The second script will additionally display **total estimated mass per folder** and the **overall mass with uncertainty**.

After processing, the results can be used for further statistical or graphical analysis of organ properties across slices.


In [None]:
!pip install SimpleITK numpy
!pip show SimpleITK


In [None]:
import sys
import os
import glob
import csv
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.ndimage import center_of_mass
import SimpleITK as sitk
# Note: cv2 is used as a fallback but not imported by default. Import if needed:
# import cv2

PIXEL_SCALE = 10.0 / 17.53  # ≈0.570 mm per pixel (10 mm spans 17.53 pixels)

def _load_grayscale(image_path):
    """
    Reads an image file into a NumPy array, trying PIL, then OpenCV, then SimpleITK.
    Returns the pixel array, or None when every reader fails.
    """
    # Try opening with PIL (default method). The context manager closes the
    # underlying file handle as soon as the pixels have been copied out.
    try:
        with Image.open(image_path) as image:
            return np.array(image.convert('L'))
    except Exception as e:
        print(f"Warning: PIL failed to open {image_path}. Trying OpenCV. Error: {e}")

    # Fallback 1: Try OpenCV (cv2). It is imported lazily because it is optional;
    # a missing package is handled like any other read failure.
    try:
        import cv2
        data = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if data is None:
            raise ValueError("cv2.imread returned None")
        return data
    except Exception as e:
        print(f"Warning: OpenCV failed to open {image_path}. Trying SimpleITK. Error: {e}")

    # Fallback 2: Try SimpleITK (sitk). No grayscale conversion is applied here.
    try:
        return sitk.GetArrayFromImage(sitk.ReadImage(image_path))
    except Exception as e:
        print(f"Skipping {image_path} due to conversion error.")
        print(f"Error reading image with SimpleITK: {e}")
        return None


def _edge_mask(mask):
    """
    Returns a boolean array marking the True pixels of `mask` that lie on its outline,
    i.e. that have at least one axis-aligned neighbour that is False or outside the array.
    """
    padded = np.pad(mask, 1, mode='constant', constant_values=False)
    interior = mask.copy()
    for axis, length in enumerate(mask.shape):
        for offset in (0, 2):  # 0 -> previous neighbour, 2 -> next neighbour
            window = [slice(1, -1)] * mask.ndim
            window[axis] = slice(offset, offset + length)
            interior &= padded[tuple(window)]
    return mask & ~interior


def process_image(image_path):
    """
    Loads an image slice and computes statistics on its non-black pixels.

    The image is read with PIL (converted to 8-bit grayscale); OpenCV and then
    SimpleITK are tried as fallbacks if the previous reader fails.

    Returns None when the image cannot be read, is empty, or is more than 99% black.
    Otherwise returns a dictionary with:
      'center'        : centroid of the non-black pixels, [row, col] in mm.
      'min_radius'    : shortest distance (mm) from the centroid to an outline pixel.
      'max_radius'    : longest distance (mm) from the centroid to an outline pixel.
      'intensity_std' : standard deviation of the non-black pixel intensities.
      'area'          : number of non-black pixels converted to mm².

    Outline pixels are non-black pixels that touch a black pixel or the image border,
    so the outline of an internal hole also counts as an edge.
    """
    data = _load_grayscale(image_path)

    # If no valid image data was loaded (or the image has no pixels), skip it.
    if data is None or data.size == 0:
        return None

    # --- Process Image Data ---
    mask = data != 0
    area_pixels = int(np.count_nonzero(mask))
    black_pixels = data.size - area_pixels
    if black_pixels / data.size > 0.99:
        return None  # Skip if mostly black

    # Compute the center in pixels, then convert to mm.
    indices = np.argwhere(mask)
    center_pixels = indices.mean(axis=0)  # [row, col]
    center = center_pixels * PIXEL_SCALE

    # Radii are measured to the outline only. Taking the minimum over every
    # non-black pixel would just return the distance to the pixel nearest the
    # centroid (close to zero). The farthest non-black pixel is always an
    # outline pixel, so the maximum is the same as over all non-black pixels.
    edge_indices = np.argwhere(_edge_mask(mask))
    distances_pixels = np.sqrt(((edge_indices - center_pixels) ** 2).sum(axis=1))
    min_radius = distances_pixels.min() * PIXEL_SCALE  # Convert to mm
    max_radius = distances_pixels.max() * PIXEL_SCALE  # Convert to mm

    # Compute intensity standard deviation from non-black pixels.
    intensity_std = data[mask].std()

    # Convert the non-black pixel count to mm².
    area = area_pixels * (PIXEL_SCALE ** 2)

    return {
        'center': center,          # [row (mm), col (mm)]
        'min_radius': min_radius,  # in mm
        'max_radius': max_radius,  # in mm
        'intensity_std': intensity_std,
        'area': area               # in mm²
    }


def process_folder(folder_path):
    """
    Runs process_image on every '*.png' file directly inside folder_path, in sorted order.

    Returns a list with one result dictionary per image that produced a result.
    Each dictionary is extended with 'filename' (base name of the file) and
    'index' (position of the file in the sorted listing, counting skipped images too).
    """
    pattern = os.path.join(folder_path, '*.png')
    collected = []
    for position, path in enumerate(sorted(glob.glob(pattern))):
        stats = process_image(path)
        if stats is None:
            # Unreadable or mostly-black slice: leave it out of the results.
            continue
        stats['filename'] = os.path.basename(path)
        stats['index'] = position
        collected.append(stats)
    return collected


def write_csv(output_path, folder_results):
    """
    Saves per-image measurements to a CSV file at output_path.

    folder_results maps a folder name to the list of result dictionaries for that
    folder. One row is written per image with the columns: Folder, Image Index,
    Filename, Center_Y (mm), Center_X (mm), Min_Radius (mm), Max_Radius (mm),
    Intensity_STD, Area (mm²). All measurements are formatted with two decimals.
    """
    columns = ['Folder', 'Image Index', 'Filename', 'Center_Y (mm)', 'Center_X (mm)',
               'Min_Radius (mm)', 'Max_Radius (mm)', 'Intensity_STD', 'Area (mm²)']

    def as_row(folder_label, entry):
        # 'center' is stored as [row, col], i.e. (Y, X).
        measurements = (
            entry['center'][0],
            entry['center'][1],
            entry['min_radius'],
            entry['max_radius'],
            entry['intensity_std'],
            entry['area'],
        )
        formatted = [f"{value:.2f}" for value in measurements]
        return [folder_label, entry['index'], entry['filename']] + formatted

    with open(output_path, mode='w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(columns)
        out.writerows(
            as_row(folder_label, entry)
            for folder_label, entries in folder_results.items()
            for entry in entries
        )
    print(f"CSV file saved to: {output_path}")

def get_target_folders(parent_path, scan_type):
    """
    Decides which folders should be processed.

    When parent_path itself holds '*.png' files, it is the single target folder.
    Otherwise every immediate subdirectory whose name contains scan_type
    (compared case-insensitively) is returned as a full path.
    """
    # PNG files directly in the selected folder take precedence over subfolders.
    if glob.glob(os.path.join(parent_path, '*.png')):
        return [parent_path]

    return [
        os.path.join(parent_path, name)
        for name in os.listdir(parent_path)
        if os.path.isdir(os.path.join(parent_path, name))
        and scan_type.lower() in name.lower()
    ]


def main():
    """
    Interactive entry point for the image analysis.

    Asks for a directory (folder dialog) and a scan type (console prompt), processes
    every target folder returned by get_target_folders, and writes the combined
    results to 'image_analysis_output.csv' inside the selected directory.
    Returns early, after printing a message, if nothing was selected, the scan type
    is invalid, or no valid images were found.
    """
    import tkinter as tk
    from tkinter import filedialog

    # A hidden root window is needed only to host the folder dialog; destroy it
    # afterwards so no invisible Tk window is left behind.
    root = tk.Tk()
    root.withdraw()
    try:
        # Ask the user to select a directory
        path = filedialog.askdirectory(title='Select Directory containing PNG files or subfolders')
    finally:
        root.destroy()
    if not path:
        print("No directory selected.")
        return

    # Ask the user whether to process CT or MRI folders.
    # (This is only used when the selected directory doesn't contain PNG files directly.)
    scan_type = input("Enter scan type to process (CT or MRI): ").strip().upper()
    if scan_type not in ["CT", "MRI"]:
        print("Invalid input. Please enter 'CT' or 'MRI'.")
        return

    target_folders = get_target_folders(path, scan_type)
    if not target_folders:
        print(f"No target folders found containing '{scan_type}', and no PNG files found in the selected folder.")
        return

    # Collect results per folder, keyed by the folder's base name.
    folder_results = {}
    for folder in target_folders:
        folder_name = os.path.basename(folder)
        print(f"Processing folder: {folder_name}")
        results = process_folder(folder)
        if results:
            folder_results[folder_name] = results
        else:
            print(f"  No valid images found in {folder_name}.")

    if not folder_results:
        print("No valid images found in any target folders.")
        return

    output_csv = os.path.join(path, "image_analysis_output.csv")
    write_csv(output_csv, folder_results)

# Entry point: run main() only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

# Estimate Organ Mass
### Input Approximate Densities for Tissue Types

In this cell, you can set the approximate densities (in g/cm³) for:
- **Bright spots:** regions that are brighter on CT/MRI (e.g., calcifications or bone)
- **Dark spots:** regions that are darker (e.g., soft tissue)

Look up the densities of what you believe the dark/light spots are, and include those values in your report if you report the estimated mass. Make sure to look up values relevant to CT and MRI. A linear gradient is used: intensity 1 corresponds approximately to `density_dark` and intensity 255 to `density_bright`.

In [None]:
# Set the approximate densities (in g/cm³)
density_bright = 1 # Density for the brightest pixels (intensity 255); replace with your looked-up value (somewhere around 1 for MRI)
density_dark   = 0.3  # Density for the darkest non-black pixels (intensity 1); replace with your looked-up value (somewhere around 0.3, for example, for CT)

### Calculate Estimated Mass Based on Image Analysis and Tissue Densities

In this cell:
1. Select the root directory that contains subfolders with "CT" or "MRI" in their names.
2. For each PNG image in those folders, load the image in grayscale.
3. For each pixel:
    - If the pixel is black (intensity = 0), its mass contribution is 0.
    - Otherwise, we compute a linear interpolation:
        factor = (I - 1) / 254.0   (maps intensity from 1–255 to 0–1) 
        pixel_density = density_dark + factor * (density_bright - density_dark)
4. Multiply the per-pixel density by the voxel volume to get the mass contribution.
5. Sum the contributions across the image, and then across all images.

**Imaging Parameters:**  
- **pixel_spacing:** The size of one pixel in mm.  
- **slice_thickness:** The thickness (distance between slices) in mm.

The results (mass per image and total mass) are saved to a CSV file.

In [None]:
# %% [markdown]
# ### Compute Estimated Mass with Propagated Error Using Image Data
#
# This cell:
# 1. Prompts for the root directory (which should contain CT and MRI folders or the PNG files directly).
# 2. For each image:
#    - Loads the PNG image and computes its mass slice using a pixel-by-pixel density,
#      interpolated linearly from density_dark to density_bright (with black pixels contributing no mass).
#    - Uses imaging parameters:
#         - Pixel spacing: 10 mm per 17.53 pixels.
#         - Slice thickness: 4 mm for CT and 1 mm for MRI.
# 3. Propagates uncertainties from:
#    - Pixel spacing (δp = 0.01 mm),
#    - Slice thickness (δt = 0.2 mm for CT, 0.1 mm for MRI),
#    - Densities (δdensity = 0.02 g/cm³).
# 4. Computes a per-image mass error and then calculates the total mass and its combined error.
# 5. Saves the per-image results (including mass and error) to a CSV file.

import os
import glob
import numpy as np
from PIL import Image
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import math

def compute_image_mass_and_error(image_path, density_dark, density_bright, pixel_spacing, slice_thickness, delta_p, delta_t, delta_density):
    """
    Compute the mass contribution of an image slice and its propagated error.

    Black pixels (intensity 0) contribute no mass. Every other pixel gets a density
    interpolated linearly between density_dark (intensity 1) and density_bright
    (intensity 255), multiplied by the voxel volume.

    Parameters:
      image_path      : Path to the PNG image (converted to 8-bit grayscale on load).
      density_dark    : Density for the darkest (non-zero) pixels (g/cm³).
      density_bright  : Density for the brightest pixels (g/cm³).
      pixel_spacing   : Pixel spacing (mm per pixel).
      slice_thickness : Slice thickness (mm).
      delta_p         : Uncertainty in pixel spacing (mm).
      delta_t         : Uncertainty in slice thickness (mm).
      delta_density   : Uncertainty in density (g/cm³) (assumed same for both dark and bright).

    Returns:
      (mass, mass_error) as Python floats, both in grams. (0.0, 0.0) for an
      all-black slice.
    """
    # Load image in grayscale. The context manager closes the file handle once the
    # pixels are copied; float64 keeps the sum over many pixels accurate.
    with Image.open(image_path) as image:
        data = np.array(image.convert('L'), dtype=np.float64)

    # Create mask for non-black pixels.
    mask = data > 0
    if not mask.any():
        return 0.0, 0.0  # If all pixels are black, no mass.

    # Map non-black intensities (1-255) to a factor in [0, 1]: 1 -> 0, 255 -> 1.
    factor = (data[mask] - 1) / 254.0
    # Compute per-pixel effective density by linear interpolation.
    pixel_densities = density_dark + factor * (density_bright - density_dark)

    # Compute voxel volume in cm³.
    voxel_volume_cm3 = (pixel_spacing**2 * slice_thickness) / 1000.0  # mm³ -> cm³

    # Compute mass for this slice (grams), as a plain Python float so both return
    # paths yield the same type.
    mass = float(np.sum(pixel_densities) * voxel_volume_cm3)

    # ----- Error Propagation -----
    # 1. Voxel volume V = (pixel_spacing^2 * slice_thickness) / 1000.
    #    Relative error in V: δV/V = sqrt((2*δp/p)^2 + (δt/t)^2)
    rel_err_volume = math.sqrt((2 * delta_p / pixel_spacing)**2 + (delta_t / slice_thickness)**2)

    # 2. Effective density d = density_dark + factor*(density_bright - density_dark).
    #    Its relative error is approximated with the midpoint of the two densities,
    #    not with the actual per-pixel densities.
    avg_density = (density_dark + density_bright) / 2.0
    rel_err_density = delta_density / avg_density

    # 3. Combine the relative errors in quadrature.
    rel_err = math.sqrt(rel_err_volume**2 + rel_err_density**2)

    # Mass error for this image.
    mass_error = mass * rel_err
    return mass, mass_error

def get_target_folders(parent_path, scan_type):
    """
    Builds the list of folders whose images should be processed.

    A folder that directly holds '*.png' files is used on its own. If it holds none,
    the result is every immediate subdirectory whose name includes scan_type,
    ignoring case.
    """
    has_direct_pngs = len(glob.glob(os.path.join(parent_path, '*.png'))) > 0
    if has_direct_pngs:
        return [parent_path]

    def is_matching_dir(entry_name):
        # Keep only directories; plain files with a matching name are ignored.
        entry_path = os.path.join(parent_path, entry_name)
        return os.path.isdir(entry_path) and scan_type.lower() in entry_name.lower()

    return [
        os.path.join(parent_path, entry_name)
        for entry_name in filter(is_matching_dir, os.listdir(parent_path))
    ]

# --- User selects the root directory ---
# The hidden Tk root only hosts the folder dialog; destroy it afterwards so no
# invisible window is left behind.
root = tk.Tk()
root.withdraw()
try:
    selected_dir = filedialog.askdirectory(title='Select Directory containing PNG files or subfolders')
finally:
    root.destroy()
if not selected_dir:
    raise Exception("No directory selected.")

# Ask the user whether to process CT or MRI folders.
scan_type = input("Enter scan type to process (CT or MRI): ").strip().upper()
if scan_type not in ["CT", "MRI"]:
    raise Exception("Invalid input. Please enter 'CT' or 'MRI'.")

target_folders = get_target_folders(selected_dir, scan_type)
if not target_folders:
    raise Exception(f"No target folders found containing '{scan_type}' or PNG files in the selected directory.")

# Define imaging and density parameters.
pixel_spacing = 10.0 / 17.53  # mm per pixel
delta_p = 0.01                # uncertainty in pixel spacing (mm)
delta_density = 0.02          # uncertainty in density (g/cm³)

# Keep density values that were already set before this code ran; fall back to
# the example values only when a density is undefined or None.
if globals().get('density_dark') is None:
    density_dark = 0.3    # g/cm³ (for darkest non-zero pixels)
if globals().get('density_bright') is None:
    density_bright = 1.0  # g/cm³ (for brightest pixels)

# Slice thickness and its uncertainty (both in mm) for each scan type.
slice_settings = {
    'CT': (4.0, 0.2),
    'MRI': (1.0, 0.1),
}

mass_records = []
total_mass_squared_error = 0.0

# Process each target folder.
for folder in target_folders:
    folder_name = os.path.basename(folder)
    # Pick the scan type from the folder name; if the name mentions neither type
    # (e.g. the selected folder holds the PNG files directly), use the scan type
    # the user entered instead of assuming MRI.
    folder_name_upper = folder_name.upper()
    if 'CT' in folder_name_upper:
        folder_scan_type = 'CT'
    elif 'MRI' in folder_name_upper:
        folder_scan_type = 'MRI'
    else:
        folder_scan_type = scan_type
    slice_thickness, delta_t = slice_settings[folder_scan_type]

    # Look for PNG files in the folder.
    png_files = sorted(glob.glob(os.path.join(folder, '*.png')))
    if not png_files:
        print(f"No PNG files found in {folder_name}.")
        continue

    for idx, png_file in enumerate(png_files):
        mass, mass_error = compute_image_mass_and_error(
            png_file,
            density_dark,
            density_bright,
            pixel_spacing,
            slice_thickness,
            delta_p,
            delta_t,
            delta_density
        )
        mass_records.append({
            'Folder': folder_name,
            'Image Index': idx,
            'Filename': os.path.basename(png_file),
            'Slice Thickness (mm)': slice_thickness,
            'Mass (g)': mass,
            'Mass Error (g)': mass_error
        })
        # Per-image errors are combined in quadrature (summed as squares).
        total_mass_squared_error += mass_error**2

# Without any records the DataFrame would have no columns and the grouping
# below would fail with an unhelpful KeyError.
if not mass_records:
    raise Exception("No PNG files found in any target folder.")

# Create a DataFrame with the results.
mass_df = pd.DataFrame(mass_records)

# Compute and print the total estimated mass per folder.
print("\nTotal Estimated Mass per Folder:")
folder_mass_totals = mass_df.groupby('Folder')['Mass (g)'].sum()
for folder, total_mass in folder_mass_totals.items():
    print(f"  {folder}: {total_mass:.3f} g")

# Compute and print the overall average mass and its uncertainty.
# Only folders that actually contributed images are counted, and the uncertainty
# of the total is scaled by the same divisor so it applies to the average.
total_mass = mass_df['Mass (g)'].sum()
processed_folder_count = len(folder_mass_totals)
average_mass = total_mass / processed_folder_count
total_mass_error = math.sqrt(total_mass_squared_error)  # Combined uncertainty of the total
average_mass_error = total_mass_error / processed_folder_count

print(f"\nOverall Average Estimated Mass: {average_mass:.3f} g ± {average_mass_error:.3f} g")

# Save the mass estimation details to a CSV file.
mass_csv = os.path.join(selected_dir, "mass_estimation_output.csv")
mass_df.to_csv(mass_csv, index=False)
print(f"Mass estimation details saved to: {mass_csv}")