In [1]:

# Standard library imports
import os
import sys
import logging

# Third-party imports
import pandas as pd
import numpy as np
import cv2

# Local module imports
sys.path.append(os.path.abspath('..'))
from utils.utils_post_process import post_process
from utils.utils_vis import save_image, plot_artery_ann
from utils.utils_data import get_classifications, get_segmentations, get_measurements
from utils.utils_geometry import get_contours, is_contour_intersecting_or_within
from utils.utils_measure import measure_thickness
from utils.utils_constants import (VESSEL_NEPTUNE_PAT_INFO_PATH as VESSEL_PAT_INFO_PATH, 
                                   CLASSIFICATION_PATH, SEGMENTATION_DIR,
                                   MEASUREMENTS_DIR, FEATURES_PATH, CROPPED_VESSELS_COMBINED_DIR)
from utils.utils_feature import extract_features

# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [2]:
def extract_base_features(cnt_outer, cnts_middle, cnts_inner, cnts_hys):
    """Compute the per-layer area features of one vessel from its contours.

    Args:
        cnt_outer: Single contour; its area is taken as the whole artery area.
        cnts_middle: Iterable of contours; their summed area minus the lumen
            area is reported as the intima area.
        cnts_inner: Iterable of contours; their summed area is the lumen area.
        cnts_hys: Iterable of contours; their summed area is 'Area_Hys'.

    Returns:
        dict with the keys 'Area_Media', 'Area_Intima', 'Area_Lumen' and
        'Area_Hys'.
    """
    def _total_area(contours):
        # Summed cv2 area of a contour group (0 for an empty group).
        return sum(map(cv2.contourArea, contours))

    area_outer = cv2.contourArea(cnt_outer)
    area_lumen = _total_area(cnts_inner)
    area_intima = _total_area(cnts_middle) - area_lumen
    # Media is what remains of the artery once lumen and intima are removed.
    area_media = area_outer - area_lumen - area_intima
    area_hys = _total_area(cnts_hys)

    return {
        'Area_Media': area_media,
        'Area_Intima': area_intima,
        'Area_Lumen': area_lumen,
        'Area_Hys': area_hys,
    }

def extract_measurement_features(measurements_vessel, artery_area,
                                 t_multi=15, t_open_lumen=30, t_mediam=15, t_average=15):
    """Aggregate the thickness measurements of one vessel into summary features.

    Every measurement entry is post-processed on its own; the resulting media,
    intima and ratio samples of all entries are pooled and handed to
    ``extract_features`` in a single call.

    Args:
        measurements_vessel: Iterable of mappings, each providing the keys
            "Thickness_Media" and "Thickness_Intima" (sequences of numbers).
        artery_area: Forwarded unchanged to ``post_process`` as ``artery_area``.
        t_multi: Forwarded to ``post_process`` under the same keyword name.
        t_open_lumen: Forwarded to ``post_process`` under the same keyword name.
        t_mediam: Forwarded to ``post_process`` under the same keyword name
            (spelling matches the keyword used by ``post_process``).
        t_average: Forwarded to ``post_process`` under the same keyword name.
            The four threshold defaults reproduce the values that used to be
            hard-coded here, so existing two-argument calls are unaffected.

    Returns:
        dict: the intima, media and ratio feature dicts returned by
        ``extract_features`` merged into one (on a key collision the later
        dict wins, in the order intima, media, ratio).
    """
    all_media = []
    all_intima = []
    all_ratio = []
    for m in measurements_vessel:
        m_media = np.array(m["Thickness_Media"])
        m_intima = np.array(m["Thickness_Intima"])

        # Wall thickness is media + intima wherever the media value is
        # non-negative; a negative media value is carried through unchanged.
        # NOTE(review): negative values look like sentinel codes that
        # post_process interprets - confirm against utils_post_process.
        m_wall = np.array([x + y if x >= 0 else x for x, y in zip(m_media, m_intima)])

        m_media, m_intima, m_ratio = post_process(m_media, m_intima, m_wall,
                                                  t_multi=t_multi, t_open_lumen=t_open_lumen,
                                                  t_mediam=t_mediam, t_average=t_average,
                                                  artery_area=artery_area)
        all_media.extend(m_media)
        all_intima.extend(m_intima)
        all_ratio.extend(m_ratio)

    # The pooled samples of all entries are summarised together, not per entry.
    features_intima, features_media, features_ratio = extract_features(all_media, all_intima, all_ratio)
    return {**features_intima, **features_media, **features_ratio}


def extract_features_slide(classifications, segmentations, measurements, slide_basename):
    """Build one feature record per classified vessel of a slide.

    Args:
        classifications: DataFrame with at least the columns "Image Name" and
            "Bounding Box" (the latter a comma-separated "x,y,width,height"
            string of integers); one row per vessel.
        segmentations: Passed through unchanged to ``get_contours``.
        measurements: Mapping from image name to that vessel's list of
            measurement entries.
        slide_basename: Slide identifier; stored under "Slide Name" in every
            record and passed to ``get_contours``.

    Returns:
        list[dict]: one dict per classification row, combining the slide name,
        the row's own columns, the area features and - when measurements
        exist - the measurement features.
    """
    slide_features = []
    for _, row in classifications.iterrows():
        img_name = row["Image Name"]
        bbox_x, bbox_y, bbox_width, bbox_height = map(int, row["Bounding Box"].split(","))
        cnt_outer, cnts_middle, cnts_inner, cnts_hys = get_contours(segmentations, slide_basename, img_name,
                                                                    bbox_x, bbox_y, bbox_width, bbox_height)
        base_features = extract_base_features(cnt_outer, cnts_middle, cnts_inner, cnts_hys)

        # An image without any entry in `measurements` is handled exactly like
        # an image with an empty entry: warn and keep the area features,
        # instead of aborting the whole slide with a KeyError.
        try:
            measurements_vessel = measurements[img_name]
        except KeyError:
            measurements_vessel = []

        if len(measurements_vessel) == 0:
            logging.warning(
                    f"No measurements for image {img_name} in slide {slide_basename}."
                    )
            measurement_features = {}
        else:
            measurement_features = extract_measurement_features(measurements_vessel, cv2.contourArea(cnt_outer))

        # "Slide Name" goes first so it leads the columns of any table built
        # from these records; later dicts override earlier keys on collision.
        slide_features.append({"Slide Name": slide_basename, **row.to_dict(),
                               **base_features, **measurement_features})
    return slide_features


In [3]:
# Debug switch: when set to a slide filename, only that slide is processed.
# Set it to None to process every slide listed in the patient-info table.
ONLY_SLIDE_FILENAME = "11_26609_000_009_L2_TRI.svs"

pat_df = pd.read_csv(VESSEL_PAT_INFO_PATH)
suffix = "_measurements"  # measurement files are named "<slide_basename>_measurements.json"

# Only the sheet names are needed here, so the workbook handle is released
# as soon as they have been read.
with pd.ExcelFile(CLASSIFICATION_PATH) as classification_book:
    available_sheetnames = classification_book.sheet_names
logging.info(f"{len(pat_df)} slides selected, {len(pat_df) - len(available_sheetnames)} discarded, " 
             f"{len(available_sheetnames)} left for analysis.")

# NOTE(review): nothing in this cell writes to or closes this writer, and
# collected_features is not saved here - confirm a later cell does both.
excel_writer = pd.ExcelWriter(FEATURES_PATH, engine='xlsxwriter')

collected_features = []
for i, slide_filename in enumerate(pat_df["WSI_Selected"]):
    if ONLY_SLIDE_FILENAME is not None and slide_filename != ONLY_SLIDE_FILENAME:
        continue
    logging.info(f"Processing: {i+1}/{len(pat_df)}: {slide_filename}")
    slide_basename = os.path.splitext(slide_filename)[0]

    classifications = get_classifications(CLASSIFICATION_PATH, slide_basename, available_sheetnames, remove_others=False)
    if classifications.empty:
        continue  # Skip this slide: it has no classification rows

    segmentations_path = os.path.join(SEGMENTATION_DIR, f"{slide_basename}.geojson")
    segmentations = get_segmentations(segmentations_path, clean=True)

    measurements_path = os.path.join(MEASUREMENTS_DIR, f"{slide_basename}{suffix}.json")
    measurements = get_measurements(measurements_path, clean=True)
    slide_features = extract_features_slide(classifications, segmentations, measurements, slide_basename)
    collected_features.extend(slide_features)

2024-07-16 03:46:36,257 - INFO - 247 slides selected, 15 discarded, 232 left for analysis.
2024-07-16 03:46:36,276 - INFO - Processing: 2/247: 11_26609_000_009_L2_TRI.svs


[]
