### Adaptive ROI Localization and Cropping

In [None]:
# IMPORT PACKAGES
import os
import cv2
import numpy as np
import csv
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# === Configurable Parameters ===
# Minimum jump between neighbouring per-column / per-row pixel counts that
# find_extreme_changes() treats as an edge.
THRESHOLD = 50

# Size passed to cv2.resize for every cropped ROI.
RESIZE_DIMS = (640, 640)

DEBUG = True  # Set True to save visualizations
# CSV log with one row per image that was examined.
LOG_PATH = "temp/preprocessing/preprocessing_log.csv"
# Folder receiving the ROI overlay images when DEBUG is on.
DEBUG_DIR = "temp/preprocessing/image-crops"
# Creating DEBUG_DIR also creates its parent, which is where LOG_PATH lives.
os.makedirs(DEBUG_DIR, exist_ok=True)

In [None]:
# COUNT NON-ZERO PIXELS PER COLUMN
def vertical_sum(image):
    """Return the number of non-zero entries in each column of *image*.

    Despite the name, this is a count rather than a sum of pixel values:
    the reduction runs along axis 0, giving one count per column.
    """
    per_column_counts = np.count_nonzero(image, axis=0)
    return per_column_counts

In [None]:
# COUNT POSITIVE PIXELS PER ROW
def horizontal_sum(image):
    """Return the number of strictly positive entries in each row of *image*.

    Despite the name, this is a count rather than a sum of pixel values:
    the reduction runs along axis 1, giving one count per row.
    """
    positive_mask = image > 0
    per_row_counts = np.count_nonzero(positive_mask, axis=1)
    return per_row_counts

In [None]:
# FIND EXTREME CHANGES
def find_extreme_changes(array, threshold=THRESHOLD):
    """Locate large jumps between neighbouring elements of a 1-D profile.

    Args:
        array: Sequence of numeric values (e.g. per-column pixel counts).
        threshold: A step must be strictly larger than this (in absolute
            value) to be reported.

    Returns:
        Tuple ``(increases, decreases)`` of index arrays. Index ``i`` refers
        to the step from ``array[i]`` to ``array[i + 1]``: ``increases``
        holds steps greater than ``threshold``, ``decreases`` holds steps
        less than ``-threshold``.
    """
    values = np.asanyarray(array)
    # np.diff keeps unsigned dtypes, so a drop would wrap around to a huge
    # positive number and be misreported as an increase. Work in signed ints.
    if values.dtype.kind == "u":
        values = values.astype(np.int64)

    # First discrete derivative: derivative[i] = values[i + 1] - values[i]
    derivative = np.diff(values)
    # Find points where the derivative exceeds the threshold
    increases = np.where(derivative > threshold)[0]
    decreases = np.where(derivative < -threshold)[0]
    return increases, decreases

In [None]:
# CROP IMAGE
def crop_image_with_padding(image, x1, x2, y1, y2):
    """Cut the window ``[x1, x2) x [y1, y2)`` out of *image*, zero-padding
    whatever part of the window lies outside the image.

    Args:
        image: Array indexed as ``image[row, column, ...]``; both 2-D
            (grayscale) and 3-D (with a channel axis) inputs are accepted.
        x1, x2: Left (inclusive) and right (exclusive) column of the window.
        y1, y2: Top (inclusive) and bottom (exclusive) row of the window.

    Returns:
        Array of shape ``(y2 - y1, x2 - x1, ...)`` with the same dtype and
        trailing axes as *image*. Pixels outside the image are zero.

    Raises:
        ValueError: From ``np.zeros`` when the window has a negative width
            or height.
    """
    h, w = image.shape[:2]
    crop_w = x2 - x1
    crop_h = y2 - y1
    # Preserve dtype and any trailing (channel) axes of the source image.
    cropped = np.zeros((crop_h, crop_w) + image.shape[2:], dtype=image.dtype)

    # Clamp BOTH ends into [0, size]. Clamping only one side lets a negative
    # x2/y2 through as a slice stop, which Python treats as "from the end".
    src_x1 = min(max(x1, 0), w)
    src_x2 = min(max(x2, 0), w)
    src_y1 = min(max(y1, 0), h)
    src_y2 = min(max(y2, 0), h)

    patch = image[src_y1:src_y2, src_x1:src_x2]
    if patch.size:
        # Offset of the visible part inside the (possibly padded) output.
        dst_x1 = src_x1 - x1
        dst_y1 = src_y1 - y1
        cropped[dst_y1:dst_y1 + patch.shape[0], dst_x1:dst_x1 + patch.shape[1]] = patch
    return cropped

In [None]:
# === Main Processing Function ===
def _locate_roi(image_gray):
    """Return ``(x1, x2, y1, y2)`` of the square ROI, or None if too few edges were found."""
    # Per-column and per-row pixel counts, then the large jumps in each profile
    v_inc, v_dec = find_extreme_changes(vertical_sum(image_gray))
    h_inc, _ = find_extreme_changes(horizontal_sum(image_gray))

    if len(v_inc) < 2 or len(v_dec) < 2 or len(h_inc) == 0:
        return None

    # The SECOND large increase / decrease along x bound the ROI horizontally
    # (presumably the first pair belongs to an outer frame -- TODO confirm).
    roi_x1 = int(v_inc[1]) + 1
    roi_x2 = int(v_dec[1])
    roi_width = roi_x2 - roi_x1
    # Top edge: first large increase along y, moved up by 9% of the ROI width.
    roi_y1 = int(h_inc[0] - roi_width * 0.09)
    # The ROI is square: its height equals its width.
    roi_y2 = roi_width + roi_y1
    return roi_x1, roi_x2, roi_y1, roi_y2


def process_images(root_dir):
    """Crop the ROI out of every ``.jpg`` below *root_dir* and save it resized.

    Walks ``root_dir/<class>/<patient>/<image>.jpg``. For each image the ROI
    is located, cut out with zero padding, resized to ``RESIZE_DIMS`` and
    written to ``root_dir/../1-processed/<class>/<patient>/<image>.jpg``.
    One row per examined image is written to the CSV at ``LOG_PATH``; its
    ``status`` column is one of ``processed``, ``failed to read``,
    ``insufficient edges``, ``invalid roi`` or ``failed to write``.
    When ``DEBUG`` is true an overlay of the ROI rectangle is saved to
    ``DEBUG_DIR``.

    Args:
        root_dir: Directory containing one sub-directory per class.
    """
    data_processed_dir = os.path.join(root_dir, '..', '1-processed')
    os.makedirs(data_processed_dir, exist_ok=True)

    # Make sure the log file's folder exists before opening it for writing.
    log_dir = os.path.dirname(LOG_PATH)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    with open(LOG_PATH, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["image_path", "roi_x1", "roi_x2", "roi_y1", "roi_y2", "roi_aspect_ratio", "status"])

        # os.listdir returns entries in arbitrary order; sort so that the log
        # is reproducible from run to run.
        for class_dir in sorted(os.listdir(root_dir)):
            class_path = os.path.join(root_dir, class_dir)

            if not os.path.isdir(class_path):
                continue

            for patient_dir in sorted(os.listdir(class_path)):
                patient_path = os.path.join(class_path, patient_dir)

                if not os.path.isdir(patient_path):
                    continue

                for image_file in sorted(os.listdir(patient_path)):
                    image_path = os.path.join(patient_path, image_file)

                    if not image_file.endswith('.jpg'):
                        continue

                    # Read the image
                    print(f"Processing {image_path}")
                    image = cv2.imread(image_path)
                    if image is None:
                        print(f"Warning: Unable to read {image_path}")
                        writer.writerow([image_path, None, None, None, None, None, "failed to read"])
                        continue

                    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

                    roi = _locate_roi(image_gray)
                    if roi is None:
                        print(f"Skipping {image_path} due to insufficient edges")
                        writer.writerow([image_path, None, None, None, None, None, "insufficient edges"])
                        continue

                    roi_x1, roi_x2, roi_y1, roi_y2 = roi
                    roi_width = roi_x2 - roi_x1
                    roi_height = roi_y2 - roi_y1

                    # An empty or inverted window cannot be cropped or resized;
                    # log it and move on instead of aborting the whole batch.
                    if roi_width <= 0 or roi_height <= 0:
                        print(f"Skipping {image_path} due to invalid ROI")
                        writer.writerow([image_path, roi_x1, roi_x2, roi_y1, roi_y2, None, "invalid roi"])
                        continue

                    roi_aspect_ratio = round(roi_width / roi_height, 4)

                    image_cropped = crop_image_with_padding(image, roi_x1, roi_x2, roi_y1, roi_y2)
                    image_resized = cv2.resize(image_cropped, RESIZE_DIMS)

                    new_patient_dir = os.path.join(data_processed_dir, class_dir, patient_dir)
                    os.makedirs(new_patient_dir, exist_ok=True)
                    processed_path = os.path.join(new_patient_dir, image_file)

                    # cv2.imwrite reports failure through its return value.
                    if not cv2.imwrite(processed_path, image_resized):
                        print(f"Warning: Unable to write {processed_path}")
                        writer.writerow([image_path, roi_x1, roi_x2, roi_y1, roi_y2, roi_aspect_ratio, "failed to write"])
                        continue

                    writer.writerow([image_path, roi_x1, roi_x2, roi_y1, roi_y2, roi_aspect_ratio, "processed"])
                    print(f"Saved: {processed_path}")

                    # Optional: Debug visualization
                    if DEBUG:
                        vis = image.copy()
                        cv2.rectangle(vis, (roi_x1, roi_y1), (roi_x2, roi_y2), (255, 0, 0), 2)
                        plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
                        plt.title(image_file)
                        # Swap only the extension; str.replace would also hit
                        # a '.jpg' occurring in the middle of the name.
                        debug_name = os.path.splitext(image_file)[0] + '_debug.png'
                        plt.savefig(os.path.join(DEBUG_DIR, debug_name))
                        plt.close()

In [None]:
# Run preprocessing
# The raw-data folder is given relative to the working directory.
data_raw_dir = os.path.join('data', '0-raw')

# Print the working directory and its contents so a wrong launch location
# is easy to spot before the batch starts.
current_dir = os.getcwd()
print("Current Directory: ", current_dir)
print("Files: ", os.listdir(current_dir))

process_images(data_raw_dir)