# 1. Data Exploration and Preprocessing Visualization

**Objective:** Understand the dataset's composition and visualize the effect of our preprocessing pipeline, in particular the Contrast Limited Adaptive Histogram Equalization (CLAHE) step.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob

# Adjust the path to import from the src directory
import sys
sys.path.append('../')
from src.data_utils import get_transforms

### Check Data Paths and Class Balance

In [None]:
# Root of the training split; each class lives in its own sub-directory.
DATA_DIR = '../data/train'

# glob() yields entries in a filesystem-dependent order. Sorting makes the
# "first" image used by the cells below the same on every machine and run.
mature_paths = sorted(glob(os.path.join(DATA_DIR, 'mature', '*')))
immature_paths = sorted(glob(os.path.join(DATA_DIR, 'immature', '*')))

# Report the per-class and total counts to check the class balance.
print(f"Found {len(mature_paths)} mature cataract images.")
print(f"Found {len(immature_paths)} immature cataract images.")
print(f"Total training images: {len(mature_paths) + len(immature_paths)}")

### Visualize Sample Images (Mature vs. Immature)

In [None]:
def _read_rgb(path):
    """Read the image at ``path`` with OpenCV and return it in RGB order.

    Raises:
        OSError: If ``cv2.imread`` cannot load the file.
    """
    bgr_image = cv2.imread(path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise OSError(f"Could not read image: {path}")
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)


def show_samples(mature_path, immature_path):
    """Display one mature and one immature cataract image side by side.

    Args:
        mature_path: Path of the image shown in the left panel.
        immature_path: Path of the image shown in the right panel.

    Raises:
        OSError: If either image cannot be read by OpenCV.
    """
    # Load both images before creating the figure so that a read failure
    # does not leave an empty figure behind.
    panels = [
        (_read_rgb(mature_path), 'Sample Mature Cataract'),
        (_read_rgb(immature_path), 'Sample Immature Cataract'),
    ]

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    for axis, (image, title) in zip(ax, panels):
        axis.imshow(image)
        axis.set_title(title)
        axis.axis('off')

    plt.show()

# Display the first image of each class, but only when both classes have data
if mature_paths and immature_paths:
    first_mature, first_immature = mature_paths[0], immature_paths[0]
    show_samples(first_mature, first_immature)

### Visualize the Effect of CLAHE Preprocessing

In [None]:
def visualize_transforms(image_path, clahe_index=1):
    """Show an image next to the output of one step of the preprocessing pipeline.

    Args:
        image_path: Path of the image file to load with OpenCV.
        clahe_index: Position of the step to apply within
            ``get_transforms(is_train=False).transforms``. Defaults to 1,
            the position this notebook has always used for CLAHE.

    Raises:
        OSError: If the image cannot be read by OpenCV.
    """
    # Load image
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise OSError(f"Could not read image: {image_path}")
    original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # Get the validation-time transformation pipeline.
    # NOTE(review): get_transforms lives in src.data_utils and is not visible
    # here; presumably the step at index 1 is CLAHE and is always applied
    # (p=1) -- confirm against that module.
    transform = get_transforms(is_train=False)

    # Apply only the selected step. It receives the BGR array returned by
    # cv2.imread; its output is converted to RGB afterwards for display.
    clahe_transform = transform.transforms[clahe_index]
    clahe_image = clahe_transform(image=original_image)['image']
    clahe_image_rgb = cv2.cvtColor(clahe_image, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    panels = (
        (original_image_rgb, 'Original Image'),
        (clahe_image_rgb, 'Image after CLAHE'),
    )
    for axis, (image, title) in zip(ax, panels):
        axis.imshow(image)
        axis.set_title(title)
        axis.axis('off')

    plt.show()

# Run the CLAHE comparison on the first mature cataract image, if any were found
if mature_paths:
    sample_mature_path = mature_paths[0]
    visualize_transforms(sample_mature_path)