In [2]:
# 01_data_preprocessing.ipynb

# Install necessary packages.
# opencv-python supplies the `cv2` module imported below. The leading `!` is
# IPython/Jupyter shell-escape syntax, so this line only works inside a notebook.
!pip install opencv-python


import glob
from pathlib import Path
import cv2

# Root of the SD1 dataset and its raw train/validation splits
DATA_DIR = Path("../data/SD1")
TRAIN_DIR = DATA_DIR.joinpath("train")
VAL_DIR = DATA_DIR.joinpath("val")

# Output folders for the preprocessed splits
PROCESSED_TRAIN_DIR = DATA_DIR.joinpath("processed_train")
PROCESSED_VAL_DIR = DATA_DIR.joinpath("processed_val")

# Create the output folders up front (no error if they already exist)
PROCESSED_TRAIN_DIR.mkdir(exist_ok=True, parents=True)
PROCESSED_VAL_DIR.mkdir(exist_ok=True, parents=True)

# Sizes handed to cv2.resize for each extracted tile
GT_IMG_SIZE = (512, 512)  # ground-truth tile
GLARE_IMG_SIZE = GT_IMG_SIZE  # glare tile, same size as the ground truth
IMG_SIZE = GT_IMG_SIZE  # final training size (not referenced in this cell)

# Function to preprocess images
def _to_grayscale(img):
    """Return a single-channel version of an array loaded by ``cv2.imread``.

    ``cv2.imread`` delivers colour channels in BGR(A) order, so the BGR*
    conversion codes are used; an RGB* code would swap the red and blue
    luminance weights.

    Raises:
        ValueError: If the array has a channel count other than 1, 3 or 4.
    """
    if img.ndim == 2:
        # Already single-channel (e.g. a grayscale PNG); nothing to convert.
        return img
    channels = img.shape[2]
    if channels == 4:
        return cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    if channels == 3:
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    raise ValueError(f"Unsupported channel count: {channels}")


def preprocess_images(input_dir, output_dir, tile_width=512):
    """Split composite PNGs into grayscale ground-truth and glare images.

    Every ``*.png`` directly inside ``input_dir`` is expected to hold the
    ground-truth tile in its first ``tile_width`` columns and the glare tile
    in the next ``tile_width`` columns. Any columns beyond that are ignored.
    Each tile is converted to grayscale, resized to ``GT_IMG_SIZE`` /
    ``GLARE_IMG_SIZE`` respectively, and written to ``output_dir`` as
    ``gt_<name>`` and ``glare_<name>``.

    Files that cannot be decoded, or that are too narrow to contain both
    tiles, are skipped with a printed message instead of aborting the run.

    Args:
        input_dir: Directory containing the composite PNG files (str or Path).
        output_dir: Directory receiving the processed images (str or Path);
            created if it does not exist.
        tile_width: Width in pixels of each side-by-side tile.

    Raises:
        ValueError: If an image has an unsupported number of channels.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so files are processed in a reproducible order.
    for img_path in sorted(glob.glob(str(input_dir / "*.png"))):
        # IMREAD_UNCHANGED keeps the file's own channel layout (alpha included).
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue

        width = img.shape[1]
        if width < 2 * tile_width:
            # A narrower image would give an empty or truncated glare tile.
            print(
                f"Skipping {img_path}: width {width} is less than "
                f"{2 * tile_width}"
            )
            continue

        # Left tile is the ground truth, the adjacent tile is the glare image.
        ground_truth_img = img[:, :tile_width]
        glare_img = img[:, tile_width:2 * tile_width]

        ground_truth_img_gray = cv2.resize(
            _to_grayscale(ground_truth_img), GT_IMG_SIZE
        )
        glare_img_gray = cv2.resize(_to_grayscale(glare_img), GLARE_IMG_SIZE)

        file_name = Path(img_path).name
        outputs = (
            (output_dir / f"gt_{file_name}", ground_truth_img_gray),
            (output_dir / f"glare_{file_name}", glare_img_gray),
        )
        for save_path, tile in outputs:
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(str(save_path), tile):
                print(f"Failed to write {save_path}")

    print(f"Preprocessed images saved in {output_dir}")

# Run the preprocessing over each raw split, writing into its processed folder
for src_dir, dst_dir in (
    (TRAIN_DIR, PROCESSED_TRAIN_DIR),
    (VAL_DIR, PROCESSED_VAL_DIR),
):
    preprocess_images(src_dir, dst_dir)


Preprocessed images saved in ..\data\SD1\processed_train
Preprocessed images saved in ..\data\SD1\processed_val
