In [1]:
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.image import load_img, img_to_array

import os
from PIL import Image
import shutil

import matplotlib.pyplot as plt

import cv2
import numpy as np
import random
from glob import glob
import shutil

In [None]:
# Define paths
# NOTE: source_dir is a machine-specific absolute path; adjust it before running elsewhere.
source_dir = r"C:\Users\uzmap\Documents\GitHub\CarDrivingAssistance\dataset\IDD_Segmentation\leftImg8bit\train"
output_dir = "dataset/CycleGANShorter/trainB"  # Target directory for clean images

os.makedirs(output_dir, exist_ok=True)

# Extract images from all numbered folders (limit to 1500)
max_images = 1500
image_count = 0
used_names = set()  # destination filenames already written during this run

# Keep only real sub-directories; stray files next to them are not image folders.
folders = sorted(
    entry for entry in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, entry))
)

for folder in folders:
    if image_count >= max_images:
        break  # Stop processing further folders if the limit is reached

    folder_path = os.path.join(source_dir, folder)
    # glob returns files in arbitrary order; sorting makes the selected subset reproducible.
    images = sorted(glob(f"{folder_path}/*.png"))  # Adjust extension if needed

    for img in images:
        if image_count >= max_images:
            break  # Stop if we reach 1500 images

        target_name = os.path.basename(img)
        if target_name in used_names:
            # The same filename was already copied from an earlier folder. Prefix the
            # folder name so the earlier copy is not overwritten while still being
            # counted as a separate image.
            target_name = f"{folder}_{target_name}"
        used_names.add(target_name)

        shutil.copy(img, os.path.join(output_dir, target_name))
        image_count += 1

print(f"{image_count} clean images extracted successfully in trainB!")


1500 clean images extracted successfully in trainB!


In [6]:
# Define dataset paths (one source folder per adverse-condition category)
# NOTE: these are machine-specific absolute paths; adjust them before running elsewhere.
source_dirs = {
    "FOG": r"C:\Users\uzmap\Documents\GitHub\CarDrivingAssistance\dataset\IDDAW\train\FOG\rgb",
    "RAIN": r"C:\Users\uzmap\Documents\GitHub\CarDrivingAssistance\dataset\IDDAW\train\RAIN\rgb",
    "LOWLIGHT": r"C:\Users\uzmap\Documents\GitHub\CarDrivingAssistance\dataset\IDDAW\train\LOWLIGHT\rgb",
    "SNOW": r"C:\Users\uzmap\Documents\GitHub\CarDrivingAssistance\dataset\IDDAW\train\SNOW\rgb",
}

output_dir = "dataset/CycleGANShorter/trainA"
os.makedirs(output_dir, exist_ok=True)

# Copy all images (the sources are left untouched) under category-prefixed names
copied_names = set()  # destination filenames already written during this run
for category, src in source_dirs.items():
    # The search is recursive, so sorting keeps the copy order (and therefore the
    # collision handling below) the same on every run.
    images = sorted(glob(f"{src}/**/*.png", recursive=True))  # Adjust extension if needed
    if not images:
        print(f"Warning: no PNG images found for {category} in {src}")

    for img in images:
        base_name = os.path.basename(img)  # Original filename
        new_name = f"{category}_{base_name}"  # Prefix with folder name
        if new_name in copied_names:
            # Two sub-folders of the same category contain a file with this name.
            # Add the parent folder so the earlier copy is not overwritten; the
            # name still starts with the category prefix.
            parent = os.path.basename(os.path.dirname(img))
            new_name = f"{category}_{parent}_{base_name}"
        copied_names.add(new_name)
        shutil.copy(img, os.path.join(output_dir, new_name))

print("All noisy images moved to trainA with unique names!")


All noisy images moved to trainA with unique names!


In [7]:
import os
import cv2
import numpy as np
import random
from glob import glob

# Weather categories to balance and the number of images wanted for each
categories = ["FOG", "LOWLIGHT", "RAIN", "SNOW"]
target_per_class = 375

# Input folder with every trainA image, and the folder that receives the balanced set
base_path = r"D:\CarDrivingAssistance\dataset\CycleGANShorter\trainA"
output_path = r"D:\CarDrivingAssistance\dataset\CycleGANShorter\trainA_balanced"

# Make sure the destination exists before anything is written to it
os.makedirs(output_path, exist_ok=True)

# Function to apply safe augmentations
def augment_image(img):
    """Return a randomly augmented copy of an image array.

    Two steps are applied, in this order:
      1. With the flip branch taken when ``random.random() > 0.5``, the image
         is mirrored with ``cv2.flip(img, 1)``.
      2. Pixel values are rescaled as ``gain * pixel + bias`` with ``gain``
         drawn from ``[0.7, 1.3]`` and ``bias`` an integer in ``[-40, 40]``,
         then clipped to ``[0, 255]`` and cast to ``uint8``.

    Args:
        img: Image as a NumPy array.

    Returns:
        A new ``uint8`` array holding the augmented image.
    """
    # The three random draws happen in a fixed order (flip, gain, bias).
    mirrored = cv2.flip(img, 1) if random.random() > 0.5 else img

    gain = random.uniform(0.7, 1.3)
    bias = random.randint(-40, 40)

    adjusted = gain * mirrored + bias
    return np.clip(adjusted, 0, 255).astype(np.uint8)

# Fixed seed: together with the sorted file list below, the random choices (and
# therefore the augmented filenames) repeat on every run, so re-running this cell
# overwrites the same files instead of piling up differently named ones.
random.seed(42)

# Get all images in trainA (sorted, because glob returns files in arbitrary order)
all_images = sorted(glob(f"{base_path}/*.png"))

# Group images by their category using filename prefixes
category_images = {cat: [] for cat in categories}
for img_path in all_images:
    filename = os.path.basename(img_path)
    for cat in categories:
        if filename.startswith(cat):  # Check prefix
            category_images[cat].append(img_path)
            break  # a file belongs to a single category

# Number of images actually written to output_path for each category
written_per_class = {}

# Balance dataset by copying originals first, then augmenting if needed
for category, images in category_images.items():
    num_existing = len(images)
    written_per_class[category] = 0

    if num_existing == 0:
        # Nothing to copy or augment from; random.choice would raise on an empty list.
        print(f"Skipping {category}: no images found in {base_path}")
        continue

    # A class that already exceeds the target is down-sampled so it cannot end up
    # larger than target_per_class.
    if num_existing > target_per_class:
        originals = random.sample(images, target_per_class)
    else:
        originals = images

    # Copy original images first, remembering which ones could actually be read
    usable = []
    for img_path in originals:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue
        new_name = os.path.basename(img_path)  # Keep original filename
        cv2.imwrite(os.path.join(output_path, new_name), img)  # Save original
        usable.append(img_path)

    if not usable:
        print(f"Skipping {category}: none of its {num_existing} images could be read")
        continue

    num_needed = target_per_class - len(usable)
    print(f"Processing {category}: {num_existing} → {target_per_class} (Adding {num_needed})")

    # Generate augmented images if needed (only from images known to be readable)
    for i in range(num_needed):
        img_path = random.choice(usable)
        img = cv2.imread(img_path)
        img = augment_image(img)

        # Save augmented images with unique names
        new_name = f"{category}_aug_{i}_{os.path.basename(img_path)}"
        cv2.imwrite(os.path.join(output_path, new_name), img)

    written_per_class[category] = len(usable) + num_needed

# Only claim an exact, balanced total when every class really reached the target.
if all(count == target_per_class for count in written_per_class.values()):
    print(f"`trainA_balanced` now has exactly {target_per_class * len(categories)} images ({target_per_class} per class).")
else:
    print(f"`trainA_balanced` is NOT fully balanced; images written per class: {written_per_class}")

Processing FOG: 175 → 375 (Adding 200)
Processing LOWLIGHT: 78 → 375 (Adding 297)
Processing RAIN: 152 → 375 (Adding 223)
Processing SNOW: 167 → 375 (Adding 208)
`trainA_balanced` now has exactly 1500 images (375 per class).


In [8]:
# Share of each training folder that is set aside for testing (20%)
test_ratio = 0.2

# Dataset root and the four split folders below it.
# "trainA_balanced" is meant to be renamed to "trainA" later.
data_dir = r"D:\CarDrivingAssistance\dataset\CycleGANShorter"
trainA_dir, trainB_dir, testA_dir, testB_dir = (
    os.path.join(data_dir, subdir)
    for subdir in ("trainA_balanced", "trainB", "testA", "testB")
)

# The test folders may not exist yet; the train folders are expected to be there already
os.makedirs(testA_dir, exist_ok=True)
os.makedirs(testB_dir, exist_ok=True)

def split_data(train_dir, test_dir, test_ratio, seed=None):
    """Move a share of the PNG images in ``train_dir`` into ``test_dir``.

    The wanted test-set size is ``int(total * test_ratio)``, where ``total``
    counts the PNG files currently in *both* folders. Only the images still
    missing from ``test_dir`` are moved, so calling the function again on an
    already split dataset moves nothing instead of draining another share out
    of the training folder. With an empty ``test_dir`` this is the same as
    ``int(len(train_images) * test_ratio)``.

    Args:
        train_dir: Folder whose ``*.png`` files are candidates for moving.
        test_dir: Destination folder; created if it does not exist.
        test_ratio: Fraction in ``[0, 1]`` of all images that should be in
            ``test_dir`` afterwards.
        seed: Optional seed for a private random generator, for a reproducible
            selection. When ``None`` the global ``random`` state is used.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio!r}")

    os.makedirs(test_dir, exist_ok=True)

    # glob order is arbitrary; sort so that a seeded selection is reproducible.
    images = sorted(glob(f"{train_dir}/*.png"))  # Adjust extension if needed
    already_in_test = len(glob(f"{test_dir}/*.png"))

    wanted_in_test = int((len(images) + already_in_test) * test_ratio)
    test_size = min(len(images), max(0, wanted_in_test - already_in_test))

    rng = random if seed is None else random.Random(seed)
    test_images = rng.sample(images, test_size)

    for img in test_images:
        shutil.move(img, os.path.join(test_dir, os.path.basename(img)))

    print(f"Moved {test_size} images from {train_dir} → {test_dir}")

# Carve the test sets out of the two training folders: trainA feeds testA, trainB feeds testB
split_data(train_dir=trainA_dir, test_dir=testA_dir, test_ratio=test_ratio)
split_data(train_dir=trainB_dir, test_dir=testB_dir, test_ratio=test_ratio)

print("Dataset split completed!")

Moved 300 images from D:\CarDrivingAssistance\dataset\CycleGANShorter\trainA_balanced → D:\CarDrivingAssistance\dataset\CycleGANShorter\testA
Moved 299 images from D:\CarDrivingAssistance\dataset\CycleGANShorter\trainB → D:\CarDrivingAssistance\dataset\CycleGANShorter\testB
Dataset split completed!


In [9]:
import os
from PIL import Image

# Accumulates the path of every image that fails to load during the scan
corrupt_files = []

# The four dataset splits that are checked for unreadable images
dataset_dirs = [
    "D:/CarDrivingAssistance/dataset/CycleGANShorter/trainA_balanced",
    "D:/CarDrivingAssistance/dataset/CycleGANShorter/trainB",
    "D:/CarDrivingAssistance/dataset/CycleGANShorter/testA",
    "D:/CarDrivingAssistance/dataset/CycleGANShorter/testB",
]

# Function to check corrupt images
def find_corrupt_images(folder):
    """Scan ``folder`` for image files that Pillow cannot fully decode.

    Every regular file directly inside ``folder`` (the scan is not recursive)
    is opened and fully loaded. Each file that fails is reported, appended to
    the module-level ``corrupt_files`` list, and included in the return value.
    Sub-directories are skipped rather than being reported as corrupt images.

    Args:
        folder: Path of the directory to scan.

    Returns:
        list: Paths flagged as corrupt in this folder. Empty when the folder
        is missing or every file loaded successfully.
    """
    found = []

    # isdir rather than exists: a path that exists but is a file cannot be listed.
    if not os.path.isdir(folder):
        print(f"Skipping missing folder: {folder}")
        return found

    print(f"🔍 Checking images in {folder}...")

    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if not os.path.isfile(file_path):
            continue  # a sub-directory is not an image

        try:
            with Image.open(file_path) as img:
                img.load()  # Force loading the image fully
        except (OSError, SyntaxError, ValueError):
            # Besides OSError (which also covers unidentified or truncated files),
            # Pillow's decoders can raise SyntaxError or ValueError on broken data.
            print(f"Corrupt image detected: {file_path}")
            corrupt_files.append(file_path)
            found.append(file_path)

    return found

# Run the corruption check on each dataset split in turn
for dataset_dir in dataset_dirs:
    find_corrupt_images(dataset_dir)

# Report: one flagged path per line, followed by the total
print("\nCorrupt images detected:")
if corrupt_files:
    print("\n".join(corrupt_files))

print(f"\nTotal corrupt images found: {len(corrupt_files)}")

# Note from an earlier run: D:\CarDrivingAssistance\dataset\trainB\007949_leftImg8bit.png was found corrupted and was deleted

🔍 Checking images in D:/CarDrivingAssistance/dataset/CycleGANShorter/trainA_balanced...
🔍 Checking images in D:/CarDrivingAssistance/dataset/CycleGANShorter/trainB...
🔍 Checking images in D:/CarDrivingAssistance/dataset/CycleGANShorter/testA...
🔍 Checking images in D:/CarDrivingAssistance/dataset/CycleGANShorter/testB...

Corrupt images detected:

Total corrupt images found: 0
