# **Preprocessing**

In [6]:
import os
from PIL import Image, ImageOps
import numpy as np
!pip install opencv-python
import cv2

Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.12.0.88


In [7]:
# Colab only: mount Google Drive at /content/drive so the image folders
# configured under /content/drive/MyDrive are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
# NOTE(review): `Sized` is not referenced in any visible cell — looks like a
# stray auto-completed import; confirm nothing later uses it before removing.
from typing_extensions import Sized
# Folder the processing loop reads the raw images from.
input_images = "/content/drive/MyDrive/Raw Images"
# Folder the processed PNG files are written to.
output_images = "/content/drive/MyDrive/clean Images"
# Side length, in pixels, of the square images produced by the resize step.
target_size = 512

In [9]:
# Create the output folder (and any missing parents); exist_ok=True makes
# re-running this cell harmless when the folder is already there.
os.makedirs(output_images, exist_ok=True)

In [10]:
#Crops the image to a centered square
def center_crop(img):
    """Return the largest centered square region of ``img``.

    The square's side equals the shorter image dimension. When the surplus
    along the longer dimension is odd, integer division places the extra
    pixel on the right/bottom side of the discarded margin.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method (e.g. a PIL image).

    Returns:
        Whatever ``img.crop`` returns for the box
        ``(left, top, left + side, top + side)``.
    """
    w, h = img.size
    side = min(w, h)

    # Offsets of the square's top-left corner inside the original image.
    x0 = (w - side) // 2
    y0 = (h - side) // 2

    box = (x0, y0, x0 + side, y0 + side)
    return img.crop(box)

#Applies light normalization to even out brightness & contrast
def normalize_color(img):
    """Equalize the brightness histogram of an image and return a new image.

    The image is converted from RGB to YUV, the Y (luma) channel is passed
    through ``cv2.equalizeHist``, and the result is converted back to RGB.
    The U and V channels are left untouched.

    Args:
        img: Image convertible with ``np.array``.
            Assumes a 3-channel, 8-bit RGB image — TODO confirm for callers
            other than the processing loop, which converts to RGB first.

    Returns:
        A new PIL image built from the equalized RGB array.
    """
    rgb = np.array(img)
    yuv = cv2.cvtColor(rgb, cv2.COLOR_RGB2YUV)

    # Only the brightness plane is equalized; it is written back in place.
    luma = yuv[:, :, 0]
    yuv[:, :, 0] = cv2.equalizeHist(luma)

    rgb_equalized = cv2.cvtColor(yuv, cv2.COLOR_YUV2RGB)
    return Image.fromarray(rgb_equalized)

#Re-saves the image without EXIF metadata
def remove_metadata(img):
    """Return a fresh copy of ``img`` that carries only its pixel data.

    The copy is rebuilt from the raw pixel buffer, so nothing attached to the
    source image object (EXIF and other entries in ``img.info``) is carried
    over. Mode and size are preserved.

    The pixels are transferred as one bytes buffer instead of a Python list
    with one entry per pixel, which gives the same image far more cheaply.

    Args:
        img: Source PIL image.

    Returns:
        A new PIL image with the same mode, size and pixel values as ``img``.
    """
    return Image.frombytes(img.mode, img.size, img.tobytes())

In [11]:
# Preprocess every PNG/JPEG found in input_images and write the result to
# output_images as a PNG with the same base name. A file that fails is
# reported and skipped so one bad image does not abort the whole batch.
for filename in os.listdir(input_images):
    if not filename.lower().endswith((".png", ".jpg", ".jpeg")):
        continue

    path = os.path.join(input_images, filename)

    try:
        # The context manager releases the file handle even when decoding fails.
        with Image.open(path) as src:
            # Bake the EXIF orientation into the pixels first: the metadata is
            # stripped further down, so a rotated photo would otherwise be
            # cropped and saved sideways with no way to recover its orientation.
            img = ImageOps.exif_transpose(src).convert("RGB")

        # 1. Center Crop to Square
        img = center_crop(img)

        # 2. Resize to Target Size
        img = img.resize((target_size, target_size), Image.LANCZOS)

        # 3. Color & Contrast Normalization
        img = normalize_color(img)

        # 4. Remove EXIF Metadata
        img = remove_metadata(img)

        # 5. Save as PNG for quality
        output_path = os.path.join(
            output_images,
            os.path.splitext(filename)[0] + ".png"
        )
        img.save(output_path, format="PNG")

        print("Processed:", filename)

    except Exception as e:
        # Deliberate best-effort: report the failure and continue with the next file.
        print("Error processing", filename, ":", e)

Processed: 25589701.jpg
Processed: 28753061.jpg
Processed: 22215229.jpg
Processed: 22779038.jpg
Processed: 19570587.jpg
Processed: 16630920.jpg
Processed: 21138719.jpg
Processed: 25772368.jpg
Processed: 16396183.jpg
Processed: 23775702.jpg
Processed: 24832988.jpg
Processed: 19788551.jpg
Processed: 19786858.jpg
Processed: 19788552.jpg
Processed: 18924662.jpg
Processed: 27782020.jpg
Processed: 24710703.jpg
Processed: 22860067.jpg
Processed: 23012662.jpg
Processed: 17396786.jpg
Processed: 17185253.jpg
Processed: 21833287.jpg
Processed: 27683428.jpg
Processed: 26685105.jpg
Processed: 29389675.jpg
Processed: 23714402.jpg
Processed: 26758039.jpg
Processed: 28041138.jpg
Processed: 16396193.jpg
Processed: 24321674.jpg
Processed: 17095593.jpg
Processed: 25818051.jpg
Processed: 20971288.jpg
Processed: 23018702.jpg
Processed: 23925202.jpg
Processed: 17840997.jpg
Processed: 22926817.jpg
Processed: 26067618.jpg
Processed: 24187497.jpg
Processed: 24355644.jpg
Processed: 17641927.jpg
Processed: 20702