# In [None]:
import os
import cv2
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split

# Edge length, in pixels, of the square that every image is resized to.
img_size = 256

# Where the source images are read from and where all outputs are written.
raw_data_dir = 'data/raw/'
processed_data_dir = 'data/processed/'

def preprocess_image(image_path):
    """Load an image, resize it to a square and scale its values by 1/255.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A floating-point array whose first two dimensions are
        ``(img_size, img_size)``. With the default ``cv2.imread`` flags the
        input is 8-bit, so the returned values lie in ``[0, 1]``.

    Raises:
        ValueError: If the file is missing, unreadable or cannot be decoded.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the problem only shows up as an opaque error inside
    # cv2.resize that does not mention which file was at fault.
    if img is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    img = cv2.resize(img, (img_size, img_size))
    return img / 255.0

def save_image(image, save_path):
    """Write an image with values scaled to ``[0, 1]`` to disk as 8-bit data.

    Args:
        image: Array of pixel values, expected in ``[0, 1]``. Values outside
            that range are saturated to 0 or 255.
        save_path: Destination file path, passed to ``cv2.imwrite``; the
            extension selects the output format.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    # Round to the nearest 8-bit level rather than truncating, and clip so
    # that out-of-range inputs saturate instead of wrapping around when cast
    # to uint8.
    img = np.clip(np.rint(image * 255), 0, 255).astype(np.uint8)
    # cv2.imwrite returns False on failure (e.g. the target directory does not
    # exist) rather than raising, so the result has to be checked explicitly.
    if not cv2.imwrite(save_path, img):
        raise OSError(f"Could not write image: {save_path!r}")

# Preprocess images
# glob() returns matches in arbitrary, filesystem-dependent order. Sorting
# keeps the image_{i}.jpg numbering and the seeded splits below reproducible
# across machines and runs.
image_paths = sorted(glob(os.path.join(raw_data_dir, '*.jpg')))
if not image_paths:
    # Fail here with a clear message instead of later inside
    # train_test_split, which would only complain about an empty dataset.
    raise FileNotFoundError(f"No '*.jpg' images found in {raw_data_dir!r}")
images = np.array([preprocess_image(p) for p in image_paths])

# Save processed images
# The output directory is not guaranteed to exist (e.g. on a fresh checkout);
# create it before anything is written into it.
os.makedirs(processed_data_dir, exist_ok=True)
for i, img in enumerate(images):
    save_image(img, os.path.join(processed_data_dir, f'image_{i}.jpg'))

# Split data into train, validation, test
# 20% of all images are held out for testing; 10% of the remaining 80% are
# then held out for validation, i.e. roughly 72% / 8% / 20% overall.
train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)
train_images, val_images = train_test_split(train_images, test_size=0.1, random_state=42)

np.save(os.path.join(processed_data_dir, 'train_images.npy'), train_images)
np.save(os.path.join(processed_data_dir, 'val_images.npy'), val_images)
np.save(os.path.join(processed_data_dir, 'test_images.npy'), test_images)