In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pickle

In [2]:
# Load the per-split label CSVs.
# The dataset root is spelled out once so that relocating the project means
# editing a single line instead of three near-identical absolute paths.
DATA_DIR = r"C:\Users\acer\OneDrive\Jupyter\object_detection_project\data"

train_data = pd.read_csv(os.path.join(DATA_DIR, "train", "classes.csv"))
test_data = pd.read_csv(os.path.join(DATA_DIR, "test", "classes.csv"))
valid_data = pd.read_csv(os.path.join(DATA_DIR, "valid", "classes.csv"))

In [3]:
# Helper function to load and preprocess images
def preprocess_image(filename, target_size=(224, 224), to_rgb=False):
    """Read an image from disk, resize it and scale its pixels by 1/255.

    Parameters
    ----------
    filename : str
        Path of the image file to read.
    target_size : tuple of int, optional
        Passed unchanged to ``cv2.resize`` as ``dsize``, which OpenCV
        interprets as ``(width, height)``. Defaults to ``(224, 224)``.
    to_rgb : bool, optional
        ``cv2.imread`` returns colour images in BGR channel order. Pass
        ``True`` to convert to RGB before resizing. Defaults to ``False`` so
        existing callers keep receiving BGR data.

    Returns
    -------
    numpy.ndarray or None
        ``float32`` array holding the resized image divided by 255, or
        ``None`` when ``filename`` is not a regular file or OpenCV cannot
        decode it. A message is printed in both failure cases so the caller
        can simply skip the sample.
    """
    # isfile (rather than exists) so a directory path is rejected here instead
    # of being handed to OpenCV.
    if not os.path.isfile(filename):
        print(f"Error: File not found -> {filename}")
        return None  # Skip this file

    img = cv2.imread(filename)  # Default flag: 3-channel image, BGR order
    if img is None:
        print(f"Error: Failed to load image -> {filename}")
        return None  # Skip this file

    if to_rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img = cv2.resize(img, target_size)  # dsize is (width, height)
    return img.astype('float32') / 255.0  # Normalize the image

In [4]:
def load_data(dataframe, image_folder):
    """Load every image listed in ``dataframe`` together with its label row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'filename'`` column; every other column is treated
        as a label value.
    image_folder : str
        Folder that the ``'filename'`` entries are joined onto.

    Returns
    -------
    tuple of (list, list)
        ``(images, labels)`` of equal length. ``images`` holds whatever
        ``preprocess_image`` returned for each readable file; ``labels`` holds
        the matching 1-D array of label values. Rows whose image could not be
        loaded (``preprocess_image`` returned ``None``) are left out of both.
    """
    # Pick the label columns by name rather than by position, so a CSV whose
    # 'filename' column is not first cannot leak the filename into the labels
    # or silently drop the first real label.
    label_columns = [column for column in dataframe.columns if column != 'filename']
    label_matrix = dataframe[label_columns].to_numpy()

    images = []
    labels = []
    for filename, label_row in zip(dataframe['filename'], label_matrix):
        img = preprocess_image(os.path.join(image_folder, filename))
        if img is not None:  # Only keep samples whose image loaded successfully
            images.append(img)
            labels.append(label_row)
    return images, labels

In [5]:
# Load and preprocess all datasets.
# The image folders are anchored at the same absolute dataset root that the
# label CSVs are read from, so the result no longer depends on the kernel's
# working directory (with a different cwd every image was reported as
# "File not found" and the splits came back empty).
# NOTE(review): assumes each split's images/ folder sits next to its
# classes.csv - confirm against the project layout.
DATA_DIR = r"C:\Users\acer\OneDrive\Jupyter\object_detection_project\data"

train_images, train_labels = load_data(train_data, os.path.join(DATA_DIR, 'train', 'images'))
valid_images, valid_labels = load_data(valid_data, os.path.join(DATA_DIR, 'valid', 'images'))
test_images, test_labels = load_data(test_data, os.path.join(DATA_DIR, 'test', 'images'))

In [6]:
# Sanity check: display the first preprocessed training image (array repr).
train_images[0]

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       ...,

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]], dtype=float32)

In [7]:
# Stack the per-sample lists into float32 ndarrays, one split at a time
train_images, train_labels = (
    np.array(samples, dtype=np.float32) for samples in (train_images, train_labels)
)
valid_images, valid_labels = (
    np.array(samples, dtype=np.float32) for samples in (valid_images, valid_labels)
)
test_images, test_labels = (
    np.array(samples, dtype=np.float32) for samples in (test_images, test_labels)
)

In [8]:
# Report the shape of every array, in train / valid / test order
for caption, array in (
    ('Train images shape:', train_images),
    ('Train labels shape:', train_labels),
    ('Valid images shape:', valid_images),
    ('Valid labels shape:', valid_labels),
    ('Test images shape:', test_images),
    ('Test labels shape:', test_labels),
):
    print(caption, array.shape)

Train images shape: (135, 224, 224, 3)
Train labels shape: (135, 78)
Valid images shape: (55, 224, 224, 3)
Valid labels shape: (55, 78)
Test images shape: (19, 224, 224, 3)
Test labels shape: (19, 78)


In [9]:
# Report the element type of every array, in train / valid / test order
for caption, array in (
    ("Train images dtype:", train_images),
    ("Train labels dtype:", train_labels),
    ("Valid images dtype:", valid_images),
    ("Valid labels dtype:", valid_labels),
    ("Test images dtype:", test_images),
    ("Test labels dtype:", test_labels),
):
    print(caption, array.dtype)

Train images dtype: float32
Train labels dtype: float32
Valid images dtype: float32
Valid labels dtype: float32
Test images dtype: float32
Test labels dtype: float32


In [10]:
# Destination folder for the serialized arrays
preprocessed_dir = r"C:\Users\acer\OneDrive\Jupyter\object_detection_project\preprocessed"

# Make sure the folder is there (no error if it already exists)
os.makedirs(preprocessed_dir, exist_ok=True)

# Pickle each array into its own file inside the preprocessed folder
arrays_by_filename = (
    ('train_images.pkl', train_images),
    ('train_labels.pkl', train_labels),
    ('valid_images.pkl', valid_images),
    ('valid_labels.pkl', valid_labels),
    ('test_images.pkl', test_images),
    ('test_labels.pkl', test_labels),
)
for pkl_name, array in arrays_by_filename:
    with open(os.path.join(preprocessed_dir, pkl_name), 'wb') as f:
        pickle.dump(array, f)

print(f"All files saved successfully in {preprocessed_dir}")

All files saved successfully in C:\Users\acer\OneDrive\Jupyter\object_detection_project\preprocessed
