In [3]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from tqdm import tqdm
import cv2

In [4]:
train_df = pd.read_csv('dataset/train.csv')
bbox_df = pd.read_csv('dataset/bbox.csv')

In [7]:
def preprocess_images(image_folder, output_folder, df, size=(224, 224)):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for i, row in tqdm(df.iterrows(), total=len(df)):
        image_path = os.path.join(image_folder, row['Name'])
        image = Image.open(image_path)
        image = image.resize(size)
        image = np.array(image) / 255.0  # Normalize pixel values
        image = (image * 255).astype(np.uint8)  # Convert back to 8-bit values

        output_path = os.path.join(output_folder, row['Name'])
        cv2.imwrite(output_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

# Preprocess training images
preprocess_images('dataset/train', 'dataset/preprocessed/train', train_df)

# Preprocess testing images (assuming you have a test.csv similar to train.csv)
test_df = pd.read_csv('dataset/test.csv')
preprocess_images('dataset/test', 'dataset/preprocessed/test', test_df)



100%|████████████████████████████████████████████████████████████████████████████| 10233/10233 [03:17<00:00, 51.88it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 3963/3963 [01:16<00:00, 51.74it/s]


In [8]:
def process_bounding_boxes(df, image_folder):
    processed_boxes = {}
    for _, row in tqdm(df.iterrows(), total=len(df)):
        image_name = row['Name']
        if image_name not in processed_boxes:
            processed_boxes[image_name] = []
        processed_boxes[image_name].append([row['xmin'], row['ymin'], row['xmax'], row['ymax']])

    # Save or use the processed_boxes dictionary as needed
    return processed_boxes

bbox_dict = process_bounding_boxes(bbox_df, 'dataset/train')


100%|██████████████████████████████████████████████████████████████████████████| 62529/62529 [00:06<00:00, 9047.38it/s]
