In [1]:
import os
import xml.etree.ElementTree as ET

import cv2
import numpy as np

# Side length, in pixels, of the square every cropped image is resized to.
image_size = 300

# Root of the project: the directory the notebook kernel was started in.
project_folder = os.getcwd()

def extract_bounding_box(annotation_file):
    """Read the first bounding box from an XML annotation file.

    The file is parsed with ``xml.etree.ElementTree`` and the first
    ``<bndbox>`` element found anywhere in the document is used; any further
    boxes in the same file are ignored.

    Args:
        annotation_file: Path to (or open file object of) the XML annotation.

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax)`` of ints.

    Raises:
        ValueError: If the file has no ``<bndbox>`` element, a coordinate tag
            is missing, or a coordinate is not an integer.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(annotation_file).getroot()

    # './/bndbox' searches the whole tree; find() returns only the first match.
    bndbox = root.find('.//bndbox')
    if bndbox is None:
        raise ValueError(f"No <bndbox> element found in {annotation_file}")

    coordinates = []
    for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
        text = bndbox.findtext(tag)
        if text is None:
            # Fail with a clear message instead of AttributeError on None.
            raise ValueError(f"Missing <{tag}> in <bndbox> of {annotation_file}")
        coordinates.append(int(text))

    return tuple(coordinates)

def load_and_preprocess_image(image_path, annotation_path):
    """Crop an image to its annotated bounding box, resize it and flatten it.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        annotation_path: Path of the XML annotation holding the bounding box.

    Returns:
        A 1D NumPy array holding the cropped region resized to
        ``image_size`` x ``image_size`` pixels, or ``None`` when the image
        cannot be read or the bounding box does not overlap the image.

    Raises:
        ValueError: Propagated from ``extract_bounding_box`` if the annotation
            is malformed.
    """
    xmin, ymin, xmax, ymax = extract_bounding_box(annotation_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; report it to the caller the same way.
        return None

    # Clamp the box to the image: a negative start index would otherwise wrap
    # around under NumPy slicing and silently select the wrong region.
    height, width = image.shape[:2]
    xmin, ymin = max(xmin, 0), max(ymin, 0)
    xmax, ymax = min(xmax, width), min(ymax, height)
    if xmin >= xmax or ymin >= ymax:
        # Empty crop; cv2.resize would fail on a zero-sized array.
        return None

    cropped_image = image[ymin:ymax, xmin:xmax]
    resized_image = cv2.resize(cropped_image, (image_size, image_size))
    return resized_image.flatten()  # Flatten the image to a 1D array

In [None]:
def prepare_dataset(images_folder, annotations_folder):
    """Build feature vectors and labels from per-breed image folders.

    Every sub-directory of ``images_folder`` is treated as one breed. For each
    non-hidden file in it, the annotation is looked up in the sub-directory of
    the same name under ``annotations_folder``, using the image's file name
    without its extension. Images without an annotation file are reported via
    ``print`` and skipped; images for which ``load_and_preprocess_image``
    returns ``None`` are skipped silently.

    Breeds and files are visited in sorted order so that the row order of the
    returned arrays is reproducible (``os.listdir`` order is arbitrary).

    Args:
        images_folder: Directory containing one sub-directory per breed.
        annotations_folder: Directory with the matching annotation
            sub-directories.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the processed image
        vectors, ``y`` holds the breed directory name for each row of ``X``.
    """
    X = []  # Feature vectors
    y = []  # Labels

    breed_labels = sorted(
        entry for entry in os.listdir(images_folder)
        if os.path.isdir(os.path.join(images_folder, entry))
    )

    for breed in breed_labels:
        breed_images_folder = os.path.join(images_folder, breed)
        breed_annotations_folder = os.path.join(annotations_folder, breed)

        for image_file in sorted(os.listdir(breed_images_folder)):
            if image_file.startswith('.'):  # Skip hidden files like .DS_Store
                continue

            image_path = os.path.join(breed_images_folder, image_file)
            # Annotation files share the image's base name but have no extension.
            base_filename = os.path.splitext(image_file)[0]
            annotation_file_path = os.path.join(breed_annotations_folder, base_filename)

            if not os.path.isfile(annotation_file_path):
                print(f"Annotation file does not exist for {image_file}")
                continue

            processed_image = load_and_preprocess_image(image_path, annotation_file_path)
            if processed_image is not None:
                X.append(processed_image)
                y.append(breed)

    return np.array(X), np.array(y)


# Dataset locations, both rooted at <project_folder>/archive:
#   images      -> archive/images/Images
#   annotations -> archive/annotations/Annotation
annotations_folder = os.path.join(os.path.join(project_folder, 'archive'), 'annotations', 'Annotation')
images_folder = os.path.join(os.path.join(project_folder, 'archive'), 'images', 'Images')