In [1]:
import os
import numpy as np
import cv2
import glob
import h5py
from typing import Literal, Tuple

In [2]:
class Dataset:
    """Loader for one split ("train", "test" or "val") of the playing-card data.

    Two kinds of data can be read:
      * raw grayscale images, from per-label sub-folders of
        ``IMAGE_ROOT/<split>/`` (see ``load_data``);
      * pre-computed descriptors and labels, from HDF5 files in
        ``DESCRIPTOR_ROOT`` (see ``load_descriptors``).
    """

    # NOTE(review): the image root climbs two directories while the descriptor
    # root climbs only one. Both values are kept exactly as they were; confirm
    # that the two are really meant to be resolved from different depths.
    IMAGE_ROOT = "../../data/playing_cards/"
    DESCRIPTOR_ROOT = "../data/"

    # Splits for which a data folder is defined.
    SPLITS = ("train", "test", "val")

    def __init__(self, data_type: Literal["train", "test", "val"]) -> None:
        """
        Select which split this instance reads.

        Args:
            - data_type: One of "train", "test" or "val".

        Raises:
            - ValueError: If data_type is not a known split.
        """
        # Fail here rather than leaving `data_path` undefined and hitting an
        # AttributeError later inside load_data().
        if data_type not in self.SPLITS:
            raise ValueError(
                f"Unknown data_type {data_type!r}; expected one of {self.SPLITS}."
            )

        self.data_type = data_type
        self.data_path = f"{self.IMAGE_ROOT}{data_type}/"


    def load_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Load and return the images and their labels as numpy arrays.

        Every entry of ``self.data_path`` is treated as a label folder whose
        name splits into three whitespace-separated tokens; the first token is
        used as the card number and the last as the suit. Entries whose name
        contains "joker" are ignored. The results are also stored on the
        instance as ``self.images``, ``self.suits`` and ``self.numbers``.

        Returns:
            - images: Array of grayscale images
            - suits: Suit of each image
            - numbers: Card number of each image

        Raises:
            - ValueError: If a folder that contains images has a name that
              does not split into exactly three tokens.
        """
        images = []

        # Separate columns for number and suit
        suits = []
        numbers = []

        unreadable = 0

        # os.listdir / glob return entries in arbitrary order; sorting keeps
        # the sample order reproducible between runs and machines.
        for label in sorted(os.listdir(self.data_path)):
            if "joker" in label:
                continue

            img_paths = sorted(
                glob.glob(os.path.join(self.data_path, label, "*.jpg"))
            )
            if not img_paths:
                # Stray files or empty folders contribute nothing.
                continue

            # The label is the same for every image in the folder, so parse
            # it once instead of once per image.
            parts = label.split()
            if len(parts) != 3:
                raise ValueError(
                    f"Cannot parse label folder {label!r}: expected three "
                    "whitespace-separated tokens (number, separator, suit)."
                )
            number, _, suit = parts

            for img_path in img_paths:
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None
                    # instead of raising; skip it so images and labels stay
                    # aligned and the final array is not corrupted.
                    unreadable += 1
                    continue

                images.append(img)
                suits.append(suit)
                numbers.append(number)

        # NOTE(review): assumes all images share one shape so that np.array
        # yields a regular (n, height, width) array — TODO confirm.
        self.images = np.array(images)
        self.suits = np.array(suits)
        self.numbers = np.array(numbers)

        if unreadable:
            print(f"Skipped {unreadable} unreadable image file(s).")

        print(f"Loaded {len(self.images)} images and labels.")

        return self.images, self.suits, self.numbers


    def _read_h5(self, name: str) -> np.ndarray:
        """Read dataset ``<split>_<name>`` from ``DESCRIPTOR_ROOT/<split>_<name>.h5``."""
        key = f"{self.data_type}_{name}"
        with h5py.File(f"{self.DESCRIPTOR_ROOT}{key}.h5", "r") as f:
            # Slice with [:] while the file is still open so the whole
            # dataset is copied into memory before the handle is closed.
            return f[key][:]


    def load_descriptors(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Load and return the image descriptors and their labels as numpy arrays.

        Each of the three arrays is read from its own HDF5 file, named
        ``<split>_descriptors.h5``, ``<split>_suits.h5`` and
        ``<split>_numbers.h5``; the dataset inside each file has the same name
        as the file without its extension.

        Returns:
            - descriptors: Array of descriptors
            - suits: Suit of each image
            - numbers: Card number of each image
        """
        descriptors = self._read_h5("descriptors")
        suits = self._read_h5("suits")
        numbers = self._read_h5("numbers")

        return descriptors, suits, numbers

In [41]:
# NOTE(review): despite its name, `train` is built from the "test" split here.
# Rename the variable or change the argument so the two agree.
train = Dataset("test")

# load_data() returns (images, suits, numbers), bound here to i, s and n.
i, s, n = train.load_data()

Loaded 260 images and labels.


In [42]:
# Sanity check: the images, suits and numbers arrays should share the same first dimension.
i.shape, s.shape, n.shape

((260, 224, 224), (260,), (260,))