In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..')))

from src.data_utils.dataset import Dataset
import h5py
import numpy as np
import cv2

In [4]:
def extract_descriptors(datatype):
    """Compute SIFT descriptors and per-descriptor labels for one dataset split.

    Args:
        datatype: Split identifier handed straight to ``Dataset`` (this notebook
            calls it with "train", "test" and "val").

    Returns:
        A tuple ``(descriptors, desc_suits, desc_numbers)`` of three lists, each
        holding one entry per image in the order ``Dataset.load_data()`` yields
        them:

        - ``descriptors[i]``: the descriptor array returned by
          ``sift.detectAndCompute`` for image ``i``, or an empty ``(0, 128)``
          float32 array when no keypoints were found.
        - ``desc_suits[i]`` / ``desc_numbers[i]``: column arrays of shape
          ``(len(descriptors[i]), 1)`` repeating the image's suit / number label
          once per descriptor.
    """
    # SIFT descriptors are 128-dimensional float32 vectors.
    sift_descriptor_dim = 128

    ds = Dataset(datatype)
    images, suits, numbers = ds.load_data()

    # SIFT_create() is the documented factory; no throwaway instance is needed.
    sift = cv2.SIFT_create()

    descriptors = []
    # One label column per image, with one row per descriptor of that image.
    desc_suits = []
    desc_numbers = []

    for img, suit, num in zip(images, suits, numbers):
        _, desc = sift.detectAndCompute(img, None)

        # detectAndCompute yields None (not an empty array) for images without
        # keypoints. Keep a zero-row placeholder so every image still has an
        # entry and list positions stay aligned with the image order.
        if desc is None:
            desc = np.empty((0, sift_descriptor_dim), dtype=np.float32)

        n_desc = len(desc)
        descriptors.append(desc)
        # np.full keeps the label's dtype even when n_desc is 0.
        desc_suits.append(np.full((n_desc, 1), suit))
        desc_numbers.append(np.full((n_desc, 1), num))

    print(f"Extracted descriptors for {datatype}")

    return descriptors, desc_suits, desc_numbers


def _fixed_width_bytes(labels, min_width):
    """Cast a label array to fixed-width bytes without truncating any label.

    The width is ``min_width`` unless the array's own string dtype is wider, in
    which case the wider size is used so long labels are stored intact.
    """
    labels = np.asarray(labels)
    if labels.dtype.kind == "U":
        # NumPy unicode dtypes use 4 bytes per character.
        width = max(min_width, labels.dtype.itemsize // 4)
    elif labels.dtype.kind == "S":
        width = max(min_width, labels.dtype.itemsize)
    else:
        # Non-string labels: keep the original fixed width.
        width = min_width
    return labels.astype(f"S{width}")


def store_descriptors(datatype, data_dir="../data"):
    """Extract descriptors for a split and write them to an HDF5 file.

    The file ``<data_dir>/<datatype>_descriptors_and_labels.h5`` is opened in
    "w" mode (an existing file is overwritten) and receives three groups,
    "descriptors", "suits" and "numbers". Each group holds one dataset per
    image, named by the image's position in the extracted lists ("0", "1", ...).

    Args:
        datatype: Split identifier forwarded to ``extract_descriptors``.
        data_dir: Directory the HDF5 file is written to. Defaults to "../data".
    """
    desc, suits, nums = extract_descriptors(datatype)

    out_path = os.path.join(data_dir, f"{datatype}_descriptors_and_labels.h5")

    with h5py.File(out_path, "w") as f:
        desc_group = f.create_group("descriptors")
        for idx, arr in enumerate(desc):
            desc_group.create_dataset(str(idx), data=arr)

        # Labels are stored as fixed-width byte strings (at least 8 bytes for
        # suits and 6 for numbers), widened when a label would not fit.
        suits_group = f.create_group("suits")
        for idx, arr in enumerate(suits):
            suits_group.create_dataset(str(idx), data=_fixed_width_bytes(arr, 8))

        nums_group = f.create_group("numbers")
        for idx, arr in enumerate(nums):
            nums_group.create_dataset(str(idx), data=_fixed_width_bytes(arr, 6))

    print(f"Stored descriptors for {datatype}\n")

    

In [5]:
# Run the extract-and-store pipeline once per dataset split.
for dt in ("train", "test", "val"):
    store_descriptors(dt)

Loaded 7509 images and labels.
Extracted descriptors for train
Stored descriptors for train

Loaded 260 images and labels.
Extracted descriptors for test
Stored descriptors for test

Loaded 260 images and labels.
Extracted descriptors for val
Stored descriptors for val



In [2]:
# NOTE(review): the variable is named `val_ds` but the "train" split is loaded —
# confirm which of the two was intended.
val_ds = Dataset("train")
# Load stored descriptors and their suit/number labels; `n=5` presumably limits
# how many entries are read — verify against Dataset.load_descriptors.
desc, suits, nums = val_ds.load_descriptors(n=5)

In [30]:
# Read the validation descriptors and labels back from disk.
with h5py.File("../data/val_descriptors_and_labels.h5", "r") as valf:
    # Show the top-level groups stored in the file.
    print(valf.keys())

    # Datasets are named by stringified image index ("0", "1", ...). h5py
    # iterates group members alphanumerically by default ("0", "1", "10", ...),
    # so sort the names numerically to keep list position i aligned with image i.
    desc_group = valf["descriptors"]
    descriptors = [desc_group[key][:] for key in sorted(desc_group, key=int)]

    suits_group = valf["suits"]
    suits = [suits_group[key][:] for key in sorted(suits_group, key=int)]

    nums_group = valf["numbers"]
    numbers = [nums_group[key][:] for key in sorted(nums_group, key=int)]

<KeysViewHDF5 ['descriptors', 'numbers', 'suits']>
