In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..')))

from src.data_utils.dataset import Dataset
import h5py
import numpy as np
import cv2

In [52]:
def extract_descriptors(datatype):
    """Compute SIFT descriptors for every image of a dataset split.

    Each descriptor row is paired with the suit and number label of the
    image it was extracted from, so the three returned arrays are
    row-aligned.

    Args:
        datatype: Split identifier handed straight to ``Dataset`` (this
            notebook calls it with "train", "test" and "val").

    Returns:
        A tuple ``(descriptors, suits, numbers)``. ``descriptors`` is the
        vertical stack of the per-image descriptor matrices; ``suits`` and
        ``numbers`` are column vectors holding one label per descriptor row.

    Raises:
        ValueError: If no image in the split yields any descriptor.
    """
    ds = Dataset(datatype)
    images, suits, numbers = ds.load_data()

    # Use the documented factory directly instead of building a throwaway
    # SIFT instance just to call ``create()`` on it.
    sift = cv2.SIFT_create()

    descriptors = []
    # Per-image label columns, one row per descriptor of that image.
    desc_suits = []
    desc_numbers = []
    skipped = 0

    for img, suit, num in zip(images, suits, numbers):
        _, desc = sift.detectAndCompute(img, None)

        # detectAndCompute returns None (not an empty matrix) when it finds
        # no keypoints; such images contribute nothing and would otherwise
        # break both len(desc) and np.vstack.
        if desc is None or len(desc) == 0:
            skipped += 1
            continue

        descriptors.append(desc)
        desc_suits.append(np.array([suit] * len(desc)).reshape(-1, 1))
        desc_numbers.append(np.array([num] * len(desc)).reshape(-1, 1))

    if not descriptors:
        # np.vstack([]) would raise an opaque ValueError; keep the exception
        # type but say what actually went wrong.
        raise ValueError(f"No SIFT descriptors could be extracted for {datatype}")

    all_descriptors = np.vstack(descriptors)
    all_suits = np.vstack(desc_suits)
    all_numbers = np.vstack(desc_numbers)

    if skipped:
        print(f"Skipped {skipped} images without keypoints for {datatype}")
    print(f"Extracted descriptors for {datatype}")

    return all_descriptors, all_suits, all_numbers


def store_descriptors(datatype):
    """Extract the descriptors of one split and persist them as HDF5 files.

    Three files are (over)written under ``../data``: one with the stacked
    descriptors, one with the per-descriptor suit labels and one with the
    per-descriptor number labels. Each file holds a single dataset.

    Args:
        datatype: Split identifier, forwarded to ``extract_descriptors`` and
            used as prefix for the file and dataset names.
    """
    descriptors, suit_labels, number_labels = extract_descriptors(datatype)

    def _write(path, key, array, cast=None):
        # Open in "w" mode first (truncating any existing file), then
        # convert and store the payload as the file's only dataset.
        with h5py.File(path, "w") as h5_file:
            payload = array if cast is None else array.astype(cast)
            h5_file.create_dataset(key, data=payload)

    # NOTE(review): the file is called "*_images.h5" although it stores
    # descriptors - confirm it does not clobber a raw-image file that
    # Dataset reads from.
    _write(f"../data/{datatype}_images.h5", f"{datatype}_descriptors", descriptors)

    # Labels are stored as fixed-width byte strings (8 bytes for suits,
    # 6 for numbers); longer labels would be truncated by the cast.
    # NOTE(review): confirm no label exceeds these widths.
    _write(f"../data/{datatype}_suits.h5", f"{datatype}_suits", suit_labels, "S8")
    _write(f"../data/{datatype}_numbers.h5", f"{datatype}_numbers", number_labels, "S6")

    print(f"Stored descriptors for {datatype}")

    

In [53]:
# Extract and persist the descriptors of every split, one after another.
for ds in ("train", "test", "val"):
    store_descriptors(ds)

Loaded 7509 images and labels.
Extracted descriptors for train
Stored descriptors for train
Loaded 260 images and labels.
Extracted descriptors for test
Stored descriptors for test
Loaded 260 images and labels.
Extracted descriptors for val
Stored descriptors for val
