In [16]:
import json
import os
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [17]:
def load_data(path_to_image, path_to_label, image_size=(224, 224)):
    """Eagerly load all labelled images and their labels into NumPy arrays.

    Every ``.jpg`` file (case-insensitive) in ``path_to_image`` is paired with
    the ``.txt`` file of the same base name in ``path_to_label``. Each image is
    read with OpenCV, converted from BGR to RGB and resized to ``image_size``.

    Not recommended for the full dataset: according to the original author's
    note the images take about 3 GB of RAM as uint8 and up to 30 GB once
    normalized to floats.

    Args:
        path_to_image: Directory containing the ``.jpg`` images.
        path_to_label: Directory containing one whitespace-separated numeric
            ``.txt`` label file per image.
        image_size: Target size passed to ``cv2.resize`` as ``(width, height)``.
            Defaults to ``(224, 224)``.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a ``uint8`` array of
        the resized RGB images and ``labels`` is a ``float32`` array with one
        row per image, in sorted file-name order.

    Raises:
        ValueError: If a label file contains a non-numeric token, or if the
            label rows do not all have the same length.
    """
    images = []
    labels = []

    image_files = sorted(
        f for f in os.listdir(path_to_image) if f.lower().endswith(".jpg")
    )

    for img_file in image_files:
        base = os.path.splitext(img_file)[0]
        lbl_path = os.path.join(path_to_label, base + ".txt")
        img_path = os.path.join(path_to_image, img_file)

        # Skip images that have no label file.
        if not os.path.exists(lbl_path):
            continue

        # --- Load image ---
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it here rather than crash in cvtColor.
        img = cv2.imread(img_path)                     # BGR
        if img is None:
            warnings.warn(f"Skipping unreadable image: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)     # RGB
        img = cv2.resize(img, image_size)

        # --- Load labels ---
        with open(lbl_path, "r") as f:
            nums = [float(token) for token in f.read().split()]

        # Append both together so images[i] always corresponds to labels[i].
        images.append(img)
        labels.append(nums)

    images = np.array(images, dtype=np.uint8)
    labels = np.array(labels, dtype=np.float32)

    return images, labels


In [18]:
# Legacy eager-loading path, kept for reference only. The code below is
# intentionally commented out; the bare string that follows is the cell's only
# expression, so the notebook echoes it as the cell output.
"""this for loading the whole dataset and it can take up to 30 GB in RAM 
it is not recomended"""
# Loading data (reads every image into memory via load_data)
#X_train, Y_train = load_data("../datasets/images/train", "../datasets/labels/train")
#X_val, Y_val = load_data("../datasets/images/val", "../datasets/labels/val")

# Normalizing pixel values to the [0, 1] range
#X_train = X_train.astype('float32') # float32 because the dataset is big
#X_train /= 255.0
#X_val = X_val.astype('float32')
#X_val /= 255.0

'this for loading the whole dataset and it can take up to 30 GB in RAM \nit is not recomended'

In [30]:

def load_paths_and_labels(image_dir, json_label_dir):
    """Collect image paths and flattened label vectors without loading images.

    Every ``.jpg`` file (case-insensitive) in ``image_dir`` is paired with the
    ``.json`` file of the same base name in ``json_label_dir`` (per the
    original author's note, these JSON files were converted from YOLO TXT
    labels). Each JSON label is flattened into a 68-value vector:

        [class_id, center_x, center_y, width, height,
         x, y, v for each of the 21 hand keypoints in a fixed order]

    Images without a JSON file are skipped. JSON files that cannot be decoded
    or that are missing an expected field are skipped with a warning, so one
    bad label does not abort the whole scan.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        json_label_dir: Directory containing the per-image ``.json`` labels.

    Returns:
        A tuple ``(image_paths, labels)``: a 1-D string array of image paths
        and a ``float32`` array of shape ``(num_samples, 68)``, both in sorted
        file-name order. With no usable samples the shapes are ``(0,)`` and
        ``(0, 68)``.
    """
    # Keypoint order inside the flattened vector; loop-invariant, so it is
    # defined once instead of on every iteration.
    kpt_order = (
        "wrist",
        "thumb_cmc", "thumb_mcp", "thumb_ip", "thumb_tip",
        "index_mcp", "index_pip", "index_dip", "index_tip",
        "middle_mcp", "middle_pip", "middle_dip", "middle_tip",
        "ring_mcp", "ring_pip", "ring_dip", "ring_tip",
        "pinky_mcp", "pinky_pip", "pinky_dip", "pinky_tip",
    )
    # 1 class id + 4 bbox values + 3 values per keypoint = 68.
    vector_len = 1 + 4 + 3 * len(kpt_order)

    image_paths = []
    labels = []

    image_files = sorted(
        f for f in os.listdir(image_dir) if f.lower().endswith(".jpg")
    )

    for img_file in image_files:
        base = os.path.splitext(img_file)[0]
        json_path = os.path.join(json_label_dir, base + ".json")
        img_path = os.path.join(image_dir, img_file)

        # Skip if the JSON label does not exist.
        if not os.path.exists(json_path):
            continue

        try:
            with open(json_path, "r") as jf:
                data = json.load(jf)

            bbox = data["bbox"]
            nums = [
                float(data["class_id"]),
                bbox["center_x"],
                bbox["center_y"],
                bbox["width"],
                bbox["height"],
            ]
            keypoints = data["keypoints"]
            for kpt in kpt_order:
                kp = keypoints[kpt]
                nums.extend([kp["x"], kp["y"], kp["v"]])
        except (KeyError, TypeError, ValueError) as exc:
            # A missing field raises KeyError/TypeError and invalid JSON raises
            # json.JSONDecodeError (a ValueError). A length check after the
            # fact can never catch these, so they are handled here.
            warnings.warn(f"Skipping malformed label {json_path}: {exc!r}")
            continue

        image_paths.append(img_path)
        labels.append(nums)

    # The explicit dtype/reshape keep the result well-formed when empty.
    return (
        np.array(image_paths, dtype=str),
        np.array(labels, dtype=np.float32).reshape(-1, vector_len),
    )


In [31]:
# Gather image paths and label vectors for the training and validation splits.
X_train_path, Y_train = load_paths_and_labels(
    "../datasets/images/train",
    "../datasets/labels/train/json",
)
X_val_path, Y_val = load_paths_and_labels(
    "../datasets/images/val",
    "../datasets/labels/val/json",
)

In [32]:
# Print the shape of each array, one per line: training paths and labels first,
# then the validation paths and labels.
print(
    *(arr.shape for arr in (X_train_path, Y_train, X_val_path, Y_val)),
    sep="\n",
)

(18776,)
(18776, 68)
(7992,)
(7992, 68)


In [12]:
def vgg_model(Drop_rate, num_outputs=62, input_shape=(224, 224, 3)):
    """Build a VGG-style convolutional regression network.

    The network has five convolutional blocks (two 3x3 same-padded ReLU
    convolutions followed by 2x2 max pooling, with 64, 128, 256, 512 and 512
    filters), then a flatten layer and a dense head: two 2048-unit ReLU layers,
    each followed by dropout, and a linear output layer.

    NOTE(review): the label vectors produced by ``load_paths_and_labels`` in
    this notebook have 68 values, while the default head emits 62. Confirm
    which is intended; pass ``num_outputs=68`` to match those labels.

    Args:
        Drop_rate: Dropout rate applied after each 2048-unit dense layer.
        num_outputs: Number of units in the final linear layer. Defaults to 62,
            the previously hard-coded value.
        input_shape: Shape of one input image as ``(height, width, channels)``.
            Defaults to ``(224, 224, 3)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    conv_kwargs = dict(
        kernel_size=(3, 3), strides=1, padding="same", activation="relu"
    )

    stack = []

    # Convolutional blocks 1-5: two convolutions, then a 2x2 max pool.
    for filters in (64, 128, 256, 512, 512):
        for _ in range(2):
            if not stack:
                # Only the very first layer declares the input shape.
                stack.append(
                    layers.Conv2D(
                        filters=filters, input_shape=input_shape, **conv_kwargs
                    )
                )
            else:
                stack.append(layers.Conv2D(filters=filters, **conv_kwargs))
        stack.append(layers.MaxPool2D(pool_size=(2, 2), strides=2))

    # Dense head.
    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(2048, activation="relu"))
        stack.append(layers.Dropout(Drop_rate))
    stack.append(layers.Dense(num_outputs, activation="linear"))

    model = tf.keras.Sequential(stack)

    return model

In [14]:
# Instantiate the network with a dropout rate of 0.7 and print its layer summary.
model = vgg_model(Drop_rate=0.7)
model.summary()
