In [44]:
import os
import pickle
import numpy as np
from tqdm import tqdm
from deepface import DeepFace

def get_embedding_vec(img_path, detector_backend='dlib'):
    """Return the DeepFace embedding of the first face result for an image.

    Parameters
    ----------
    img_path : str
        Path of the image; forwarded unchanged to ``DeepFace.represent``.
    detector_backend : str, optional
        Name of the face detector DeepFace should use. Defaults to
        ``'dlib'``, which is the backend this function used when the choice
        was hard-coded. Other names this notebook listed as candidates:
        ``'opencv'``, ``'ssd'``, ``'mtcnn'``, ``'retinaface'``,
        ``'mediapipe'``, ``'yolov8'``, ``'yunet'``, ``'fastmtcnn'``.

    Returns
    -------
    numpy.ndarray or None
        The ``'embedding'`` entry of the first result converted to an array,
        or ``None`` when ``DeepFace.represent`` returns an empty result.
    """
    # enforce_detection=False asks DeepFace not to raise when it cannot find
    # a face (see the DeepFace.represent documentation).
    embedding_objs = DeepFace.represent(
        img_path=img_path,
        detector_backend=detector_backend,
        enforce_detection=False,
    )

    if not embedding_objs:
        return None

    # Only the first returned result is used; any further results are ignored.
    return np.array(embedding_objs[0]['embedding'])



def get_embeddings(src):
    """Embed every ``.jpeg`` image in a directory and build integer labels.

    The class of an image is the part of its file name before the first
    ``'_'``. Files are processed in sorted order; files for which
    ``get_embedding_vec`` returns ``None`` are skipped and contribute neither
    an embedding nor a label.

    Class indices are 0-based and handed out in the order classes are first
    successfully embedded *within this call*, so two directories only yield
    the same mapping if they produce the same classes in the same order.

    Parameters
    ----------
    src : str
        Directory containing the ``<class>_<anything>.jpeg`` files.

    Returns
    -------
    X : numpy.ndarray
        Stacked embedding vectors, one row per embedded image.
    Y : numpy.ndarray
        Class index of each row of ``X``.
    class_names : numpy.ndarray
        Class names ordered by index, i.e. ``class_names[Y[k]]`` is the class
        name of row ``k``.
    class_to_idx : dict
        Mapping from class name to the index used in ``Y``.
    """
    emb_vecs = []
    labels = []
    class_to_idx = {}

    print('getting feature embeddings')

    files = [file for file in sorted(os.listdir(src)) if file.endswith('.jpeg')]

    for file in tqdm(files):
        emb_vec = get_embedding_vec(os.path.join(src, file))
        if emb_vec is None:
            continue

        class_name = file.split('_')[0]

        # Look the index up by class name instead of tracking a running
        # counter: a new class gets the next free index, a known class
        # reuses its existing one, so every image of a class shares a label.
        label = class_to_idx.setdefault(class_name, len(class_to_idx))

        emb_vecs.append(emb_vec)
        labels.append(label)

    X = np.array(emb_vecs)
    Y = np.array(labels)

    print('X.shape =', X.shape)
    print('Y.shape =', Y.shape)

    # dicts keep insertion order, and indices were assigned in insertion
    # order, so position i of this array is the name of class index i.
    class_names = np.array(list(class_to_idx))

    return X, Y, class_names, class_to_idx

In [41]:
# Embed the images under 'augmented_data/Raw'; the results are written out
# with desc = 'train' in the following cell. This is slow: the recorded run
# took about 48 minutes for 9916 files and produced 9843 embeddings.
X, Y, class_names, class_to_idx = get_embeddings('augmented_data/Raw')

getting feature embeddings


100%|██████████| 9916/9916 [48:27<00:00,  3.41it/s]  


X.shape = (9843, 4096)
Y.shape = (9843,)


In [42]:
# Write the current X / Y / class_names arrays and the class -> index
# mapping to disk as the 'train' split.
dst = 'embedded_feature_data'
desc = 'train'

os.makedirs(dst, exist_ok=True)

# Output path -> array; each entry becomes one .npy file.
npy_targets = {
    f'{dst}/X_{desc}_emb.npy': X,
    f'{dst}/Y_{desc}_emb.npy': Y,
    f'{dst}/class_names.npy': class_names,
}
for npy_path, arr in npy_targets.items():
    np.save(npy_path, arr)

# class_to_idx is a dict, so it is pickled rather than stored as .npy.
with open(os.path.join(dst, 'class_to_idx.pkl'), 'wb') as handle:
    pickle.dump(class_to_idx, handle)

In [45]:
# Embed the images under 'sorted_val_data/Raw'; the results are written out
# with desc = 'val' in the following cell. Note this rebinds X, Y,
# class_names and class_to_idx, replacing the values from the training run.
# The recorded run processed 154 files and produced 153 embeddings.
X, Y, class_names, class_to_idx = get_embeddings('sorted_val_data/Raw')

getting feature embeddings


100%|██████████| 154/154 [00:49<00:00,  3.12it/s]

X.shape = (153, 4096)
Y.shape = (153,)





In [None]:
# Write the current X / Y / class_names arrays and the class -> index
# mapping to disk as the 'val' split.
dst = 'embedded_feature_data'
desc = 'val'

os.makedirs(dst, exist_ok=True)

# Output path -> array; each entry becomes one .npy file.
npy_targets = {
    f'{dst}/X_{desc}_emb.npy': X,
    f'{dst}/Y_{desc}_emb.npy': Y,
    f'{dst}/class_names_val.npy': class_names,
}
for npy_path, arr in npy_targets.items():
    np.save(npy_path, arr)

# class_to_idx is a dict, so it is pickled rather than stored as .npy.
with open(os.path.join(dst, 'class_to_idx_val.pkl'), 'wb') as handle:
    pickle.dump(class_to_idx, handle)