In [None]:
# HOG (Histogram of Oriented Gradients) is a feature-extraction method (a global descriptor) that, for each pixel of an image, gathers
# information about the pixel's neighborhood, such as texture, and compresses/abstracts that information into a reduced, condensed
# vector called a feature vector. This vector describes the features of the image, which is very useful for capturing its edge and gradient structures.

In [None]:
%pip install gdown
import gdown

# Share link of the Google Drive folder that holds the dataset.
# NOTE(review): main() reads "/content/splitted_dataset", so this download presumably
# produces that folder - confirm against the Drive folder's actual name/contents.
url = "https://drive.google.com/drive/folders/1ZKMUq6U6pRw7zcP6AkGxoQVX3LnMnGXC?usp=share_link"
# Fetch the whole folder with gdown; quiet=True is passed to silence its output.
gdown.download_folder(url, quiet=True)

In [None]:
!pip install tqdm

In [None]:
import os
import zipfile
from tqdm import tqdm
import numpy as np
import dlib
import cv2
from google.colab.patches import cv2_imshow
import shutil

def extract_hog_features(image, image_path):
  """Compute one flattened HOG descriptor covering the whole image.

  The HOG window is the image size rounded down to a multiple of the 8-pixel
  cell size, so a single descriptor describes (almost) the entire image.

  Args:
    image: Colour image array as produced by ``cv2.imread`` (BGR channel
      order), or ``None`` when the file could not be decoded.
    image_path: Path the image was loaded from. Not used by the computation;
      kept so existing callers keep working.

  Returns:
    A ``(features, ok)`` tuple. On success ``features`` is the flattened
    descriptor array and ``ok`` is ``True``. When no descriptor can be
    computed the result is ``(None, False)``.
  """
  # cv2.imread signals an unreadable file by returning None.
  if image is None:
    return None, False

  height, width = image.shape[:2]
  win_size = (width // 8 * 8, height // 8 * 8)
  # At least one 16x16 block has to fit inside the window, otherwise there is
  # nothing for HOG to describe.
  if min(win_size) < 16:
    return None, False

  # Images loaded with cv2.imread are BGR, so BGR2GRAY applies the correct
  # per-channel luminance weights (RGB2GRAY would swap red and blue).
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  hog = cv2.HOGDescriptor(
        _winSize=win_size,
        _blockSize=(16, 16),
        _blockStride=(8, 8),
        _cellSize=(8, 8),
        _nbins=9  # Number of bins in histogram
    )
  features = hog.compute(gray)
  # Some OpenCV builds hand back an empty tuple (or None) instead of an array
  # when no descriptor was produced.
  if features is None or isinstance(features, tuple):
    return None, False

  return features.flatten(), True

def _extract_archives(folder_path):
  """Unpack every .zip in folder_path into a sibling folder named after the archive."""
  for file_name in os.listdir(folder_path):
    if not file_name.endswith(".zip"):
      continue
    archive_path = os.path.join(folder_path, file_name)
    # splitext strips only the trailing extension, unlike str.replace which
    # would also hit ".zip" occurring earlier in the name.
    extract_folder = os.path.join(folder_path, os.path.splitext(file_name)[0])
    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(archive_path, mode="r") as zip_ref:
      zip_ref.extractall(extract_folder)


def _save_hog_features(input_dir, output_dir):
  """Save one .npy HOG vector per .jpg of input_dir; return the image paths that failed."""
  failed = []
  for file_name in tqdm(os.listdir(input_dir), desc="Processing Images"):
    if not file_name.endswith(".jpg"):
      continue
    image_path = os.path.join(input_dir, file_name)
    image = cv2.imread(image_path)
    # cv2.imread returns None for files it cannot decode; record them as bad
    # instead of letting the feature extraction crash the whole run.
    if image is None:
      failed.append(image_path)
      continue
    hog_features, ok = extract_hog_features(image, image_path)
    if ok:
      npy_name = os.path.splitext(file_name)[0] + ".npy"
      np.save(os.path.join(output_dir, npy_name), hog_features)
    else:
      failed.append(image_path)
  return failed


def main():
  """Extract HOG features for the eyes/nose/mouth train and test image sets.

  Steps:
    1. Unpack every ``.zip`` found in ``/content/splitted_dataset`` into a
       folder of the same name next to it. The archives themselves are left
       in place.
    2. For each feature folder and each set, write one ``.npy`` descriptor per
       ``.jpg`` image under ``<feature>/<set>/`` in the current working
       directory.
    3. Zip each output feature folder into ``<feature>.zip``.
    4. Delete from the dataset every image no descriptor could be computed for.
  """
  folder_path = "/content/splitted_dataset"
  _extract_archives(folder_path)

  feature_folders = ["eyes", "nose", "mouth"]
  sets = ["train", "test"]
  bad_images = []
  for feature in feature_folders:
    feature_folder_path = os.path.join(folder_path, feature)
    for s in sets:
      # output folder for the embeddings of this feature/set pair
      output_dir = os.path.join(feature, s)
      os.makedirs(output_dir, exist_ok=True)
      set_folder_path = os.path.join(feature_folder_path, s)
      bad_images.extend(_save_hog_features(set_folder_path, output_dir))

  for feature in feature_folders:
    shutil.make_archive(feature, "zip", feature)

  # Drop the unusable source images so later steps do not trip over them.
  for image in bad_images:
    os.remove(image)

# Run the pipeline when this code is executed as the main module (not on import).
if __name__ == "__main__":
  main()