<a href="https://colab.research.google.com/github/ValentinaEmili/Ethnicity-recognition/blob/main/Dlib/68_landmarks/ResNet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install validators
!pip install torchvision

## Dataset for the ethnicity recognition task

In [None]:
# upload the dataset
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1ZKMUq6U6pRw7zcP6AkGxoQVX3LnMnGXC?usp=share_link"
gdown.download_folder(url, quiet=True)

## Dataset for the age recognition task

In [None]:
# upload the dataset
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1B77SD7Eh9mbaoGIa7Dcaleqaee-wlMMm?usp=share_link"
gdown.download_folder(url, quiet=True)

## Dataset for the gender recognition task

In [None]:
# upload the dataset
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1hKgsi0EAsyDAKlVV6WEnK8LvslfLc9ME?usp=share_link"
gdown.download_folder(url, quiet=True)

In [None]:
import torch
from torchvision import transforms

# run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# ResNet-50 pre-trained on ImageNet, fetched from NVIDIA's torch.hub entry
resnet50 = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_resnet50',
    pretrained=True,
)
# swap the final fully connected layer for a pass-through, so the network
# returns whatever the layer before it produces instead of class scores
resnet50.fc = torch.nn.Identity()
# inference mode, then move the weights to the selected device
resnet50.eval()
resnet50.to(device)

# preprocessing applied to every image before it is fed to the network;
# the mean/std values are presumably the ImageNet channel statistics - TODO confirm
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
!pip install tqdm

In [None]:
import zipfile
import os
from PIL import Image
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import numpy as np
import shutil

In [2]:
def extract_and_save_features(dataloader, folder_path, feature, set):
  """Run every batch through ``resnet50`` and save one ``.npy`` file per image.

  Output files are written to ``<feature>/<set>/`` relative to the current
  working directory. Each file is named after its source image with the file
  extension replaced by ``.npy``.

  Args:
    dataloader: iterable yielding ``(batch, file_names)`` pairs; ``batch`` is
      fed to ``resnet50`` and ``file_names`` holds the image names in the
      same order as the rows of ``batch``.
    folder_path: root folder of the input dataset. Not used by this function;
      kept so existing callers keep working.
    feature: name of the top-level output folder (the callers in this file
      pass "eyes", "nose" or "mouth").
    set: name of the split sub-folder ("train" or "test" in this file). The
      name shadows the built-in ``set`` but is kept for keyword callers.
  """
  output_dir = os.path.join(feature, set)
  os.makedirs(output_dir, exist_ok=True)
  with torch.no_grad():
    for batch, file_names in tqdm(dataloader, desc=f"Extracting {feature} {set} features"):
      # a single device-to-host copy per batch instead of one per image
      features = resnet50(batch.to(device)).cpu().numpy()
      for file_name, feature_vector in zip(file_names, features):
        # splitext swaps only the real extension: "x.png" -> "x.npy" (not
        # "x.png.npy") and a ".jpg" in the middle of a name is left alone
        stem = os.path.splitext(file_name)[0]
        np.save(os.path.join(output_dir, stem + ".npy"), feature_vector)
      torch.cuda.empty_cache()  # Free up unused memory after each batch

class ImageDataset(Dataset):
  """Dataset over the image files stored directly inside one folder.

  Each item is a ``(image, file_name)`` pair, so the caller can tell which
  file every element of a batch came from.
  """

  def __init__(self, subfolder_path, transform=None):
    """Index the files of ``subfolder_path``.

    Args:
      subfolder_path: folder that contains the image files.
      transform: optional callable applied to each RGB image; when ``None``
        the image is returned untransformed.
    """
    self.subfolder_path = subfolder_path
    self.transform = transform
    # keep regular files only (a stray sub-directory would make Image.open
    # fail) and sort them, because os.listdir returns entries in arbitrary order
    self.image_files = sorted(
      f for f in os.listdir(subfolder_path)
      if os.path.isfile(os.path.join(subfolder_path, f))
    )

  def __getitem__(self, index):
    """Return ``(image, file_name)`` for the file at position ``index``."""
    image_path = os.path.join(self.subfolder_path, self.image_files[index])
    # the context manager closes the file handle; convert() hands back a
    # separate, fully loaded image that stays valid afterwards
    with Image.open(image_path) as raw_image:
      image = raw_image.convert("RGB")
    # transform defaults to None, so only call it when one was supplied
    if self.transform is not None:
      image = self.transform(image)
    return image, self.image_files[index]

  def __len__(self):
    """Return the number of indexed files."""
    return len(self.image_files)

def _extract_zip_archives(folder_path):
  """Unpack every ``.zip`` file found directly inside ``folder_path``.

  Each archive is extracted into a folder that sits next to it and carries the
  archive's name without the ``.zip`` suffix.
  """
  for file_name in os.listdir(folder_path):
    if not file_name.endswith(".zip"):
      continue
    # strip only the trailing ".zip" to get the destination folder name
    extract_folder = os.path.join(folder_path, file_name[:-len(".zip")])
    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(os.path.join(folder_path, file_name), mode="r") as zip_ref:
      zip_ref.extractall(extract_folder)


def main(folder_path="/content/splitted_by_age", batch_size=64):
  """Extract ResNet features for every facial region and split, then zip them.

  Steps:
    1. unpack the ``.zip`` archives found in ``folder_path``;
    2. for each region ("eyes", "nose", "mouth") and split ("train", "test"),
       read the images from ``<folder_path>/<region>/<split>`` and save their
       features under ``<region>/<split>`` in the current working directory;
    3. pack each region's output folder into ``<region>.zip``.

  Args:
    folder_path: root folder of the dataset to process. The other dataset
      roots mentioned by this notebook are "/content/splitted_dataset" and
      "/content/splitted_by_gender".
    batch_size: number of images per batch fed to the network.
  """
  regions = ("eyes", "nose", "mouth")
  splits = ("train", "test")

  _extract_zip_archives(folder_path)

  # create folders to save the extracted features
  for region in regions:
    for split in splits:
      os.makedirs(os.path.join(region, split), exist_ok=True)

  # load images -> create DataLoader -> extract and save features
  for region in regions:
    for split in splits:
      dataset = ImageDataset(subfolder_path=os.path.join(folder_path, region, split), transform=transform)
      # no shuffling: every feature vector is saved under its own file name,
      # so the visiting order has no effect on the output
      dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
      extract_and_save_features(dataloader, folder_path, region, split)

  # one archive per region, written next to the region's output folder
  for region in regions:
    shutil.make_archive(region, "zip", region)

# run the whole pipeline only when this code is executed as the main module,
# not when it is imported from somewhere else
if __name__ == "__main__":
  main()

NameError: name 'Dataset' is not defined