In [1]:
!pip install kaggle



In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle (5).json


{'kaggle (5).json': b'{"username":"aayushjoshi12","key":"0adcef7b5e722a1bed84a8526d075b8c"}'}

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [4]:
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d sbaghbidi/human-faces-object-detection

human-faces-object-detection.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
!unzip human-faces-object-detection.zip > datasets/face_detection_dataset

Archive:  human-faces-object-detection.zip
replace faces.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: faces.csv               
  inflating: images/00000003.jpg     
  inflating: images/00000004.jpg     
  inflating: images/00000005.jpg     
  inflating: images/00000006.jpg     
  inflating: images/00000008.jpg     
  inflating: images/00000011.jpg     
  inflating: images/00000012.jpg     
  inflating: images/00000015.jpg     
  inflating: images/00000018.jpg     
  inflating: images/00000020.jpg     
  inflating: images/00000022.jpg     
  inflating: images/00000023.jpg     
  inflating: images/00000024.jpg     
  inflating: images/00000026.jpg     
  inflating: images/00000031.jpg     
  inflating: images/00000032.jpg     
  inflating: images/00000041.jpg     
  inflating: images/00000044.jpg     
  inflating: images/00000045.jpg     
  inflating: images/00000047.jpg     
  inflating: images/00000050.jpg     
  inflating: images/00000051.jpg     
  inflating: images/0000

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pandas as pd
import cv2 as cv
import os

In [8]:
device = torch.device("cuda" if torch.cuda.is_available else "cpu")

In [9]:
class FaceDetectionDataset(Dataset):
    def __init__(self, dataset_path):
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((255, 255)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.__annotations__ = pd.read_csv(f"{dataset_path}/faces.csv")
        self.__images_path__ = f"{dataset_path}/images"

    def __len__(self):
        return len(self.__annotations__)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_path = os.path.join(self.__images_path__, self.__annotations__.iloc[idx, 0])
        image = cv.imread(image_path)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image = self.transforms(image)
        faces = self.__annotations__.iloc[idx, 3:].values.astype(float)
        faces = torch.tensor(faces, dtype=torch.float32)

        return image, faces

In [10]:
dataset = FaceDetectionDataset("datasets/face_detection_dataset")

In [11]:
loader = DataLoader(dataset, batch_size=8, shuffle=True)

In [12]:
model = models.resnet50(pretrained=True)



In [13]:
model.fc = torch.nn.Linear(model.fc.in_features, 4) # outputs x0,y0,x1,y1

In [14]:
model = model.to(device)

In [15]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
epochs = 10

In [16]:
for epoch in range(epochs):
  for i, (image, faces) in enumerate(loader):
    image = image.to(device)
    faces = faces.to(device)
    optimizer.zero_grad()
    output = model(image)
    loss = criterion(output, faces)
    loss.backward()
    optimizer.step()

    if i % 10 == 0 :
      print(f"Epoch: {epoch+1}/{epochs}, iter: {i}/{len(loader)}, loss: {loss.item()}")

Epoch: 1/10, iter: 0/419, loss: 324049.9375
Epoch: 1/10, iter: 10/419, loss: 177244.59375
Epoch: 1/10, iter: 20/419, loss: 342903.46875
Epoch: 1/10, iter: 30/419, loss: 153947.71875
Epoch: 1/10, iter: 40/419, loss: 169736.421875
Epoch: 1/10, iter: 50/419, loss: 127815.7109375
Epoch: 1/10, iter: 60/419, loss: 322864.0625
Epoch: 1/10, iter: 70/419, loss: 163473.0625
Epoch: 1/10, iter: 80/419, loss: 196673.234375
Epoch: 1/10, iter: 90/419, loss: 283109.625
Epoch: 1/10, iter: 100/419, loss: 132503.046875
Epoch: 1/10, iter: 110/419, loss: 80299.171875
Epoch: 1/10, iter: 120/419, loss: 151498.140625
Epoch: 1/10, iter: 130/419, loss: 144816.109375
Epoch: 1/10, iter: 140/419, loss: 1129033.875
Epoch: 1/10, iter: 150/419, loss: 339239.03125
Epoch: 1/10, iter: 160/419, loss: 250827.3125
Epoch: 1/10, iter: 170/419, loss: 382289.625
Epoch: 1/10, iter: 180/419, loss: 90630.265625
Epoch: 1/10, iter: 190/419, loss: 124427.0625
Epoch: 1/10, iter: 200/419, loss: 59555.62890625
Epoch: 1/10, iter: 210/41

In [17]:
torch.save(model.state_dict(), 'face_detection_model.pth')