In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os

workers = 0 if os.name == 'nt' else 4
dtype = torch.cuda.FloatTensor

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [3]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

In [7]:
resnet = InceptionResnetV1(pretrained='casia-webface').eval().to(device)

Downloading parameters (1/2)
Downloading parameters (2/2)


In [10]:
def collate_fn(x):
    return x[0]

dataset = datasets.ImageFolder('../../../data/small_data/train')
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)

In [11]:
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

Face detected with probability: 0.999904
Face detected with probability: 0.999998
Face detected with probability: 0.999993
Face detected with probability: 0.999950
Face detected with probability: 0.999990
Face detected with probability: 0.999200
Face detected with probability: 0.999220
Face detected with probability: 0.999859
Face detected with probability: 0.999829
Face detected with probability: 0.990638
Face detected with probability: 0.999998
Face detected with probability: 0.999989
Face detected with probability: 0.999358
Face detected with probability: 0.999988
Face detected with probability: 0.999991
Face detected with probability: 0.999769
Face detected with probability: 0.999997
Face detected with probability: 0.999973
Face detected with probability: 0.999812
Face detected with probability: 1.000000
Face detected with probability: 0.999832
Face detected with probability: 0.984941
Face detected with probability: 0.999976
Face detected with probability: 0.999994
Face detected wi

Face detected with probability: 0.999991
Face detected with probability: 0.998919
Face detected with probability: 0.999963
Face detected with probability: 0.999490
Face detected with probability: 0.999994
Face detected with probability: 0.999997
Face detected with probability: 0.999993
Face detected with probability: 0.999989
Face detected with probability: 0.999890
Face detected with probability: 0.999940
Face detected with probability: 0.999920
Face detected with probability: 0.999999
Face detected with probability: 0.999999
Face detected with probability: 0.999992
Face detected with probability: 0.999829
Face detected with probability: 0.999993
Face detected with probability: 0.999985
Face detected with probability: 0.998102
Face detected with probability: 0.999985
Face detected with probability: 0.999988
Face detected with probability: 0.999987
Face detected with probability: 0.999974
Face detected with probability: 0.999989
Face detected with probability: 0.999784
Face detected wi

In [13]:
aligned = torch.stack(aligned).to(device)
embeddings = resnet(aligned).detach().cpu()
embeddings.shape

TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor