# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. This will mostly follow standard pytorch training patterns.

In [19]:
import torch

In [22]:
torch.cuda.device_count()

4

In [3]:
torch.__version__

'1.2.0'

In [4]:
!pip install tensorboard

Collecting tensorboard
[?25l  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
[K     |################################| 3.8MB 2.9MB/s eta 0:00:01
[?25hCollecting absl-py>=0.4 (from tensorboard)
[?25l  Downloading https://files.pythonhosted.org/packages/3b/72/e6e483e2db953c11efa44ee21c5fdb6505c4dffa447b4263ca8af6676b62/absl-py-0.8.1.tar.gz (103kB)
[K     |################################| 112kB 233kB/s eta 0:00:01
Collecting werkzeug>=0.11.15 (from tensorboard)
[?25l  Downloading https://files.pythonhosted.org/packages/ce/42/3aeda98f96e85fd26180534d36570e4d18108d62ae36f87694b476b83d6f/Werkzeug-0.16.0-py2.py3-none-any.whl (327kB)
[K     |################################| 327kB 1.3MB/s eta 0:00:01
Collecting markdown>=2.6.8 (from tensorboard)
[?25l  Downloading https://files.pythonhosted.org/packages/c0/4e/fd492e91abdc2d2fcb70ef453064d980688762079397f779758e055f6575/Ma

In [7]:
!pip install flask

Collecting flask
[?25l  Downloading https://files.pythonhosted.org/packages/9b/93/628509b8d5dc749656a9641f4caf13540e2cdec85276964ff8f43bbb1d3b/Flask-1.1.1-py2.py3-none-any.whl (94kB)
[K     |################################| 102kB 1.2MB/s ta 0:00:011
Collecting itsdangerous>=0.24 (from flask)
  Downloading https://files.pythonhosted.org/packages/76/ae/44b03b253d6fade317f32c24d100b3b35c2239807046a4c953c7b89fa49e/itsdangerous-1.1.0-py2.py3-none-any.whl
Collecting click>=5.1 (from flask)
[?25l  Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl (81kB)
[K     |################################| 81kB 327kB/s eta 0:00:011
Installing collected packages: itsdangerous, click, flask
Successfully installed click-7.0 flask-1.1.1 itsdangerous-1.1.0


In [9]:
!pip install future

Collecting future
[?25l  Downloading https://files.pythonhosted.org/packages/f6/85/c273089eb6efa5644c0a1382ea553554bc0d40e00a46d989ec67f123f8b5/future-0.18.0.tar.gz (830kB)
[K     |################################| 839kB 872kB/s eta 0:00:01
[?25hBuilding wheels for collected packages: future
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Created wheel for future: filename=future-0.18.0-cp36-none-any.whl size=490415 sha256=b594cdd8143147aea261e5cba7dc00a351ac36188a93f09655c6c9ef83a9201c
  Stored in directory: /root/.cache/pip/wheels/2c/02/af/63eadc269fe686aa0aa9c38eee165ad5734cbf8b765cfeedaa
Successfully built future
Installing collected packages: future
Successfully installed future-0.18.0


In [10]:
from facenet_pytorch import MTCNN, InceptionResnetV1, prewhiten, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout

In [11]:
data_dir = '../data/test_images'

batch_size = 32
epochs = 8
workers = 0 if os.name == 'nt' else 4

#### Determine if an nvidia GPU is available

In [12]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [13]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, prewhiten=True,
    device=device
)

#### Perfom MTCNN facial detection

Iterate through the DataLoader object and obtained cropped faces.

In [14]:
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((1024, 1024)))
dataset.samples = [(p, p.replace(data_dir, data_dir + '_cropped')) for p, _ in dataset.samples]
        
loader = DataLoader(dataset, num_workers=workers, batch_size=16, collate_fn=training.collate_pil)

for i, (x, y) in enumerate(loader):
    print('\rImages processed: {:8d} of {:8d}'.format(i + 1, len(loader)), end='')
    mtcnn(x, save_path=y)
    
# Remove mtcnn to reduce GPU memory usage
del mtcnn

Images processed:        1 of        1

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [15]:
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(dataset.class_to_idx)
).to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [16]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    prewhiten
])
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
train_inds = img_inds[:int(0.8 * len(img_inds))]
val_inds = img_inds[int(0.8 * len(img_inds)):]

train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

#### Define loss and evaluation functions

In [17]:
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

#### Train model

In [18]:
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(
    resnet, loss_fn, val_loader,
    batch_metrics=metrics, show_running=True, device=device,
    writer=writer
)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

writer.close()



Initial
----------
Valid |     1/1    | loss:    1.5545 | fps:    0.8127 | acc:    1.0000   

Epoch 1/8
----------
Train |     1/1    | loss:    1.5954 | fps:   10.1390 | acc:    0.5000   
Valid |     1/1    | loss:    1.5584 | fps:    3.6638 | acc:    0.0000   

Epoch 2/8
----------
Train |     1/1    | loss:    1.5051 | fps:   11.4019 | acc:    1.0000   
Valid |     1/1    | loss:    1.5750 | fps:    3.7288 | acc:    0.0000   

Epoch 3/8
----------
Train |     1/1    | loss:    1.4029 | fps:   10.8510 | acc:    1.0000   
Valid |     1/1    | loss:    1.5870 | fps:    3.7698 | acc:    0.0000   

Epoch 4/8
----------
Train |     1/1    | loss:    1.3150 | fps:   10.7113 | acc:    1.0000   
Valid |     1/1    | loss:    1.5957 | fps:    3.6949 | acc:    0.0000   

Epoch 5/8
----------
Train |     1/1    | loss:    1.2596 | fps:   11.5989 | acc:    1.0000   
Valid |     1/1    | loss:    1.6005 | fps:    3.7353 | acc:    0.0000   

Epoch 6/8
----------
Train |     1/1    | loss:    1.2