# 1. Baseline

In [None]:
# Mount Google Drive at /content/drive so the dataset and project files stored
# under MyDrive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

## Libraries

In [None]:
# Work from the dataset folder on Drive; the archive names in the next cell are relative to it.
%cd /content/drive/MyDrive/ColabNotebooks/ArcFace/dataset


/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset


In [None]:
!unzip -q lfw_aligned_112.zip
!unzip -q MS1M.zip

In [None]:
# Sanity check: display the current working directory.
%pwd

'/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset'

In [None]:
import os
from pathlib import Path
from tqdm import tqdm
from easydict import EasyDict as edict

import torch
import torch.nn  as nn
import torch.optim as optim
import torchvision.utils as vutils
from torchvision import transforms as trans

from data.ms1m import get_train_loader
from data.lfw import LFW

from backbone.arcfacenet import SEResNet_IR
from margin.ArcMarginProduct import ArcMarginProduct

from util.utils import save_checkpoint, test

## Configuration

In [None]:
# Central experiment configuration; later cells read all of their settings from `conf`.
conf = edict({
    # --- data locations ---
    'train_root': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/MS1M',
    'lfw_test_root': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/lfw_aligned_112',
    'lfw_file_list': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/lfw_pair.txt',

    # --- model ---
    'mode': 'se_ir',             # or 'ir'; with 'ir' use a depth of 100 or 152
    'depth': 50,                 # number of layers; 100 and 152 are the alternatives
    'margin_type': 'ArcFace',
    'feature_dim': 512,          # 512 is the commonly used embedding size
    'scale_siz': 32.0,

    # --- optimisation ---
    'batch_size': 96,            # lower to 16 or 32 if memory runs out
    'lr': 0.01,
    'milestones': [8, 10, 12],   # epochs at which the learning rate is reduced
    'total_epoch': 14,

    # --- checkpoints ---
    'save_folder': './saved',
})
# Checkpoint directory is named after the backbone variant, e.g. ./saved/se_ir_50
conf.save_dir = os.path.join(conf.save_folder, '{}_{}'.format(conf.mode, conf.depth))
conf.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
conf.num_workers = 4     # worker processes used by the data loaders
conf.pin_memory = True   # presumably forwarded to the training DataLoader -- confirm in get_train_loader

print(conf.train_root)






/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/MS1M


In [None]:
# Create the checkpoint directory, including missing parents; a no-op if it already exists.
# NOTE: conf.save_dir is a relative path, so it is resolved against the current working directory.
Path(conf.save_dir).mkdir(parents=True, exist_ok=True)


./saved/se_ir_50


## Data Loader

In [None]:
# Show where we are, then make sure the working directory is the dataset folder
# before the data loaders are built.
%pwd
%cd /content/drive/MyDrive/ColabNotebooks/ArcFace/dataset

/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset


In [None]:
# Preprocessing applied to the LFW evaluation images:
#   1. ToTensor   -- converts pixel values from [0, 255] to floats in [0.0, 1.0]
#   2. Normalize  -- (x - 0.5) / 0.5 per channel, i.e. rescales to [-1.0, 1.0]
preprocess_steps = [
    trans.ToTensor(),
    trans.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
]
transform = trans.Compose(preprocess_steps)
print(conf)

# The training loader builds its own transform internally and also reports the number of classes.
trainloader, class_num = get_train_loader(conf)


{'train_root': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/MS1M', 'lfw_test_root': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/lfw_aligned_112', 'lfw_file_list': '/content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/lfw_pair.txt', 'mode': 'se_ir', 'depth': 50, 'margin_type': 'ArcFace', 'feature_dim': 512, 'scale_siz': 32.0, 'batch_size': 96, 'lr': 0.01, 'milestones': [8, 10, 12], 'total_epoch': 14, 'save_folder': './saved', 'save_dir': './saved/se_ir_50', 'device': device(type='cuda', index=0), 'num_workers': 4, 'pin_memory': True}




In [None]:
# Number of identities (classes) found in the training set; 200 in the recorded run.
print(f'number of class {class_num}')

number of class 200


In [None]:
print(trainloader.dataset)  # dataset summary; the recorded run lists 29148 training images in total

Dataset ImageFolder
    Number of datapoints: 29148
    Root location: /content/drive/MyDrive/ColabNotebooks/ArcFace/dataset/MS1M
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
           )


In [None]:
# LFW verification pairs, used as the test set when evaluating the model.
lfwdataset = LFW(
    conf.lfw_test_root,
    conf.lfw_file_list,
    transform=transform,
)
lfwloader = torch.utils.data.DataLoader(
    lfwdataset,
    batch_size=128,
    num_workers=conf.num_workers,
)



## Model

In [None]:
print(conf.device)  # confirm whether the run uses the GPU (cuda:0) or fell back to the CPU

cuda:0


In [None]:
# Backbone network, configured by depth / feature_dim / mode and moved to the training device.
net = SEResNet_IR(
    conf.depth,
    feature_dim=conf.feature_dim,
    mode=conf.mode,
)
net = net.to(conf.device)

# Margin head sized by the embedding dimension and the number of training classes.
# NOTE(review): conf.scale_siz and conf.margin_type are not passed here, so the head
# runs with its own defaults -- confirm that is intended.
margin = ArcMarginProduct(conf.feature_dim, class_num)
margin = margin.to(conf.device)

In [None]:
print(net)  # dump the full layer-by-layer architecture (long output)

SEResNet_IR(
  (input_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): PReLU(num_parameters=64)
  )
  (output_layer): Sequential(
    (0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.4, inplace=False)
    (2): Flatten()
    (3): Linear(in_features=25088, out_features=512, bias=True)
    (4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (body): Sequential(
    (0): BottleNeck_IR_SE(
      (shortcut_layer): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
      (res_layer): Sequential(
        (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (2): BatchNorm2d(64, eps=1e-05, moment

In [None]:
# Classification loss applied to the output of the margin head.
criterion = nn.CrossEntropyLoss()

# One parameter group per module (backbone first, margin head second), both with
# the same weight decay; the learning rate, momentum and Nesterov settings are shared.
param_groups = [
    {'params': module.parameters(), 'weight_decay': 5e-4}
    for module in (net, margin)
]
optimizer = optim.SGD(param_groups, lr=conf.lr, momentum=0.9, nesterov=True)


In [None]:
print(optimizer)  # show the hyper-parameters of both parameter groups

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


In [None]:
def schedule_lr(opt=None, factor=10):
  """Divide the learning rate of every parameter group by `factor`, then print the optimizer.

  Args:
    opt: optimizer whose parameter groups are updated. When omitted, the
      notebook-level `optimizer` is used, so a bare `schedule_lr()` call
      behaves exactly as before.
    factor: divisor applied to each group's current learning rate (default 10).

  Note:
    The update is relative to the current learning rate, so calls compound:
    calling this again on an optimizer that was already decayed shrinks the
    rate further.
  """
  target = optimizer if opt is None else opt
  for group in target.param_groups:
    group['lr'] /= factor
  print(target)


## Train

In [None]:
%cd ..

/content/drive/MyDrive/ColabNotebooks/ArcFace


In [None]:
# Training loop: one pass over `trainloader` per epoch, followed by an LFW
# evaluation and a checkpoint.

# Start every run from the configured learning rate. The milestone decay below is
# relative to the current value, so re-running this cell on a live kernel would
# otherwise keep shrinking a rate that earlier runs had already decayed.
for group in optimizer.param_groups:
  group['lr'] = conf.lr

# conf.save_dir is a relative path; make sure it exists under the current working
# directory before the first checkpoint is written.
os.makedirs(conf.save_dir, exist_ok=True)

best_acc = 0
print('Using :',conf.device)
for epoch in range(1, conf.total_epoch + 1):

  net.train()
  print('epoch {}/{}'.format(epoch, conf.total_epoch), flush=True)

  # Reduce the learning rate once at every configured milestone epoch
  # (works for any number of milestones, not just three).
  if epoch in conf.milestones:
    schedule_lr()

  loss_sum = 0.0
  batch_count = 0
  for data in tqdm(trainloader):  # tqdm shows a progress bar; each iteration is one batch
    img, label = data[0].to(conf.device), data[1].to(conf.device)
    optimizer.zero_grad()

    embeddings = net(img)
    output = margin(embeddings, label)  # the margin head needs the labels as well
    total_loss = criterion(output, label)
    total_loss.backward()
    optimizer.step()

    loss_sum += total_loss.item()
    batch_count += 1

  # Evaluate on LFW; no gradients are needed for evaluation.
  net.eval()
  with torch.no_grad():
    lfw_acc = test(conf, net, lfwdataset, lfwloader)

  # Report the mean loss over the epoch rather than only the last batch's loss.
  train_loss = loss_sum / max(batch_count, 1)
  print('\nLFW: {:.4f}  | train_loss: {:.4f}\n'.format(lfw_acc, train_loss))

  is_best = lfw_acc > best_acc
  best_acc = max(lfw_acc, best_acc)

  # Save the model state for this epoch; `is_best` is passed so the helper can flag the best model.
  save_checkpoint({
      'epoch': epoch,
      'net_state_dict': net.state_dict(),
      'margin_state_dict': margin.state_dict(),
      'best_acc': best_acc,
  }, is_best, checkpoint=conf.save_dir)


Using : cuda:0
epoch 1/14


100%|██████████| 304/304 [04:32<00:00,  1.12it/s]



LFW: 0.6752  | train_loss: 20.8867

best model saved

epoch 2/14


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.6855  | train_loss: 20.2237

best model saved

epoch 3/14


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.6995  | train_loss: 19.5757

best model saved

epoch 4/14


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.7095  | train_loss: 19.0973

best model saved

epoch 5/14


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.7215  | train_loss: 18.9208

best model saved

epoch 6/14


100%|██████████| 304/304 [04:34<00:00,  1.11it/s]



LFW: 0.7322  | train_loss: 18.8750

best model saved

epoch 7/14


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.7333  | train_loss: 18.2739

best model saved

epoch 8/14
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 1.0000000000000002e-06
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 1.0000000000000002e-06
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]



LFW: 0.7335  | train_loss: 18.3600

best model saved

epoch 9/14


100%|██████████| 304/304 [04:32<00:00,  1.12it/s]



LFW: 0.7340  | train_loss: 18.0994

best model saved

epoch 10/14
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 1.0000000000000002e-07
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 1.0000000000000002e-07
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


100%|██████████| 304/304 [04:31<00:00,  1.12it/s]


In [None]:
'''
SOTA: State of the art

1. Downloading the full dataset of MS1M and LFW
2. conf.mode = 'ir'
3. conf.depth = 100
4. conf.total_epoch = 20
5. conf.milestones = [12,16,18]

lfw = 99.83%

2 v100 (32gb) --> 5 days for training


If you want to run these weights on an edge device (a camera, a phone, or similar), check out MobileFaceNet.
'''