In [1]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import cv2
from random import shuffle
from PIL import Image

%matplotlib inline

In [3]:
TARGET_SIZE = (224, 224) #(250, 250)
INPUT_SIZE = (224, 224) #(192,192)
#INPUT_SIZE = (299, 299)

DATA_DIR = '/home/HDD6TB/datasets/images/vggface2/'
ALL_DATA_DIR = DATA_DIR+'all/'
PREPARED_DATA_DIR = DATA_DIR+'tf1/'#'faces_%d/'%(TARGET_SIZE[0])
TRAIN_DATA_DIR = PREPARED_DATA_DIR+'train'
VAL_DATA_DIR = PREPARED_DATA_DIR+'val'
print(TRAIN_DATA_DIR,VAL_DATA_DIR)

/home/HDD6TB/datasets/images/vggface2/tf1/train /home/HDD6TB/datasets/images/vggface2/tf1/val


In [4]:
def log_progress(sequence, every=None, size=None, name='Items'):
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    progress.value = index
                    label.value = u'{name}: {index} / {size}'.format(
                        name=name,
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )

---

# Prepare dataset

In [5]:
TRAIN_RATIO = 0.9

In [6]:
def is_specialfile(path,exts):
    _, file_extension = os.path.splitext(path)
    return file_extension.lower() in exts

img_extensions=['.jpg','.jpeg','.png']
def is_image(path):
    return is_specialfile(path,img_extensions)

In [8]:
from facial_analysis import FacialImageProcessing
imgProcessing=FacialImageProcessing()

Tensor("age_pred/Softmax:0", shape=(?, 100), dtype=float32) Tensor("gender_pred/Sigmoid:0", shape=(?, 1), dtype=float32) Tensor("global_pooling/Mean:0", shape=(?, 1024), dtype=float32)
Tensor("input_1:0", shape=(?, 224, 224, 3), dtype=float32) 224 224


In [18]:
def process_image(src_file,dest_file):
    try:
        #Image.open(src_file).crop(bbox).save(dest_file)
        #Image.open(src_file).thumbnail(INPUT_SIZE, Image.ANTIALIAS).save(dest_file)
        im = Image.open(src_file)
        im=im.resize(TARGET_SIZE, Image.LANCZOS)
        im.save(dest_file)
        #print(src_file,dest_file)
    except IOError:
        print ("cannot create thumbnail for '%s'" % src_file)


def process_face_image(src_file,dest_file):
    try:
        draw = cv2.imread(src_file)
        img=cv2.cvtColor(draw,cv2.COLOR_BGR2RGB)
        bounding_boxes, points = imgProcessing.detect_faces(img)
        best_bb=[]
        best_square=0
        for b in bounding_boxes:
            b=[int(bi) for bi in b]
            #print(b,img.shape)
            x1,y1,x2,y2=b[0:4]
            if x2>x1 and y2>y1:
                sq=(x2-x1)*(y2-y1)
                if sq>best_square:
                    best_square=sq
                    best_bb=b
        
        if len(best_bb)!=0:
            img_h,img_w,_=img.shape
            face_x,face_y=best_bb[0],best_bb[1]
            face_w,face_h=(best_bb[2]-best_bb[0]),(best_bb[3]-best_bb[1])
            dw,dh=20,40#max(int(face_w*0.05),10),max(int(face_h*0.05),10)
            #sz=max(face_w+2*dw,face_h+2*dh)
            #dw,dh=(sz-face_w)//2,(sz-face_h)//2

            box = (max(0,face_x-dw), max(0,face_y-dh), min(img_w,face_x+face_w+dw), min(img_h,face_y+face_h+dh))
            
            face_img=draw[box[1]:box[3],box[0]:box[2],:]
            face_img = cv2.resize(face_img, TARGET_SIZE)
            cv2.imwrite(dest_file,face_img)
        else:
            print('No faces found for ', src_file)
        
        #print(src_file,dest_file)
    except IOError:
        print ("cannot create facial image for '%s'" % src_file)


In [2]:
for person in log_progress(os.listdir(ALL_DATA_DIR)):
    #print(person)
    person_dir=os.path.join(ALL_DATA_DIR,person)
    files=[f for f in os.listdir(person_dir) if is_image(f)]
    shuffle(files)
    train_files=files[: int(len(files) * TRAIN_RATIO)]
    train_dir=os.path.join(TRAIN_DATA_DIR,person)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    for f in train_files:
        #process_image(os.path.join(person_dir,f),os.path.join(train_dir,f))
        process_face_image(os.path.join(person_dir,f),os.path.join(train_dir,f))

    val_files=files[int(len(files) * TRAIN_RATIO):]
    val_dir=os.path.join(VAL_DATA_DIR,person)
    if not os.path.exists(val_dir):
        os.makedirs(val_dir)
    for f in val_files:
        #process_image(os.path.join(person_dir,f),os.path.join(val_dir,f))
        process_face_image(os.path.join(person_dir,f),os.path.join(val_dir,f))
    

---

# Train

In [5]:
import glob
from itertools import chain
import os
import random
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

print(f"Torch: {torch.__version__}")

Torch: 1.7.1+cu110


In [6]:
# Training settings
batch_size = 48#64 #48# 32 #8 #16 #
epochs = 40
lr = 3e-5
gamma = 0.7
seed = 42
device = 'cuda'
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


In [7]:
#net_description='affectnet_'+net_description
train_dir,test_dir=TRAIN_DATA_DIR,VAL_DATA_DIR

print(train_dir,test_dir)

/home/HDD6TB/datasets/images/vggface2/tf1/train /home/HDD6TB/datasets/images/vggface2/tf1/val


In [8]:
IMG_SIZE_ORIG=256
IMG_SIZE=224 # 300 # 80 #
train_transforms = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE_ORIG,IMG_SIZE_ORIG)),
        #transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8,1.2), shear=0.2),
        transforms.RandomResizedCrop(IMG_SIZE),
        #transforms.Resize((IMG_SIZE,IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ]
)

test_transforms = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE,IMG_SIZE)),
        #transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ]
)

In [9]:
kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transforms)
test_loader  = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs) 

print(len(train_dataset), len(test_dataset))

3067564 243722


In [10]:
num_classes=len(train_dataset.classes)
print(num_classes)

9131


In [11]:
# loss function
criterion = nn.CrossEntropyLoss()

In [12]:
from robust_optimization import RobustOptimizer

In [13]:
def test(model):
    epoch_val_accuracy = 0
    epoch_val_loss = 0
    model.eval()
    with torch.no_grad():
        for data, label in test_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().sum()
            epoch_val_accuracy += acc
            epoch_val_loss += val_loss
    epoch_val_accuracy /= len(test_dataset)
    epoch_val_loss /= len(test_dataset)
    return epoch_val_accuracy,epoch_val_loss

In [14]:
import copy
def train(model,n_epochs=epochs, learningrate=lr, robust=False):
    # optimizer
    if robust:
        optimizer = RobustOptimizer(filter(lambda p: p.requires_grad, model.parameters()), optim.Adam, lr=learningrate)
    else:
        optimizer=optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learningrate)
    # scheduler
    #scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
    best_acc=0
    best_model=None
    for epoch in range(n_epochs):
        epoch_loss = 0
        epoch_accuracy = 0
        model.train()
        for data, label in tqdm(train_loader):
            data = data.to(device)
            label = label.to(device)

            output = model(data)
            loss = criterion(output, label)

            if robust:
                #optimizer.zero_grad()
                loss.backward()
                optimizer.first_step(zero_grad=True)
  
                # second forward-backward pass
                output = model(data)
                loss = criterion(output, label)
                loss.backward()
                optimizer.second_step(zero_grad=True)
            else:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            acc = (output.argmax(dim=1) == label).float().sum()
            epoch_accuracy += acc
            epoch_loss += loss
        epoch_accuracy /= len(train_dataset)
        epoch_loss /= len(train_dataset)

        epoch_val_accuracy,epoch_val_loss=test(model)
        print(
            f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
        )
        if best_acc<epoch_val_accuracy:
            best_acc=epoch_val_accuracy
            best_model=copy.deepcopy(model.state_dict())
        #scheduler.step()
    
    if best_model is not None:
        model.load_state_dict(best_model)
        print(f"Best acc:{best_acc}")
        epoch_val_accuracy,epoch_val_loss=test(model)
        print(
            f"val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
        )
    else:
        print(f"No best model Best acc:{best_acc}")

In [15]:
from torchvision.models import resnet101,mobilenet_v2
import timm
#model=resnet101(pretrained=True)
#model=mobilenet_v2(pretrained=True)
#model=torch.hub.load('rwightman/gen-efficientnet-pytorch', 'efficientnet_b0', pretrained=True)
model=timm.create_model('rexnet_150', pretrained=True) #'vit_base_patch16_224' 'tf_efficientnet_b4_ns'
#model=timm.create_model('tf_efficientnet_b2_ns', pretrained=True,features_only=True) 
print(model)

ReXNetV1(
  (stem): ConvBnAct(
    (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (act): SiLU(inplace=True)
    )
  )
  (features): Sequential(
    (0): LinearBottleneck(
      (conv_dw): ConvBnAct(
        (conv): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
      (act_dw): ReLU6()
      (conv_pwl): ConvBnAct(
        (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNormAct2d(
          24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
    )
    (1): LinearBottleneck(
      (conv_exp): ConvBnAct(
        (conv): Conv2d(24, 144, kernel_size=(1, 1), st

In [16]:
#model.classifier=nn.Linear(in_features=1536, out_features=num_classes) #1792 #1536 #1280 #1408
model.head.fc=nn.Linear(in_features=1920, out_features=num_classes)
model=model.to(device)
print(model)

ReXNetV1(
  (stem): ConvBnAct(
    (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (act): SiLU(inplace=True)
    )
  )
  (features): Sequential(
    (0): LinearBottleneck(
      (conv_dw): ConvBnAct(
        (conv): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
      (act_dw): ReLU6()
      (conv_pwl): ConvBnAct(
        (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNormAct2d(
          24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
    )
    (1): LinearBottleneck(
      (conv_exp): ConvBnAct(
        (conv): Conv2d(24, 144, kernel_size=(1, 1), st

In [22]:
if True:
    img = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)
    model=model.to(device)
    model.eval()
    f=model.forward(img)
    print(f.shape)
    model.train()

torch.Size([1, 9131])


In [17]:
#adapted from https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
def set_parameter_requires_grad(model, requires_grad):
    for param in model.parameters():
        param.requires_grad = requires_grad

In [18]:
set_parameter_requires_grad(model, requires_grad=False)
#set_parameter_requires_grad(model.classifier, requires_grad=True)
set_parameter_requires_grad(model.head.fc, requires_grad=True)
train(model,1,0.001,robust=True)

  0%|          | 0/63908 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed

Epoch : 1 - loss : 8.4975 - acc: 0.0269 - val_loss : 7.9544 - val_acc: 0.0354

Best acc:0.0354219526052475
val_loss : 7.9544 - val_acc: 0.0354



In [19]:
set_parameter_requires_grad(model, requires_grad=True)
train(model,10,1e-4,robust=True)

  0%|          | 0/63908 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed

Epoch : 5 - loss : 0.9608 - acc: 0.8278 - val_loss : 0.2339 - val_acc: 0.9567



  0%|          | 0/63908 [00:05<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceed

In [20]:
epoch_val_accuracy,epoch_val_loss=test(model)
print(f"val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n")
#val_loss : 0.2196 - val_acc: 0.9564

val_loss : 0.1776 - val_acc: 0.9678



In [21]:
#PATH = "vggface2_enet3_new.pt"
PATH = "vggface2_rexnet_150.pt"

In [22]:
torch.save(model, PATH)

In [25]:
model = torch.load(PATH)
model.eval()

ReXNetV1(
  (stem): ConvBnAct(
    (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (act): SiLU(inplace=True)
    )
  )
  (features): Sequential(
    (0): LinearBottleneck(
      (conv_dw): ConvBnAct(
        (conv): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
      (act_dw): ReLU6()
      (conv_pwl): ConvBnAct(
        (conv): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNormAct2d(
          24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (act): Identity()
        )
      )
    )
    (1): LinearBottleneck(
      (conv_exp): ConvBnAct(
        (conv): Conv2d(24, 144, kernel_size=(1, 1), st

In [26]:
if False:
    class Identity(nn.Module):
        def __init__(self):
            super(Identity, self).__init__()

        def forward(self, x):
            return x

#model.classifier=torch.nn.Identity()
model.head.fc=torch.nn.Identity()
torch.save(model.state_dict(), 'state_'+PATH)

In [27]:
torch.save(model, 'features_'+PATH)