In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from random import randint

from torchvision import datasets, models, transforms
import torchvision

import pandas as pd

import numpy as np

import random
import os

%matplotlib inline
import matplotlib.pyplot as plt
import json
import cv2
import logging

from torch.utils.data import Dataset, DataLoader

from torch.optim.lr_scheduler import ReduceLROnPlateau
import imgaug.augmenters as iaa


# Root folder of the dataset; CelebAspoofDataset joins every 'Filepath' entry onto it.
path_local = '../../../DATA/spoof_sets/CelebA_Spoof/'#'../input/celeba-spoof-for-face-antispoofing/CelebA_Spoof_/CelebA_Spoof/'
# Checkpoint whose state dict create_model() loads into a MobileNetV2 before replacing the head.
PATH_TO_MODEL = '../../../../DATA/SolovyevDA/models/hub/mobilenet_v2.pt'#'../models/hub/mobilenet_v2.pt' '../models/hub/wide_resnet50_2.pt'

In [None]:
# Wrap an augmenter so that it is only applied to about half of the images.
sometimes = lambda aug: iaa.Sometimes(0.5, aug)
# Training-time augmentation: a random subset of pixel-level degradations followed by
# a fixed geometric tail that always produces a 224x224 image.
seq = iaa.Sequential([
    # Pick between 0 and 5 of the degradations below for each image.
    iaa.SomeOf((0, 5),
    [
        iaa.AdditiveGaussianNoise(scale=(0, 0.2*255)),
        sometimes(iaa.AdditiveLaplaceNoise(scale=(0, 0.2*255))),
        iaa.CoarseDropout((0.0, 0.05), size_percent=(0.25, 0.35)),
        iaa.JpegCompression(compression=(50, 75)),
        sometimes(iaa.SaltAndPepper(0.1, per_channel=True)),
        iaa.MotionBlur(k=15),
        sometimes(iaa.GaussianBlur(sigma=(0.0, 3.0))),
        sometimes(iaa.RemoveSaturation(0.25)),
        sometimes(iaa.AddToHueAndSaturation((-10, 10), per_channel=True)),
        sometimes(iaa.LogContrast(gain=(0.6, 1.4)))
    ]),
    # Geometric tail, always applied in this order: pad to a square, rotate by an
    # angle drawn from (0, 20), then resize to the network input size.
    iaa.size.PadToSquare(),
    iaa.Rotate((0, 20)),
    iaa.size.Resize((224, 224))
])

In [None]:
def get_class(x):
    """Collapse a multi-class label into a binary one.

    Returns 0 when ``x`` equals 0 and 1 for every other value.
    """
    return 0 if x == 0 else 1

In [None]:
def filter_df(df, lower_n = False, binary = False, n = 6, onlydisplay=False):
    """Select rows of a metadata frame by 'Class' and optionally binarise the labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table with an integer 'Class' column.
    lower_n : bool
        Keep only rows whose 'Class' is strictly below ``n``.
    binary : bool
        Replace 'Class' by 0 where it equals 0 and by 1 everywhere else.
    n : int
        Threshold used by ``lower_n``.
    onlydisplay : bool
        Keep only rows whose 'Class' is one of 0, 7, 8 or 9.

    Returns
    -------
    pandas.DataFrame
        The filtered frame. Each applied filter calls ``reset_index()``, so the
        previous index is kept as an extra column ('index', then 'level_0').
        The frame passed in is never modified; when no option is enabled the
        very same object is returned.
    """
    if onlydisplay:
        df = df[df['Class'].isin([0, 7, 8, 9])].reset_index()

    if lower_n:
        df = df[df['Class'] < n].reset_index()

    if binary:
        # assign() builds a new frame, so the caller's 'Class' column is left
        # untouched even when no filter above produced a copy first.
        df = df.assign(Class=(df['Class'] != 0).astype('int64'))

    return df

In [None]:
class FocalLoss(nn.modules.loss._WeightedLoss):
    """Multi-class focal loss: ``-(1 - p_t) ** gamma * log(p_t)`` per sample.

    ``p_t`` is the softmax probability the model assigns to the true class, so
    well-classified samples (``p_t`` close to 1) are down-weighted.

    Parameters
    ----------
    weight : torch.Tensor or None
        Optional per-class weights of shape ``(n_classes,)``; each sample's loss
        is multiplied by the weight of its target class.
    gamma : float
        Focusing exponent; ``gamma=0`` reduces to plain cross-entropy.
    reduction : {'mean', 'sum', 'none'}
        'mean' averages over the batch, 'sum' adds the per-sample losses and
        'none' returns the per-sample vector.
    """

    def __init__(self, weight=None, gamma=2, reduction='mean'):
        # The base class stores `weight` as a buffer and keeps `reduction`.
        super(FocalLoss, self).__init__(weight, reduction=reduction)
        self.gamma = gamma

    def forward(self, input, target):
        """Return the focal loss for logits ``input`` (N, C) and class indices ``target`` (N,)."""
        # Per-sample, unweighted log-probability of the true class. The focal
        # factor must be computed per sample, before any batch reduction, and
        # from the real probability rather than a class-weighted quantity.
        log_pt = -F.cross_entropy(input, target, reduction='none')
        pt = torch.exp(log_pt)
        focal_loss = -((1 - pt) ** self.gamma) * log_pt

        if self.weight is not None:
            focal_loss = focal_loss * self.weight.to(focal_loss)[target]

        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        if self.reduction == 'none':
            return focal_loss
        raise ValueError('unsupported reduction: ' + str(self.reduction))

In [None]:
# Per-split preprocessing applied to the RGB array returned by the image reader.
data_transforms = {
    # Training: random imgaug pipeline `seq` (which also pads, rotates and resizes
    # to 224x224), then tensor conversion and channel-wise normalisation.
    'train': transforms.Compose([
        seq.augment_image,
        transforms.ToTensor(),
        # These constants match the commonly used ImageNet channel mean / std.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Validation / test: deterministic path only - pad to square, resize to
    # 224x224 through PIL, then the same tensor conversion and normalisation.
    'test': transforms.Compose([
        iaa.size.PadToSquare().augment_image,
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),   
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  
    ]),
}

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
def create_model(n_classes):
    """Build a MobileNetV2 initialised from ``PATH_TO_MODEL`` with a new classifier head.

    Parameters
    ----------
    n_classes : int
        Number of output units of the replacement linear layer.

    Returns
    -------
    torch.nn.Module
        The network, still on the CPU; the caller moves it to the target device.
    """
    # Architecture only; the weights come from the local checkpoint below.
    model = torchvision.models.mobilenet_v2(pretrained=False)

    # map_location keeps loading device-independent: a checkpoint saved from a
    # GPU would otherwise fail to deserialise on a CPU-only machine.
    state_dict = torch.load(PATH_TO_MODEL, map_location='cpu')
    model.load_state_dict(state_dict)

    # Size the new head from the layer it replaces instead of hard-coding 1280.
    old_head = model.classifier[1]
    model.classifier[1] = nn.Linear(old_head.in_features, n_classes)

    return model

In [None]:
def random_blur(img, qual):
    """Box-blur one randomly chosen quadrant of ``img`` in place and return ``img``.

    ``qual`` is the side length of the square blur kernel. Images that are
    fewer than two pixels wide or tall are returned unchanged.
    """
    quadrant = randint(1, 4)
    half_h, half_w = img.shape[0] // 2, img.shape[1] // 2

    # A quadrant would be empty, so there is nothing to blur.
    if half_w == 0 or half_h == 0:
        return img

    upper, lower = slice(None, half_h), slice(half_h, None)
    left, right = slice(None, half_w), slice(half_w, None)
    regions = {
        1: (upper, left),
        2: (upper, right),
        3: (lower, left),
    }
    rows, cols = regions.get(quadrant, (lower, right))

    img[rows, cols] = cv2.blur(img[rows, cols], (qual, qual))
    return img

In [None]:
def random_crop(img):
    """Return one randomly chosen quadrant of ``img`` (a view, not a copy)."""
    quadrant = randint(1, 4)
    mid_row, mid_col = img.shape[0] // 2, img.shape[1] // 2

    # Quadrants 1 and 2 are the upper half, 1 and 3 the left half.
    rows = slice(None, mid_row) if quadrant in (1, 2) else slice(mid_row, None)
    cols = slice(None, mid_col) if quadrant in (1, 3) else slice(mid_col, None)

    return img[rows, cols]
    

In [None]:
def pad(img, y1, y2, x1, x2, percent):
    """Crop a window around a box so that the box covers roughly ``percent`` of it.

    ``(x1, y1)``-``(x2, y2)`` is the box inside ``img`` (H x W x C). If the box
    already covers at least ``percent`` of the whole image, ``img`` itself is
    returned. Otherwise a square window of area ``box_area / percent`` centred
    on the box is cut out; a window sticking out on one side is shifted back
    inside, and one sticking out on both sides spans the full axis.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    box_area = (y2 - y1) * (x2 - x1)

    if box_area / (img_h * img_w) >= percent:
        return img

    half_side = np.sqrt(box_area / percent).astype(int) // 2
    full_side = half_side * 2

    def fit(lo, hi, limit):
        # Fit the interval [lo, hi) into [0, limit) along one axis.
        lo_inside = lo >= 0
        hi_inside = hi <= limit
        if lo_inside and hi_inside:
            return lo, hi
        if not lo_inside and not hi_inside:
            return 0, limit
        if not lo_inside:
            # Overflows at the start only: anchor the window at 0.
            return 0, full_side
        # Overflows at the end only: anchor the window at the far edge.
        shifted = limit - full_side
        return (0 if shifted < 0 else shifted), limit

    mid_x = (x1 + x2) / 2.
    mid_y = (y1 + y2) / 2.

    left, right = fit(int(mid_x - half_side), int(mid_x + half_side), img_w)
    top, bottom = fit(int(mid_y - half_side), int(mid_y + half_side), img_h)

    return img[top:bottom, left:right, :]
            

def _load_image_and_bbox(image_path):
    """Read an image and the face box stored in its sibling ``.txt`` label file.

    The label file is ``image_path`` with its last four characters replaced by
    '.txt'; its first line must hold four space-separated numbers. The first two
    are the left/top corner (clamped to 0). The last two are treated as the
    right/bottom edge, because width and height are obtained by subtracting the
    corner from them.

    Returns ``(img, bbox)`` where ``img`` is the BGR array from ``cv2.imread``
    and ``bbox`` is ``(x, y, width, height)``, or ``None`` when the label line is
    malformed or describes an empty box (the problem is logged in that case).

    Raises FileNotFoundError when the image cannot be read and AssertionError
    when the label file is missing.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError('image can not be read' + ' ' + image_path)

    label_path = image_path[:-4] + '.txt'
    assert os.path.exists(label_path),'path not exists' + ' ' + image_path # check ur labeles in ur dataset, mb u need "+ '_BB.txt'" or smth else

    with open(label_path,'r') as f:
        material = f.readline()

    try:
        x,y,w,h = material.strip().split(' ')
    except ValueError:
        logging.info('Bounding Box of' + ' ' + image_path + ' ' + 'is wrong')
        return img, None

    try:
        x, y, w, h = (int(float(value)) for value in (x, y, w, h))
    except (ValueError, OverflowError):
        logging.info('Cropping Bounding Box of' + ' ' + image_path + ' ' + 'goes wrong')
        return img, None

    x = max(x, 0)
    y = max(y, 0)
    w = w - x
    h = h - y

    if w <= 0 or h <= 0:
        # An inverted or empty box cannot produce a usable crop.
        logging.info('Cropping Bounding Box of' + ' ' + image_path + ' ' + 'goes wrong')
        return img, None

    return img, (x, y, w, h)


def _crop_or_full(img, crop, image_path):
    """Return ``crop`` unless it is empty, in which case log it and keep ``img``."""
    if crop.size == 0:
        logging.info('Cropping Bounding Box of' + ' ' + image_path + ' ' + 'goes wrong')
        return img
    return crop


def read_image_with_padding_adap(image_path, p=0.3):
    """Read an image and crop it with ``pad`` so the face box fills about ``p`` of the crop.

    Falls back to the full image when the box is unusable or cropping fails.
    Returns an RGB array.
    """
    img, bbox = _load_image_and_bbox(image_path)

    if bbox is not None:
        x, y, w, h = bbox
        try:
            crop = pad(img, y, y+h, x, x+w, p)
            # insert here needed func for ur pipeline (crop/blur)
        except Exception:
            # Best effort: a failing crop must not abort data loading.
            logging.info('Cropping Bounding Box of' + ' ' + image_path + ' ' + 'goes wrong')
        else:
            img = _crop_or_full(img, crop, image_path)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def read_image_with_padding_default(image_path, pad_scale=0.35):
    """Read an image and crop the face box, squared and enlarged by ``pad_scale``.

    The box is first widened (or narrowed) symmetrically so its width equals its
    height, then grown by ``pad_scale`` of its size (half on each side) and
    clipped to the image. Falls back to the full image when the box is unusable.
    Returns an RGB array.
    """
    img, bbox = _load_image_and_bbox(image_path)

    if bbox is not None:
        real_h, real_w = img.shape[0], img.shape[1]
        x, y, w, h = bbox

        # Make face bbox square
        x1_sq = int(x - ((h - w) / 2))
        x2_sq = int(x + w + ((h - w) / 2))
        w_sq = x2_sq - x1_sq
        h_sq = h

        # create padding coordinats
        h_pad = h_sq + int(h_sq * pad_scale)
        w_pad = w_sq + int(w_sq * pad_scale)
        x_pad = x1_sq - int(w_sq * pad_scale / 2)
        y_pad = y - int(h_sq * pad_scale / 2)

        # Clip the padded window to the image. The far edge is measured from the
        # unclipped origin so clipping the near edge does not move it.
        y1_pad = max(y_pad, 0)
        x1_pad = max(x_pad, 0)
        y2_pad = min(y_pad + h_pad, real_h)
        x2_pad = min(x_pad + w_pad, real_w)

        img = _crop_or_full(img, img[y1_pad:y2_pad,x1_pad:x2_pad,:], image_path)
        # insert here needed func for ur pipeline (crop/blur)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def read_image_with_padding_face_only(image_path):
    """Read an image and crop it to exactly the face box from its label file.

    Falls back to the full image when the box is unusable. Returns an RGB array.
    """
    img, bbox = _load_image_and_bbox(image_path)

    if bbox is not None:
        x, y, w, h = bbox
        img = _crop_or_full(img, img[y:y+h, x:x+w, :], image_path)
        # insert here needed func for ur pipeline (crop/blur)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Each split is restricted to classes below 6 and then binarised (0 stays 0,
# everything else becomes 1); the printed number is the remaining row count.
df_train = filter_df(pd.read_csv('train_v2.csv'), lower_n=True, binary=True, n=6, onlydisplay=False)
print(len(df_train))

df_val = filter_df(pd.read_csv('val_v2.csv'), lower_n=True, binary=True, n=6, onlydisplay=False)
print(len(df_val))

df_test = filter_df(pd.read_csv('test.csv'), lower_n=True, binary=True, n=6, onlydisplay=False)
print(len(df_test))

237471
87349
45411


In [None]:
class CelebAspoofDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a metadata frame.

    Parameters
    ----------
    root : str
        Folder that every 'Filepath' entry is joined onto.
    meta : pandas.DataFrame
        Table with the columns "Filepath" and "Class".
    transform : callable or None
        Applied to the image returned by the reader, if given.
    reader : callable or None
        Function ``path -> image`` used to load a sample. Defaults to
        ``read_image_with_padding_adap``; pass the default or face-only reader
        to switch the cropping strategy.
    """

    def __init__(self, root, meta, transform=None, reader=None):
        self.root = root
        self.meta = meta # csv file with cols "Filepath", "Class"
        self.transform = transform
        self.reader = reader

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        # Samplers hand out positions 0..len-1, so rows are addressed by
        # position; a label lookup would break for any frame whose index is not
        # the default RangeIndex.
        img_path = os.path.join(self.root, self.meta['Filepath'].iloc[idx])
        label = self.meta['Class'].iloc[idx]

        # Resolved lazily so that the module-level default is only needed when
        # no reader was supplied.
        reader = self.reader if self.reader is not None else read_image_with_padding_adap
        face = reader(img_path)

        if self.transform:
            face = self.transform(face)

        return (face, label)

In [None]:
# One dataset per split; only the training split uses the random augmentation pipeline.
# NOTE(review): the name `train` is rebound by the later `def train():` cell, so
# re-running the DataLoader cell after that definition would pass a function
# instead of this dataset.
train = CelebAspoofDataset(path_local, df_train, data_transforms['train'])
val = CelebAspoofDataset(path_local, df_val, data_transforms['test'])
test = CelebAspoofDataset(path_local, df_test, data_transforms['test'])

In [None]:
# Samples per mini-batch for all three loaders.
batch_size = 128
# Upper bound on the number of passes over the training set in the training loop.
epochs = 500

In [None]:
# Only the training loader shuffles; validation and test keep the row order of
# their metadata frames, which the F-score computation on the test set relies on.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, num_workers=1)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=1)

In [None]:
# The head gets one output per distinct label present in the training frame.
model = create_model(len(np.unique(list(df_train['Class'])))).to(device)

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights computed from the training labels; they are passed to the
# loss in the next cell.
class_wts = compute_class_weight(class_weight='balanced', classes=np.unique(list(df_train['Class'])), y=list(df_train['Class']))

print(class_wts)

[0.99516813 1.00487902]


In [None]:
# Class weights as a float tensor on the same device as the model.
weights = torch.tensor(class_wts, dtype=torch.float)
weights = weights.to(device)

optimizer = optim.AdamW(model.parameters(), lr=0.001)
# Halve the learning rate when the monitored value (the validation loss passed to
# scheduler.step in the training loop) has stopped improving for `patience` epochs.
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2,verbose=True)
criterion = FocalLoss(weight=weights)

In [None]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. Returns ``(avg_loss, total_preds)``: the mean of the per-batch
    losses and the raw model outputs of every batch stacked along axis 0.
    """
    model.train()

    n_batches = len(train_loader)
    running_loss = 0.0
    batch_outputs = []

    for step, batch in enumerate(train_loader):
        # Progress line every 50 batches (but not for the very first one).
        if step != 0 and step % 50 == 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, n_batches))

        images, labels = [tensor.to(device) for tensor in batch]

        model.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

        batch_outputs.append(outputs.detach().cpu().numpy())

    avg_loss = running_loss / n_batches
    total_preds = np.concatenate(batch_outputs, axis=0)

    return avg_loss, total_preds

In [None]:
def evaluate(loader):
    """Score the notebook-level ``model`` on every batch of ``loader`` without training.

    Returns ``(avg_loss, total_preds)``: the mean of the per-batch ``criterion``
    values and the raw model outputs of every batch stacked along axis 0.
    """
    print("\nEvaluating...")

    model.eval()

    n_batches = len(loader)
    running_loss = 0.0
    batch_outputs = []

    # No gradients are needed anywhere in the evaluation pass.
    with torch.no_grad():
        for step, batch in enumerate(loader):
            # Progress line every 50 batches (but not for the very first one).
            if step != 0 and step % 50 == 0:
                print('  Batch {:>5,}  of  {:>5,}.'.format(step, n_batches))

            images, labels = [tensor.to(device) for tensor in batch]

            outputs = model(images)
            running_loss += criterion(outputs, labels).item()
            batch_outputs.append(outputs.detach().cpu().numpy())

    avg_loss = running_loss / n_batches
    total_preds = np.concatenate(batch_outputs, axis=0)

    return avg_loss, total_preds

In [None]:
from sklearn.metrics import f1_score

# Every checkpoint and loss log of this run is written into this folder. Create
# it up front: otherwise the first torch.save fails only after a full epoch.
out_dir = 'pad_0.3'
os.makedirs(out_dir, exist_ok=True)


def _append_loss(file_name, value):
    """Append one loss value as its own line to a log file inside ``out_dir``."""
    with open(out_dir + '/' + file_name, 'a') as f:
        f.write(str(value) + '\n')


# set initial loss to infinite
best_valid_loss = float('inf')

# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]
test_losses=[]

# Ground-truth test labels, computed once and reused for the F-score of every
# epoch. The test loader does not shuffle, so they line up with the predictions.
ys = list(filter_df(df_test, False, True)['Class'])

#for each epoch
for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
    train_loss, _ = train()

    valid_loss, _ = evaluate(val_loader)
    # The scheduler lowers the learning rate when the validation loss plateaus.
    scheduler.step(valid_loss)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss

    # A checkpoint is kept for every epoch, not only for the best one.
    torch.save(model.state_dict(), out_dir + '/mobnet_v2_' + str(epoch) + '.pt')

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    _append_loss('train_loss.txt', train_loss)
    _append_loss('valid_loss.txt', valid_loss)

    test_loss, preds = evaluate(test_loader)
    test_losses.append(test_loss)

    _append_loss('test_loss.txt', test_loss)

    # Raw outputs -> predicted class index.
    preds = np.argmax(preds, axis = 1)

    f_score = f1_score(ys, preds, average = 'macro')

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')
    print(f'\nF-score on test-set: {f_score:.3f}')

print(f'\nBest valid loss: {best_valid_loss:.3f}')


 Epoch 1 / 500
  Batch    50  of  1,856.
  Batch   100  of  1,856.
  Batch   150  of  1,856.
  Batch   200  of  1,856.
  Batch   250  of  1,856.
  Batch   300  of  1,856.
  Batch   350  of  1,856.
  Batch   400  of  1,856.
  Batch   450  of  1,856.
  Batch   500  of  1,856.
  Batch   550  of  1,856.
  Batch   600  of  1,856.
  Batch   650  of  1,856.
  Batch   700  of  1,856.
  Batch   750  of  1,856.
  Batch   800  of  1,856.
  Batch   850  of  1,856.
  Batch   900  of  1,856.
  Batch   950  of  1,856.
  Batch 1,000  of  1,856.
  Batch 1,050  of  1,856.
  Batch 1,100  of  1,856.
  Batch 1,150  of  1,856.
  Batch 1,200  of  1,856.
  Batch 1,250  of  1,856.
  Batch 1,300  of  1,856.
  Batch 1,350  of  1,856.
  Batch 1,400  of  1,856.
  Batch 1,450  of  1,856.
  Batch 1,500  of  1,856.
  Batch 1,550  of  1,856.
  Batch 1,600  of  1,856.
  Batch 1,650  of  1,856.
  Batch 1,700  of  1,856.
  Batch 1,750  of  1,856.
  Batch 1,800  of  1,856.
  Batch 1,850  of  1,856.

Evaluating...
  Batch

  Batch   400  of  1,856.
  Batch   450  of  1,856.
  Batch   500  of  1,856.
  Batch   550  of  1,856.
  Batch   600  of  1,856.
  Batch   650  of  1,856.
  Batch   700  of  1,856.
  Batch   750  of  1,856.
  Batch   800  of  1,856.
  Batch   850  of  1,856.
  Batch   900  of  1,856.
  Batch   950  of  1,856.
  Batch 1,000  of  1,856.
  Batch 1,050  of  1,856.
  Batch 1,100  of  1,856.
  Batch 1,150  of  1,856.
  Batch 1,200  of  1,856.
  Batch 1,250  of  1,856.
  Batch 1,300  of  1,856.
  Batch 1,350  of  1,856.
  Batch 1,400  of  1,856.
  Batch 1,450  of  1,856.
  Batch 1,500  of  1,856.
  Batch 1,550  of  1,856.
  Batch 1,600  of  1,856.
  Batch 1,650  of  1,856.
  Batch 1,700  of  1,856.
  Batch 1,750  of  1,856.
  Batch 1,800  of  1,856.
  Batch 1,850  of  1,856.

Evaluating...
  Batch    50  of    683.
  Batch   100  of    683.
  Batch   150  of    683.
  Batch   200  of    683.
  Batch   250  of    683.
  Batch   300  of    683.
  Batch   350  of    683.
  Batch   400  of    68

  Batch   650  of  1,856.
  Batch   700  of  1,856.
  Batch   750  of  1,856.
  Batch   800  of  1,856.
  Batch   850  of  1,856.
  Batch   900  of  1,856.
  Batch   950  of  1,856.
  Batch 1,000  of  1,856.
  Batch 1,050  of  1,856.
  Batch 1,100  of  1,856.
  Batch 1,150  of  1,856.
  Batch 1,200  of  1,856.
  Batch 1,250  of  1,856.
  Batch 1,300  of  1,856.
  Batch 1,350  of  1,856.
  Batch 1,400  of  1,856.
  Batch 1,450  of  1,856.
  Batch 1,500  of  1,856.
  Batch 1,550  of  1,856.
  Batch 1,600  of  1,856.
  Batch 1,650  of  1,856.
  Batch 1,700  of  1,856.
  Batch 1,750  of  1,856.
  Batch 1,800  of  1,856.
  Batch 1,850  of  1,856.

Evaluating...
  Batch    50  of    683.
  Batch   100  of    683.
  Batch   150  of    683.
  Batch   200  of    683.
  Batch   250  of    683.
  Batch   300  of    683.
  Batch   350  of    683.
  Batch   400  of    683.
  Batch   450  of    683.
  Batch   500  of    683.
  Batch   550  of    683.
  Batch   600  of    683.
  Batch   650  of    68

  Batch 1,050  of  1,856.
  Batch 1,100  of  1,856.
  Batch 1,150  of  1,856.
  Batch 1,200  of  1,856.
  Batch 1,250  of  1,856.
  Batch 1,300  of  1,856.
  Batch 1,350  of  1,856.
  Batch 1,400  of  1,856.
  Batch 1,450  of  1,856.
  Batch 1,500  of  1,856.
  Batch 1,550  of  1,856.
  Batch 1,600  of  1,856.
  Batch 1,650  of  1,856.
  Batch 1,700  of  1,856.
  Batch 1,750  of  1,856.
  Batch 1,800  of  1,856.
  Batch 1,850  of  1,856.

Evaluating...
  Batch    50  of    683.
  Batch   100  of    683.
  Batch   150  of    683.
  Batch   200  of    683.
  Batch   250  of    683.
  Batch   300  of    683.
  Batch   350  of    683.
  Batch   400  of    683.
  Batch   450  of    683.
  Batch   500  of    683.
  Batch   550  of    683.
  Batch   600  of    683.
  Batch   650  of    683.

Evaluating...
  Batch    50  of    355.
  Batch   100  of    355.
  Batch   150  of    355.
  Batch   200  of    355.
  Batch   250  of    355.
  Batch   300  of    355.
  Batch   350  of    355.

Trainin

  Batch 1,300  of  1,856.
  Batch 1,350  of  1,856.
  Batch 1,400  of  1,856.
  Batch 1,450  of  1,856.
  Batch 1,500  of  1,856.
  Batch 1,550  of  1,856.
  Batch 1,600  of  1,856.
  Batch 1,650  of  1,856.
  Batch 1,700  of  1,856.
  Batch 1,750  of  1,856.
  Batch 1,800  of  1,856.
  Batch 1,850  of  1,856.

Evaluating...
  Batch    50  of    683.
  Batch   100  of    683.
  Batch   150  of    683.
  Batch   200  of    683.
  Batch   250  of    683.
  Batch   300  of    683.
  Batch   350  of    683.
  Batch   400  of    683.
  Batch   450  of    683.
  Batch   500  of    683.
  Batch   550  of    683.
  Batch   600  of    683.
  Batch   650  of    683.

Evaluating...
  Batch    50  of    355.
  Batch   100  of    355.
  Batch   150  of    355.
  Batch   200  of    355.
  Batch   250  of    355.
  Batch   300  of    355.
  Batch   350  of    355.

Training Loss: 0.000
Validation Loss: 0.000

F-score on test-set: 0.858

 Epoch 22 / 500
  Batch    50  of  1,856.
  Batch   100  of  1,8