In [1]:
import torch
import torchvision

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.optim import lr_scheduler

import time
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import *

from FaceMaskDataset import FaceMaskDataset
from FaceMaskModel import MobileNetV3

sns.set()


Matplotlib is building the font cache using fc-list. This may take a moment.


In [None]:
def _evaluate_split(model, dataloader, dataset_size, criterion, device):
    """Run one gradient-free pass over `dataloader` and compute its metrics.

    Args:
        model: Network to evaluate; the caller is expected to have put it in eval mode.
        dataloader: Iterable yielding 3-tuples whose 2nd and 3rd items are the
            image batch and the label batch (the 1st item is ignored).
        dataset_size: Number of samples used to normalise the summed loss.
        criterion: Loss callable taking (outputs, labels). The per-batch value is
            weighted by batch size, i.e. it is treated as a batch mean.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with keys 'loss', 'f1', 'precision', 'recall', 'accuracy', 'roc_auc'.
        All metrics except 'loss' are percentages. 'roc_auc' is NaN when the
        metric function rejects the input with ValueError (e.g. one class only).
    """
    total_loss = 0.0
    # Seed each list with an empty float array so the concatenated result is a
    # flat float64 array even when the loader yields nothing.
    true_chunks = [np.empty(0)]
    pred_chunks = [np.empty(0)]
    score_chunks = [np.empty(0)]

    with torch.no_grad():
        for _, images, labels in dataloader:
            true_chunks.append(labels.detach().cpu().numpy().ravel())

            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Column 1 of the softmax is used as the score of the positive class.
            score_chunks.append(F.softmax(outputs, 1)[:, 1].cpu().numpy().ravel())
            pred_chunks.append(torch.max(outputs, 1)[1].cpu().numpy().ravel())

            total_loss += loss.item() * images.size(0)

    # Concatenate once instead of np.append per batch (which re-copies every time).
    y_true = np.concatenate(true_chunks)
    y_pred = np.concatenate(pred_chunks)
    y_score = np.concatenate(score_chunks)

    try:
        roc_auc = roc_auc_score(y_true, y_score) * 100
    except ValueError:
        # Undefined ROC-AUC (e.g. a split with a single class) must not abort training.
        roc_auc = float('nan')

    return {
        'loss': total_loss / dataset_size,
        'accuracy': accuracy_score(y_true, y_pred) * 100,
        'precision': precision_score(y_true, y_pred, zero_division=0) * 100,
        'recall': recall_score(y_true, y_pred, zero_division=0) * 100,
        'f1': f1_score(y_true, y_pred, average='binary', zero_division=0) * 100,
        'roc_auc': roc_auc,
    }


def train_model(model, train_dataloader, eval_dataloaders, datasets_sizes, criterion, optimizer, device='cuda:0', num_epochs=20, print_epoch=1, scheduler=None, data_types=('train', 'test'), save_model=False):
    """Train `model`, evaluate it after every epoch and restore the best weights.

    Epoch 0 is evaluation-only (a baseline before any update); epochs
    1..num_epochs each run one pass over `train_dataloader` followed by an
    evaluation of every split in `data_types`.

    Args:
        model: Network to train; modified in place.
        train_dataloader: Iterable yielding (ignored, images, labels) batches.
        eval_dataloaders: Mapping split name -> iterable of the same batch shape.
        datasets_sizes: Mapping split name -> sample count, used to average the loss.
        criterion: Loss callable taking (outputs, labels).
        optimizer: Optimizer stepped once per training batch.
        device: Device the batches are moved to.
        num_epochs: Number of training epochs (the baseline epoch 0 is extra).
        print_epoch: Currently unused; kept for backward compatibility.
        scheduler: Optional LR scheduler, stepped once per training epoch.
        data_types: Split names to evaluate, looked up in `eval_dataloaders`
            and `datasets_sizes`.
        save_model: When true, write the selected state dict to
            'model_<best F1>.pkl' in the working directory.

    Returns:
        The same `model` object with the selected weights loaded.

    Checkpoint selection uses the F1 of the 'test' split when it is evaluated,
    otherwise the last split in `data_types`. If no split is evaluated at all,
    the final trained weights are kept.
    """
    data_types = list(data_types)
    # Previously selection only ever looked at a split literally named 'test';
    # without it the untrained snapshot taken below was reloaded at the end.
    if 'test' in data_types:
        selection_split = 'test'
    elif data_types:
        selection_split = data_types[-1]
    else:
        selection_split = None

    start_time = time.time()
    best_f1 = 0.0
    best_model = deepcopy(model.state_dict())

    for epoch in range(0, num_epochs + 1):
        start_epoch = time.time()
        print("Epoch [{}/{}]".format(str(epoch).zfill(len(str(num_epochs))), num_epochs))

        if epoch != 0:
            model.train()

            for _, images, labels in train_dataloader:
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Force gradients on even if the caller wrapped us in no_grad().
                with torch.enable_grad():
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

            if scheduler is not None:
                scheduler.step()

        model.eval()

        for data_type in data_types:
            metrics = _evaluate_split(
                model, eval_dataloaders[data_type], datasets_sizes[data_type], criterion, device)

            print('{} Loss: {:.4f} F1: {:2.2f} Precision: {:2.2f} Recall: {:2.2f} Accuracy: {:2.2f} ROC-AUC: {:2.2f}'.format(
                data_type.ljust(5), metrics['loss'], metrics['f1'], metrics['precision'],
                metrics['recall'], metrics['accuracy'], metrics['roc_auc']))

            if data_type == selection_split and metrics['f1'] > best_f1:
                best_f1 = metrics['f1']
                best_model = deepcopy(model.state_dict())

        epoch_elapsed = time.time() - start_epoch
        print('Epoch {} took {}m {:.0f}s'.format(
            epoch, int(epoch_elapsed // 60), epoch_elapsed % 60))

        print('-' * 90)

    time_elapsed = time.time() - start_time
    print('Training complete in {}m {:.0f}s'.format(
        int(time_elapsed // 60), time_elapsed % 60))

    if selection_split is None:
        # Nothing was evaluated, so there is no basis for picking a checkpoint.
        best_model = model.state_dict()
    else:
        # '{:.2f}' (two decimals) replaces the mistyped '{:2f}' (width 2, six decimals).
        print('Best {} F1: {:.2f}'.format(selection_split, best_f1))

    if save_model:
        # File-name format intentionally left as-is ('{:2f}' renders six decimals)
        # so anything that looks these checkpoints up by name keeps working.
        torch.save(best_model, 'model_{:2f}.pkl'.format(best_f1))
    model.load_state_dict(best_model)
    return model

In [4]:
# Dataset root and the split / phase names used by the cells below.
root_dir = "/StudentData/hw2_data"
phases = ["train", "eval"]
data_types = ["train", "test"]

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [5]:

# Training data, opened with phase='train'.
train_dataset = FaceMaskDataset(
    root_dir=f'{root_dir}/train',
    have_label=True,
    phase='train',
)

# One dataset per evaluated split, each opened with phase='eval'.
# NOTE(review): the 'train' directory is therefore loaded a second time here,
# presumably so it can be scored under eval-time preprocessing - confirm in FaceMaskDataset.
eval_datasets = dict(
    (split, FaceMaskDataset(root_dir=f'{root_dir}/{split}', have_label=True, phase='eval'))
    for split in data_types
)

# Sample count per split; used later to average the per-batch losses.
datasets_sizes = {split: len(dataset) for split, dataset in eval_datasets.items()}

In [6]:
# Loader settings keyed by phase: training uses larger, shuffled batches,
# evaluation uses smaller batches in a fixed order.
batch_sizes = dict(train=128, eval=64)
shuffles = dict(train=True, eval=False)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_sizes['train'],
    shuffle=shuffles['train'],
)

# One evaluation loader per split listed in data_types.
eval_dataloaders = {
    split: DataLoader(
        eval_datasets[split],
        batch_size=batch_sizes['eval'],
        shuffle=shuffles['eval'],
    )
    for split in data_types
}

In [8]:
# Instantiate MobileNetV3 ('large' mode, n_class=2) and move it to the selected device.
model = MobileNetV3(
    n_class=2,
    input_size=224,
    dropout=0.0,
    mode='large',
    width_mult=1.0,
)
model = model.to(device)

# Report the parameter count in millions.
print('Total Number of Parameters: {:.2f}M'.format(sum(p.numel() for p in model.parameters()) / 1e6))

In [11]:
# Loss for the classification head.
criterion = nn.CrossEntropyLoss()

# Optimiser hyperparameters.
learning_rate = 0.001
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Step decay: the learning rate is multiplied by `gamma` every `step_size` scheduler steps.
step_size = 5
gamma = 0.1
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

In [None]:
# Run training with the default epoch count and split names of train_model.
# The returned object is the `model` passed in, with the selected weights loaded.
best_model = train_model(
    model=model,
    train_dataloader=train_dataloader,
    eval_dataloaders=eval_dataloaders,
    datasets_sizes=datasets_sizes,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler,
)

Epoch [00/20]
train Loss: 0.6931 F1: 45.92 Precision: 42.41 Recall: 50.06 Accuracy: 35.76 ROC-AUC: 50.00
test  Loss: 0.6931 F1: 45.74 Precision: 42.06 Recall: 50.12 Accuracy: 35.70 ROC-AUC: 50.00
Epoch 0 took 1m 48s
------------------------------------------------------------------------------------------
Epoch [01/20]
train Loss: 0.1913 F1: 93.90 Precision: 90.83 Recall: 97.19 Accuracy: 93.12 ROC-AUC: 97.99
test  Loss: 0.1815 F1: 94.30 Precision: 91.25 Recall: 97.57 Accuracy: 93.62 ROC-AUC: 98.19
Epoch 1 took 4m 16s
------------------------------------------------------------------------------------------
Epoch [02/20]
train Loss: 0.1577 F1: 95.15 Precision: 92.76 Recall: 97.66 Accuracy: 94.57 ROC-AUC: 98.66
test  Loss: 0.1553 F1: 95.42 Precision: 93.07 Recall: 97.90 Accuracy: 94.92 ROC-AUC: 98.76
Epoch 2 took 4m 16s
------------------------------------------------------------------------------------------
Epoch [03/20]
train Loss: 0.1379 F1: 95.64 Precision: 95.06 Recall: 96.23 Accur

In [None]:
# Write the state dict of the returned model to disk in the working directory.
torch.save(obj=best_model.state_dict(), f="mask_model.pt")