In [None]:
import os
import torch
from torch import nn
import torch.optim as optim
from torchvision import transforms, models, datasets
import time
import copy

### Use transfer learning to build a new classification model
# Reference: https://www.udemy.com/course/pytorch-best/learn/lecture/17161282?start=540#overview

In [2]:
# Root folder of the dataset. It is joined with 'train' and 'valid' below, so it
# must contain those two sub-folders.
# The location can be overridden with the FACEMASK_DATA_DIR environment variable
# so the notebook is not tied to one machine; the original path stays the default.
data_dir = os.environ.get('FACEMASK_DATA_DIR', '/home/duhuaiyu/Downloads/facemaskdata/temp')
train_dir = os.path.join(data_dir, 'train')
valid_dir = os.path.join(data_dir, 'valid')

# Define the transforms used to preprocess images automatically

In [3]:
# Exact (height, width) every image is resized to.
# An int argument such as Resize(32) only scales the SHORTER edge to 32 and keeps
# the aspect ratio, so a non-square image would come out as e.g. 32x48. Such images
# cannot be stacked into one batch and do not match the classifier head, whose
# first layer takes 512 input features.
input_size = (32, 32)

data_transforms = {
    # Training pipeline: resize, light augmentation, then tensor conversion and
    # per-channel normalization.
    'train': transforms.Compose([
        transforms.Resize(input_size),
        # Blur roughly one image in five.
        transforms.RandomApply(
            torch.nn.ModuleList([transforms.GaussianBlur(kernel_size=(5,5))]), p = 0.2),
        transforms.RandomRotation(5),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Validation pipeline: no augmentation, same resize and normalization.
    'valid':
        transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load the training and validation data

In [15]:
# Number of images per batch, shared by the training and validation loaders.
batch_size = 500

# One ImageFolder per split; each split lives in data_dir/<split> and uses the
# transform pipeline registered under the same key.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'valid')
}
# Shuffled loaders built on top of the datasets above.
dataloaders = {
    split: torch.utils.data.DataLoader(folder, batch_size=batch_size, shuffle=True)
    for split, folder in image_datasets.items()
}
# Number of samples in each split.
dataset_sizes = {split: len(folder) for split, folder in image_datasets.items()}
# Class labels as discovered by ImageFolder on the training split.
class_names = image_datasets['train'].classes

In [6]:
# Optionally freeze every parameter of a model
def set_parameter_requires_grad(model, feature_extracting):
    """Disable gradient tracking on all parameters when feature extracting.

    Args:
        model: object exposing ``parameters()``; each yielded item gets its
            ``requires_grad`` attribute set to ``False``.
        feature_extracting: when falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [7]:
# Pass-through layer, used when rebuilding the VGG16 model
class Identity(nn.Module):
    """Module whose forward pass hands its input back unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (no copy, no computation)."""
        return x

# Rebuild VGG16 for inputs fixed to 32 by 32
def initialize_model_first_train(num_classes, feature_extract, use_pretrained=True):
    """Create a VGG16 whose pooling stage and classifier are replaced.

    Args:
        num_classes: number of outputs of the final linear layer.
        feature_extract: when truthy, every parameter of the loaded VGG16 is
            frozen before the new classifier is attached, so only the new
            classifier layers remain trainable.
        use_pretrained: forwarded to ``models.vgg16(pretrained=...)``.

    Returns:
        The modified VGG16 model.
    """
    backbone = models.vgg16(pretrained=use_pretrained)
    # Freeze first: the layers created below are new and keep requires_grad=True.
    set_parameter_requires_grad(backbone, feature_extract)

    # The adaptive average pool is bypassed because the input size is fixed.
    backbone.avgpool = Identity()

    # New classifier head; its first layer takes 512 features
    # (presumably a 512-channel 1x1 feature map for 32x32 inputs - TODO confirm).
    head_layers = [
        nn.Linear(512, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, num_classes),
    ]
    backbone.classifier = nn.Sequential(*head_layers)
    return backbone

In [8]:
# Detect CUDA once and report which device training will use.
train_on_gpu = torch.cuda.is_available()

print('CUDA is available!  Training on GPU ...' if train_on_gpu
      else 'CUDA is not available.  Training on CPU ...')

# First CUDA device when present, otherwise the CPU.
device = torch.device("cuda:0") if train_on_gpu else torch.device("cpu")

CUDA is available!  Training on GPU ...


In [23]:
# Freeze all pretrained layers so that only the new classifier head is trained
feature_extract = True
model_ft = initialize_model_first_train(4, feature_extract, use_pretrained=True)

# Move the model to the selected device (GPU when available)
model_ft = model_ft.to(device)
# File name for the first-round checkpoint
filename='checkpoint_first_round.pth'

# Collect the parameters that will be optimised.
# A concrete list is built in every case: model.parameters() is a one-shot
# generator, so handing it to an optimizer a second time (e.g. when the
# optimizer cell is re-run) would present an empty parameter list.
# When nothing is frozen, this list simply contains every parameter.
print("Params to learn:")
params_to_update = []
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        params_to_update.append(param)
        print("\t",name)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=50, is_inception=False, filename="checkpoint.pth", scheduler=None):
    """Train and validate ``model``, checkpointing the best validation accuracy.

    Every epoch runs a 'train' pass followed by a 'valid' pass. Whenever the
    validation accuracy improves, the weights are remembered and a checkpoint
    (model state, best accuracy, optimizer state) is written to ``filename``.
    After the last epoch the best weights are loaded back into ``model``.

    Args:
        model: network to train; it is moved to the module-level ``device``.
        dataloaders: mapping with 'train' and 'valid' entries. Each value yields
            ``(inputs, labels)`` batches and exposes ``.dataset``, whose length
            is the denominator of the per-epoch loss and accuracy.
        criterion: loss callable invoked as ``criterion(outputs, labels)``. Its
            value is multiplied by the batch size when accumulated, i.e. it is
            treated as a per-batch mean.
        optimizer: optimizer stepped after every training batch.
        num_epochs: number of train/valid rounds.
        is_inception: unused; kept so existing calls keep working.
        filename: path the best checkpoint is saved to.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            validation pass. When ``None`` the function falls back to a
            module-level variable named ``scheduler`` if one exists (this is
            what it relied on before the parameter was added). If neither is
            available, the learning rate is left unchanged.

    Returns:
        Tuple ``(model, val_acc_history, train_acc_history, valid_losses,
        train_losses, LRs)``. ``LRs`` holds the learning rate of the first
        parameter group before training and after every epoch.
    """
    # Prefer the explicitly passed scheduler. The fallback keeps older calls
    # working, but relying on it means whichever scheduler was last assigned in
    # the notebook gets stepped, even one bound to a different optimizer.
    if scheduler is None:
        scheduler = globals().get('scheduler')

    since = time.time()
    best_acc = 0
    model.to(device)

    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]

    # Snapshot so that something valid is restored even if accuracy never improves.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # One training pass followed by one validation pass
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # enable dropout etc.
            else:
                model.eval()  # inference behaviour for validation

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Clear gradients left over from the previous batch
                optimizer.zero_grad()
                # Track gradients only during the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # Predicted class = index of the largest output
                    _, preds = torch.max(outputs, 1)

                    # Back-propagate and update weights while training
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate the loss as a sum over samples, plus the hit count
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep and persist the weights with the best validation accuracy
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # One scheduler step per epoch, once validation is done
                if scheduler is not None:
                    scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best-performing weights before returning
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs

In [11]:
# Loss function for the classification task
criterion = nn.CrossEntropyLoss()
# SGD with momentum over the parameters selected for training
optimizer_ft = optim.SGD(params=params_to_update, lr=0.001, momentum=0.9)
# gamma=0.5 with step_size=7: the learning rate is halved every 7 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.5)

NameError: name 'params_to_update' is not defined

### Train the model the first time

In [36]:
# Pass the checkpoint name defined with the model setup. Without it train_model
# falls back to its default "checkpoint.pth" and `filename` is silently ignored.
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
    model_ft,
    dataloaders,
    criterion,
    optimizer_ft,
    num_epochs=50,
    is_inception=False,
    filename=filename,
)

Epoch 0/49
----------
Time elapsed 0m 36s
train Loss: 0.7869 Acc: 0.6987
Time elapsed 0m 38s
valid Loss: 0.4660 Acc: 0.8668
Optimizer learning rate : 0.0010000

Epoch 1/49
----------
Time elapsed 1m 15s
train Loss: 0.5387 Acc: 0.8215
Time elapsed 1m 17s
valid Loss: 0.3445 Acc: 0.8944
Optimizer learning rate : 0.0010000

Epoch 2/49
----------
Time elapsed 1m 53s
train Loss: 0.4722 Acc: 0.8428
Time elapsed 1m 55s
valid Loss: 0.2842 Acc: 0.9131
Optimizer learning rate : 0.0010000

Epoch 3/49
----------
Time elapsed 2m 31s
train Loss: 0.4424 Acc: 0.8518
Time elapsed 2m 34s
valid Loss: 0.2620 Acc: 0.9184
Optimizer learning rate : 0.0010000

Epoch 4/49
----------
Time elapsed 3m 10s
train Loss: 0.4208 Acc: 0.8588
Time elapsed 3m 12s
valid Loss: 0.2404 Acc: 0.9258
Optimizer learning rate : 0.0010000

Epoch 5/49
----------
Time elapsed 3m 48s
train Loss: 0.4056 Acc: 0.8630
Time elapsed 3m 51s
valid Loss: 0.2353 Acc: 0.9247
Optimizer learning rate : 0.0010000

Epoch 6/49
----------
Time elapsed

### The following cells further improve the trained model:
# load the model from a checkpoint file and train it for more epochs on new data

In [17]:
def fix_layer_number(model, layer_number):
    """Freeze the leading parameter tensors of ``model``.

    Parameters are visited in the order ``model.parameters()`` yields them.
    Those at positions ``0`` through ``layer_number`` (inclusive) get
    ``requires_grad = False``; the rest are left as they are. Note that the
    count is in parameter tensors, not in layers.
    """
    for position, tensor in enumerate(model.parameters()):
        if position <= layer_number:
            tensor.requires_grad = False
        else:
            # Everything from here on stays trainable.
            return
def loadModel(filePath, fixed_layer_num, num_classes=4):
    """Rebuild the modified VGG16 and restore its weights from a checkpoint.

    Args:
        filePath: path of a checkpoint whose ``'state_dict'`` entry holds the
            model weights (the format written by ``train_model``).
        fixed_layer_num: forwarded to ``fix_layer_number``; parameter tensors at
            positions 0..fixed_layer_num are frozen after loading.
        num_classes: size of the final classifier layer; must match the
            checkpoint. Defaults to 4, the value previously hard-coded here.

    Returns:
        The restored model, moved to the GPU when one is available.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Reuse the shared builder so this architecture cannot drift from the one
    # used for the first training round. Nothing is frozen at this point
    # (feature_extract=False) and no pretrained weights are fetched because the
    # checkpoint overwrites every weight anyway.
    model = initialize_model_first_train(num_classes, feature_extract=False, use_pretrained=False)
    # map_location lets a checkpoint written on a GPU machine load on a
    # CPU-only host instead of failing while deserialising CUDA tensors.
    checkpoint = torch.load(filePath, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    fix_layer_number(model, fixed_layer_num)
    model = model.to(device)
    return model
# Reload the previously trained model. The second argument freezes the parameter
# tensors at positions 0 through 15 (see fix_layer_number).
model_continue = loadModel("checkpoint_31_12.pth",15)
print("Params to learn:")

# Keep only the parameters that are still trainable, listing each by name.
params_to_update = []
for name, param in model_continue.named_parameters():
    if not param.requires_grad:
        continue
    print("\t",name)
    params_to_update.append(param)

Params to learn:
	 features.19.weight
	 features.19.bias
	 features.21.weight
	 features.21.bias
	 features.24.weight
	 features.24.bias
	 features.26.weight
	 features.26.bias
	 features.28.weight
	 features.28.bias
	 classifier.0.weight
	 classifier.0.bias
	 classifier.3.weight
	 classifier.3.bias
	 classifier.6.weight
	 classifier.6.bias


In [18]:
# Use the same training strategy again, now on the reloaded model
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(params=params_to_update, lr=0.001, momentum=0.9)
# gamma=0.5: the learning rate is halved (not divided by 10) every 7 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.5)
(model_continue, val_acc_history, train_acc_history,
 valid_losses, train_losses, LRs) = train_model(
    model_continue,
    dataloaders,
    criterion,
    optimizer_ft,
    num_epochs=30,
    is_inception=False,
    filename="checkpoint_1_1.pth",
)

Epoch 0/29
----------
Time elapsed 1m 33s
train Loss: 0.1876 Acc: 0.9310
Time elapsed 1m 38s
valid Loss: 0.1190 Acc: 0.9588
Optimizer learning rate : 0.0010000

Epoch 1/29
----------
Time elapsed 2m 56s
train Loss: 0.1586 Acc: 0.9408
Time elapsed 3m 1s
valid Loss: 0.0970 Acc: 0.9655
Optimizer learning rate : 0.0010000

Epoch 2/29
----------
Time elapsed 4m 17s
train Loss: 0.1450 Acc: 0.9468
Time elapsed 4m 21s
valid Loss: 0.1036 Acc: 0.9648
Optimizer learning rate : 0.0010000

Epoch 3/29
----------
Time elapsed 5m 36s
train Loss: 0.1380 Acc: 0.9499
Time elapsed 5m 40s
valid Loss: 0.1100 Acc: 0.9615
Optimizer learning rate : 0.0010000

Epoch 4/29
----------
Time elapsed 6m 55s
train Loss: 0.1288 Acc: 0.9535
Time elapsed 6m 59s
valid Loss: 0.0822 Acc: 0.9728
Optimizer learning rate : 0.0010000

Epoch 5/29
----------
Time elapsed 8m 14s
train Loss: 0.1222 Acc: 0.9551
Time elapsed 8m 18s
valid Loss: 0.1118 Acc: 0.9616
Optimizer learning rate : 0.0010000

Epoch 6/29
----------
Time elapsed 