In [7]:
# --- Run configuration -------------------------------------------------------
# Optimiser settings (passed to SGD in the training cell)
learning_rate = 0.004
momentum = 0.9
epochs = 50

# Data-loading settings (passed to the DataLoaders)
batch_size = 32
num_workers = 6

# Architecture to fine-tune; only the names listed in the assert are handled
# by the model-building cell.
model_name = "ResNet50"
assert model_name in ["ResNet18", "ResNet50", "DenseNet121", "SqueezeNet1.1", "InceptionV3"]

# Make IPython display the value of *every* expression statement in a cell,
# not just the last one. Side effect: bare calls that return a value
# (e.g. `model.eval()`) get their repr echoed in the cell output.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import torch.nn as nn
import torchvision
from datetime import datetime
from pathlib import Path

# Timestamp used to give every run its own results directory.
now = datetime.now().strftime("%Y-%m-%d-%H%M")
Path(f'./TrainingResults/{model_name}/{now}').mkdir(parents=True, exist_ok=True)

# Record the settings of this run next to its results. The context manager
# guarantees the file is closed even if the write fails.
with open(f'./TrainingResults/{model_name}/{now}/hyperparameters.txt', 'w') as f:
    f.write(f'epochs={epochs}\nlearning rate={learning_rate}\nbatch size={batch_size}'
            f'\nmomentum={momentum}')

# Supported architectures: torchvision constructor and checkpoint file name.
_backbones = {
    "ResNet18": (torchvision.models.resnet18, 'resnet18_pretrained.pt'),
    "ResNet50": (torchvision.models.resnet50, 'resnet50_pretrained.pt'),
    "DenseNet121": (torchvision.models.densenet121, 'densenet121_pretrained.pt'),
    "SqueezeNet1.1": (torchvision.models.squeezenet1_1, 'squeezenet1-1_pretrained.pt'),
    "InceptionV3": (torchvision.models.inception_v3, 'inceptionv3_pretrained.pt'),
}
_build_backbone, _checkpoint_name = _backbones[model_name]

# Every checkpoint lives inside the run directory created above (the
# InceptionV3 path previously lacked the '/' after '.' and pointed elsewhere).
trained_model_path = f'./TrainingResults/{model_name}/{now}/{_checkpoint_name}'
model_conv = _build_backbone(pretrained=True)

# Freeze the pretrained weights; the layers created below are new modules
# whose parameters default to requires_grad=True, so only they are trained.
for param in model_conv.parameters():
    param.requires_grad = False

# Replace the classification head with a 2-class one.
if model_name in ("ResNet18", "ResNet50"):
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 2)

elif model_name == "DenseNet121":
    num_ftrs = model_conv.classifier.in_features
    model_conv.classifier = nn.Linear(num_ftrs, 2)

elif model_name == "SqueezeNet1.1":
    # SqueezeNet fine tuning adapted from https://discuss.pytorch.org/t/fine-tuning-squeezenet/3855
    num_ftrs = 512
    model_conv.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Conv2d(num_ftrs, 2, kernel_size=1),
        nn.ReLU(inplace=True),
        nn.AvgPool2d(13)
    )
    # Keep the recorded class count in sync with the new head: older
    # torchvision releases reshape the output with this attribute
    # (newer ones ignore it, so this is harmless there).
    model_conv.num_classes = 2

elif model_name == "InceptionV3":
    # InceptionV3 has two heads: the auxiliary classifier and the main one.
    num_ftrs = model_conv.AuxLogits.fc.in_features
    model_conv.AuxLogits.fc = nn.Linear(num_ftrs, 2)
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 2)
    

In [3]:
# Data transformations and subsequent training adapted from https://github.com/shervinmin/DeepCovid
from torchvision import transforms

# InceptionV3 is fed 299-pixel crops; every other supported model gets 224.
if model_name == "InceptionV3":
    img_size = 299
else:
    img_size = 224

# The two pipelines differ only in the crop step: a random resized crop for
# training (augmentation) and a deterministic centre crop for validation.
_crop_step = {
    'train': transforms.RandomResizedCrop(img_size),
    'val': transforms.CenterCrop(img_size),
}

data_transforms = {
    split: transforms.Compose([
        transforms.Resize(img_size),
        crop,
        transforms.ToTensor(),
        # Per-channel mean / std handed to Normalize
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    for split, crop in _crop_step.items()
}

In [4]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Root folder containing one sub-folder per split ('train', 'val').
data_dir = './data/'

# One ImageFolder per split, each using that split's transform pipeline.
image_datasets = {
    split: ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'val']
}

# Matching loaders; both splits are shuffled, as in the original setup.
dataloaders = {
    split: DataLoader(dataset, batch_size=batch_size,
                      shuffle=True, num_workers=num_workers)
    for split, dataset in image_datasets.items()
}

# Sample counts per split, used to average loss / accuracy over an epoch.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
class_names = image_datasets['train'].classes  ## 0: child, and 1: nonchild

# Train on the first GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [5]:
import copy
import time
import torch
import matplotlib.pyplot as plt
from termcolor import colored

%matplotlib

def train_model(model, criterion, optimizer, scheduler, batch_szie, num_epochs=20):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs one pass over ``dataloaders['train']`` (with gradient
    updates) followed by one pass over ``dataloaders['val']`` (no gradients).
    Per-epoch loss and accuracy are printed, and after the last epoch two
    learning-curve figures are saved into the current run directory.

    Relies on notebook-level names: ``dataloaders``, ``dataset_sizes``,
    ``device``, ``model_name`` and ``now``. The model itself is not moved to
    ``device`` here; only the input batches are.

    Args:
        model: network to train. ``torchvision`` ``Inception3`` instances are
            handled specially: in training mode they return
            ``(outputs, aux_outputs)`` and the auxiliary loss is added with
            weight 0.4.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training pass.
        batch_szie: nominal batch size. Kept (misspelling included) so
            existing calls keep working; it is not used by the computation.
        num_epochs: number of epochs to run.

    Returns:
        ``model`` with the state dict of the epoch that achieved the highest
        validation accuracy (the initial weights if accuracy never rose
        above 0).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # 1-based epoch of the best validation accuracy; 0 means "never improved".
    # Initialised here so the summary print cannot hit an unbound name.
    best_epoch = 0

    val_losses = []
    train_losses = []
    val_accuracy = []
    train_accuracy = []

    is_inception = isinstance(model, torchvision.models.inception.Inception3)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            training = phase == 'train'
            if training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(training):
                    if is_inception and training:
                        # Training-mode Inception returns main and auxiliary logits.
                        outputs, aux_outputs = model(inputs)
                        loss = criterion(outputs, labels) + 0.4 * criterion(aux_outputs, labels)
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if training:
                        loss.backward()
                        optimizer.step()

                # statistics, accumulated as plain Python numbers so nothing
                # device-bound leaks into the history lists or the plots
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if training:
                # Step the schedule only after this epoch's optimizer steps;
                # stepping it first shifts every LR decay one epoch early.
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            if training:
                train_losses.append(epoch_loss)
                train_accuracy.append(epoch_acc)
            else:
                val_losses.append(epoch_loss)
                val_accuracy.append(epoch_acc)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if not training and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch = epoch + 1  # same 1-based numbering as the log above
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc= %.3f at Epoch: %d' %(best_acc,best_epoch) )

    # Plot learning curves
    _save_learning_curve(val_losses, train_losses, 'Loss',
                         f'./TrainingResults/{model_name}/{now}/train-val-loss-by-epoch.png')
    _save_learning_curve(val_accuracy, train_accuracy, 'Accuracy',
                         f'./TrainingResults/{model_name}/{now}/train-val-accuracy-by-epoch.png')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


def _save_learning_curve(val_values, train_values, ylabel, out_path):
    """Plot per-epoch validation and training values on a new figure and save it to ``out_path``."""
    epoch_axis = list(range(1, len(val_values) + 1))
    plt.figure()
    plt.plot(epoch_axis, val_values, label='val')
    plt.plot(epoch_axis, train_values, label='train')
    plt.legend()
    plt.ylim(0,1)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.savefig(out_path)


Using matplotlib backend: Qt5Agg


In [6]:
import time
from torch import optim
from torch.optim import lr_scheduler
from torchvision import models
 

start_time = time.time()

model_conv = model_conv.to(device)
criterion = nn.CrossEntropyLoss()

# Hand only the replaced head's parameters to the optimiser.
if model_name in ("ResNet18", "ResNet50"):
    params_to_update = model_conv.fc.parameters()
elif model_name in ("DenseNet121", "SqueezeNet1.1"):
    params_to_update = model_conv.classifier.parameters()
elif model_name == "InceptionV3":
    # Two heads were replaced (main + auxiliary), so collect every parameter
    # that is still trainable instead of naming a single sub-module.
    params_to_update = [
        param
        for name, param in model_conv.named_parameters()
        if param.requires_grad == True
    ]

optimizer_conv = optim.SGD(params_to_update, lr=learning_rate, momentum=momentum)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(
    model_conv,
    criterion,
    optimizer_conv,
    exp_lr_scheduler,
    batch_size,
    num_epochs=epochs,
)
model_conv.eval()
# NOTE(review): this pickles the whole module object rather than its
# state_dict, so loading presumably needs the same class definitions
# importable — confirm against whatever consumes this file.
torch.save(model_conv, trained_model_path)

end_time = time.time()
print("total_time tranfer learning=", end_time - start_time)


Epoch 1/50
----------




train Loss: 0.5081 Acc: 0.7565
val Loss: 0.3999 Acc: 0.8230
Epoch 2/50
----------
train Loss: 0.4484 Acc: 0.7906
val Loss: 0.6059 Acc: 0.8062
Epoch 3/50
----------
train Loss: 0.4783 Acc: 0.7835
val Loss: 0.2975 Acc: 0.8734
Epoch 4/50
----------
train Loss: 0.4225 Acc: 0.8135
val Loss: 0.2856 Acc: 0.8811
Epoch 5/50
----------
train Loss: 0.4325 Acc: 0.8061
val Loss: 0.2678 Acc: 0.8915
Epoch 6/50
----------
train Loss: 0.4373 Acc: 0.8060
val Loss: 0.3288 Acc: 0.8637
Epoch 7/50
----------
train Loss: 0.3540 Acc: 0.8430
val Loss: 0.2740 Acc: 0.8863
Epoch 8/50
----------
train Loss: 0.3604 Acc: 0.8432
val Loss: 0.2901 Acc: 0.8779
Epoch 9/50
----------
train Loss: 0.3439 Acc: 0.8478
val Loss: 0.2826 Acc: 0.8779
Epoch 10/50
----------
train Loss: 0.3412 Acc: 0.8466
val Loss: 0.2824 Acc: 0.8786
Epoch 11/50
----------
train Loss: 0.3532 Acc: 0.8423
val Loss: 0.2658 Acc: 0.8934
Epoch 12/50
----------
train Loss: 0.3426 Acc: 0.8462
val Loss: 0.2723 Acc: 0.8895
Epoch 13/50
----------
train Loss: 

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

total_time tranfer learning= 1371.157935142517


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
