Breast tumour classification with SqueezeNet
=============================


Using SqueezeNet 1.0:  
Trained from scratch without any pre-training, SqueezeNet achieved **59.2%** validation accuracy.  
With the pre-trained layers frozen and only the final layers trained, SqueezeNet achieved **66.7%** validation accuracy.  
With the entire model fine-tuned after initialising with pre-trained weights, SqueezeNet achieved **82.2%** validation accuracy.

Using SqueezeNet 1.1:  
With the entire model fine-tuned after initialising with pre-trained weights, SqueezeNet achieved **81.5%** validation accuracy in 41 minutes.

In [4]:
from __future__ import print_function 
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
%matplotlib inline

Initial parameters
-------------------

In [5]:
# Root folder that holds the 'train' and 'val' image sub-folders.
# Set the BREAKHIS_DATA_DIR environment variable to point at another copy of
# the data without editing the notebook; the original path is the fallback.
data_dir = os.environ.get(
    "BREAKHIS_DATA_DIR",
    "C:/Users/Renee/Documents/2019/Semester 2/COMP5703/Data/breakhis",
)

# Number of output channels given to the replaced classifier layer
num_classes = 8

# Mini-batch size used by the DataLoaders
batch_size = 16

# Number of epochs passed to train_model
num_epochs = 20

# False to finetune the whole model. True to freeze and update only last layers
feature_extract = False

Model
-----

In [6]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25,
                checkpoint_path='torchvision_squeezenet.pt'):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Every epoch runs a 'train' phase followed by a 'val' phase. Whenever the
    validation accuracy improves on the best seen so far, the weights are
    snapshotted in memory and the whole model object is saved to
    ``checkpoint_path``.

    Args:
        model: The ``nn.Module`` to train. Batches are moved to the device of
            its first parameter (CPU if it has no parameters).
        dataloaders: Mapping with 'train' and 'val' keys; each value yields
            ``(inputs, labels)`` batches and exposes a sized ``.dataset``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of train/val cycles to run.
        checkpoint_path: File the best model is saved to with ``torch.save``.
            Pass ``None`` to skip writing a checkpoint.

    Returns:
        A ``(model, val_acc_history)`` tuple: the model with the best
        validation weights loaded, and one validation accuracy per epoch
        (each a 0-dim tensor).
    """
    start = time.time()

    # Use the device the model already lives on rather than depending on a
    # notebook-level ``device`` variable being defined before this call.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            dataset_size = len(dataloaders[phase].dataset)

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Only build the autograd graph while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss.item() is the batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects.double() / dataset_size

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # Snapshot the weights whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                    if checkpoint_path is not None:
                        torch.save(model, checkpoint_path)

                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # best_acc is a plain float until the first improvement, a tensor afterwards
    print('Best val Acc: {:.4f}'.format(float(best_acc)))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [7]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    When the flag is falsy the model is left untouched. Nothing is returned;
    the parameters are modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [8]:
def initialise_model(num_classes, feature_extract, use_pretrained=True):
    """Build a SqueezeNet 1.0 whose classifier conv outputs ``num_classes`` channels.

    Args:
        num_classes: Number of output channels for the replacement classifier layer.
        feature_extract: Forwarded to ``set_parameter_requires_grad``; when truthy
            the existing parameters are frozen before the classifier is replaced,
            so only the new layer stays trainable.
        use_pretrained: Passed to torchvision as ``pretrained``.

    Returns:
        ``(model, input_size)`` where ``input_size`` is 224.
    """
    input_size = 224

    # using squeezenet 1.0 -- change this line to change model
    net = models.squeezenet1_0(pretrained=use_pretrained)

    # Freeze first: the layer created afterwards keeps requires_grad=True
    set_parameter_requires_grad(net, feature_extract)

    new_head = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
    net.classifier[1] = new_head
    net.num_classes = num_classes

    return net, input_size

# Build the network configured by the notebook parameters and show its layers
model_ft, input_size = initialise_model(
    num_classes=num_classes,
    feature_extract=feature_extract,
    use_pretrained=True,
)

print(model_ft)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

Load Data
---------


In [9]:
# Use the first CUDA device when one is available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Per-channel mean / std applied after ToTensor in both pipelines
# (presumably the statistics the pretrained weights expect -- TODO confirm)
normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])

# Training: random crop + horizontal flip for augmentation
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation: deterministic resize + centre crop
val_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
    transforms.ToTensor(),
    normalize,
])

data_transforms = {'train': train_transform, 'val': val_transform}

print("Initializing Datasets and Dataloaders...")

# One ImageFolder per split, read from <data_dir>/<split>
image_datasets = {}
for split in ['train', 'val']:
    split_dir = os.path.join(data_dir, split)
    image_datasets[split] = datasets.ImageFolder(split_dir, data_transforms[split])


Initializing Datasets and Dataloaders...


In [18]:
def samples_by_class(train_imgs, num_classes):
    """Compute inverse-frequency weights per class and per sample.

    Args:
        train_imgs: Either an object exposing a ``targets`` attribute, or an
            iterable of ``(item, label)`` pairs with integer labels in
            ``range(num_classes)``.
        num_classes: Total number of classes.

    Returns:
        ``(sample_weights, weights)``: ``weights[c]`` is
        ``total_samples / samples_in_class_c`` (``0.0`` for a class with no
        samples, instead of raising ``ZeroDivisionError``), and
        ``sample_weights[i]`` is the weight of sample ``i``'s class.
    """
    # Prefer a ready-made label list: indexing an image dataset would load
    # and transform every image just to read its label.
    # NOTE(review): assumes ``targets`` holds one integer class index per
    # sample in dataset order, as torchvision's ImageFolder documents.
    labels = getattr(train_imgs, 'targets', None)
    if labels is None:
        labels = [item[1] for item in train_imgs]
    labels = [int(label) for label in labels]

    samples = [0] * num_classes
    for label in labels:
        samples[label] += 1

    total = float(sum(samples))
    weights = [total / count if count else 0.0 for count in samples]

    sample_weights = [weights[label] for label in labels]
    return sample_weights, weights

In [19]:
# Inverse-frequency weights derived from the training split
sample_weights, class_weights = samples_by_class(image_datasets['train'], num_classes)

# Per-sample weights as float64, per-class weights as float32, both on `device`
sample_weights = torch.tensor(sample_weights, dtype=torch.float64, device=device)
class_weights = torch.tensor(class_weights, dtype=torch.float32, device=device)

# Sampler that draws len(sample_weights) indices in proportion to the sample weights
sampler = torch.utils.data.WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
)

In [22]:
# Create training and validation dataloaders. Only the training split is
# shuffled: validation loss and accuracy are sums over the whole split, so
# batch order does not change them.
dataloaders_dict = {
    x: torch.utils.data.DataLoader(
        image_datasets[x],
        batch_size=batch_size,
        shuffle=(x == 'train'),
        num_workers=4,
    )
    for x in ['train', 'val']
}

# Send the model to GPU
model_ft = model_ft.to(device)

# Select which parameters to learn: exactly those that still require
# gradients. With feature_extract=False nothing was frozen, so this is every
# parameter; with feature_extract=True it is only the layers added afterwards.
print("Params to learn:")
trainable_params = [
    (name, param)
    for name, param in model_ft.named_parameters()
    if param.requires_grad
]
for name, _ in trainable_params:
    print("\t", name)
params_to_update = [param for _, param in trainable_params]

# Optimise only the selected parameters
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Params to learn:
	 features.0.weight
	 features.0.bias
	 features.3.squeeze.weight
	 features.3.squeeze.bias
	 features.3.expand1x1.weight
	 features.3.expand1x1.bias
	 features.3.expand3x3.weight
	 features.3.expand3x3.bias
	 features.4.squeeze.weight
	 features.4.squeeze.bias
	 features.4.expand1x1.weight
	 features.4.expand1x1.bias
	 features.4.expand3x3.weight
	 features.4.expand3x3.bias
	 features.5.squeeze.weight
	 features.5.squeeze.bias
	 features.5.expand1x1.weight
	 features.5.expand1x1.bias
	 features.5.expand3x3.weight
	 features.5.expand3x3.bias
	 features.7.squeeze.weight
	 features.7.squeeze.bias
	 features.7.expand1x1.weight
	 features.7.expand1x1.bias
	 features.7.expand3x3.weight
	 features.7.expand3x3.bias
	 features.8.squeeze.weight
	 features.8.squeeze.bias
	 features.8.expand1x1.weight
	 features.8.expand1x1.bias
	 features.8.expand3x3.weight
	 features.8.expand3x3.bias
	 features.9.squeeze.weight
	 features.9.squeeze.bias
	 features.9.expand1x1.weight
	 features.

Training and Validation
--------------------------------


In [23]:
# Cross-entropy loss, weighted per class with the inverse-frequency weights
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Fine-tune and keep both the best model and its per-epoch validation accuracy
model_ft, hist = train_model(
    model_ft,
    dataloaders_dict,
    criterion,
    optimizer_ft,
    num_epochs=num_epochs,
)


Epoch 0/19
----------
train Loss: 0.5782 Acc: 0.7514
val Loss: 0.7885 Acc: 0.6217

Epoch 1/19
----------
train Loss: 0.5803 Acc: 0.7379
val Loss: 0.5156 Acc: 0.7882

Epoch 2/19
----------
train Loss: 0.5536 Acc: 0.7543
val Loss: 0.5918 Acc: 0.7102

Epoch 3/19
----------
train Loss: 0.5769 Acc: 0.7500
val Loss: 0.4927 Acc: 0.7608

Epoch 4/19
----------
train Loss: 0.5241 Acc: 0.7665
val Loss: 0.5785 Acc: 0.7935

Epoch 5/19
----------
train Loss: 0.4972 Acc: 0.7843
val Loss: 0.4642 Acc: 0.7629

Epoch 6/19
----------
train Loss: 0.5275 Acc: 0.7710
val Loss: 0.5158 Acc: 0.7671

Epoch 7/19
----------
train Loss: 0.5108 Acc: 0.7705
val Loss: 0.4266 Acc: 0.8019

Epoch 8/19
----------
train Loss: 0.5157 Acc: 0.7733
val Loss: 0.4948 Acc: 0.8251

Epoch 9/19
----------
train Loss: 0.4609 Acc: 0.7898
val Loss: 0.8406 Acc: 0.6828

Epoch 10/19
----------
train Loss: 0.4846 Acc: 0.7807
val Loss: 0.4866 Acc: 0.7798

Epoch 11/19
----------
train Loss: 0.4625 Acc: 0.7922
val Loss: 0.5328 Acc: 0.7671

Ep

In [24]:
import torch
import gc
# Uncomment to drop the trained model before emptying the cache:
#del model_ft
#gc.collect()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Release cached GPU memory held by PyTorch's allocator
torch.cuda.empty_cache()
if device.type == 'cuda':
    # memory_cached is the deprecated name of memory_reserved; prefer the new
    # name and only fall back on PyTorch releases that predate it.
    reserved_bytes = getattr(torch.cuda, 'memory_reserved', None) or torch.cuda.memory_cached

    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(reserved_bytes(0)/1024**3,1), 'GB')
    

GeForce GTX 965M
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


Train model from scratch
--------------------------------

In [None]:
# Initialize the non-pretrained version of the model used for this run.
# initialise_model takes (num_classes, feature_extract, use_pretrained); the
# earlier call passed an undefined `model_name` as an extra first argument.
scratch_model, _ = initialise_model(num_classes, feature_extract=False, use_pretrained=False)
scratch_model = scratch_model.to(device)
scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
# NOTE(review): this loss is unweighted, whereas the fine-tuned run used
# class weights -- confirm that difference is intended for the comparison.
scratch_criterion = nn.CrossEntropyLoss()
_, scratch_hist = train_model(scratch_model, dataloaders_dict, scratch_criterion, scratch_optimizer, num_epochs=num_epochs)

In [None]:
# Compare per-epoch validation accuracy of the fine-tuned (pretrained) model
# against the model trained from scratch.

# History entries are tensors; convert them to NumPy values for plotting
p_hist = [acc.cpu().numpy() for acc in hist]
s_hist = [acc.cpu().numpy() for acc in scratch_hist]

# Epochs are shown 1-based on the x axis
epoch_axis = range(1, num_epochs + 1)

plt.plot(epoch_axis, p_hist, label="Pretrained")
plt.plot(epoch_axis, s_hist, label="Scratch")

plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.ylim((0, 1.))
plt.xticks(np.arange(1, num_epochs + 1, 1.0))
plt.legend()
plt.show()

Confusion Matrix
-----------------------------------

In [25]:
# Rows are true classes, columns are predicted classes
confusion_matrix = torch.zeros(num_classes, num_classes)

# Force inference mode so layers such as dropout are disabled, whatever
# mode the model was left in by earlier cells.
model_ft.eval()

with torch.no_grad():
    for inputs, labels in dataloaders_dict['val']:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)

        # Encode each (true, predicted) pair as one flat index and count the
        # whole batch at once instead of looping over samples in Python.
        flat_index = (labels.view(-1) * num_classes + preds.view(-1)).cpu()
        batch_counts = torch.bincount(flat_index, minlength=num_classes * num_classes)
        confusion_matrix += batch_counts.view(num_classes, num_classes).to(confusion_matrix.dtype)

print(image_datasets['train'].classes)
print(confusion_matrix)

# to get percentage for each class (correct predictions / samples of that class)
print(confusion_matrix.diag()/confusion_matrix.sum(1))

['B_A', 'B_F', 'B_PT', 'B_TA', 'M_DC', 'M_LC', 'M_MC', 'M_PC']
tensor([[ 49.,   0.,   0.,   0.,   0.,   0.,   2.,   0.],
        [  2.,  94.,   8.,   7.,   4.,   0.,   3.,   3.],
        [  0.,   4.,  39.,   1.,   1.,   1.,   2.,   2.],
        [  1.,   9.,   0.,  58.,   0.,   0.,   0.,   2.],
        [  6.,   4.,   0.,   1., 348.,  22.,  15.,  23.],
        [  0.,   0.,   0.,   0.,  22.,  47.,   1.,   3.],
        [  1.,   2.,   0.,   4.,   4.,   0.,  84.,   2.],
        [  0.,   0.,   0.,   0.,   4.,   0.,   0.,  64.]])
tensor([0.9608, 0.7769, 0.7800, 0.8286, 0.8305, 0.6438, 0.8660, 0.9412])
