# Cancer Detection using InceptionNet V3
----

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
import torchvision.models as models
import torchvision.transforms as transforms

## Load in the datasets
----
Instructions for obtaining the datasets are in README.md. Data augmentation is applied to the training data only, through the transforms passed to the training dataset below.

In [None]:
### Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes

# Root folder that holds the train/, valid/ and test/ sub-directories
# (see README.md for how to obtain the data). Change it in this one place.
data_dir = r'C:\Users\diarm\Downloads\cancer_detection\data'

# Every image is resized to 299x299 and normalised with the channel
# statistics used by torchvision's ImageNet-pretrained models.
image_size = (299, 299)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Random augmentation is applied to the training images only.
train_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(p=0.25),
    transforms.RandomGrayscale(p=0.25),
    transforms.RandomVerticalFlip(p=0.25),
    transforms.ToTensor(),
    normalize,
])

# Validation and test images are only resized and normalised so that the
# evaluation numbers are not perturbed by random augmentation.
test_val_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    normalize,
])

# Aliases for the original (misspelled) names, kept so that any cell still
# referring to them continues to work.
train_transfoms = train_transforms
test_val_transfomrs = test_val_transforms

batch_size = 4  # About all my little GPU can handle
num_workers = 0

train_data = datasets.ImageFolder(root=os.path.join(data_dir, 'train'), transform=train_transforms)
test_data = datasets.ImageFolder(root=os.path.join(data_dir, 'test'), transform=test_val_transforms)
valid_data = datasets.ImageFolder(root=os.path.join(data_dir, 'valid'), transform=test_val_transforms)

# Only the training set needs shuffling; a fixed order for validation/test
# keeps evaluation deterministic and does not change loss or accuracy.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

loaders = dict(train=train_loader, test=test_loader, valid=valid_loader)

#### There are three classes of skin lesion to look at in this problem:
    - melanoma
    - nevus
    - seborrheic_keratosis

In [None]:
# Shell escape: list the contents of the training folder. torchvision's
# ImageFolder treats each sub-folder found there as one class.
!ls C:\\Users\\diarm\\Downloads\\cancer_detection\\data\\train

In [None]:
# Find out first whether a CUDA device can be used.
use_cuda = torch.cuda.is_available()

# Inception v3 initialised with its pretrained weights.
inception_net = models.inception_v3(pretrained=True)

# Place the network on the GPU whenever one is present.
if use_cuda:
    inception_net = inception_net.to('cuda')

#### The final classifier can be viewed here. 
For transfer learning we will change this final layer to suit our case and retrain it with our data. 

In [None]:
# Display the network's current final fully connected layer.
inception_net.fc

# Transfer learning
----
The network's current final fully connected layer takes in 2048 features and has 1000 outputs; it is replaced here with a new fully connected layer sized for our classes.
The InceptionNet model has been trained on the ImageNet dataset, a very large collection of images, each labelled with one of 1000 classes.

I'm going to piggyback on the underlying layers of the network to extract features from each image, and train only the final layer to classify the images from our dataset.

In [None]:
# Number of lesion classes: melanoma, nevus, seborrheic_keratosis.
num_classes = 3

# Freeze the parameters of the network. The flag must be set on every
# parameter tensor: assigning `requires_grad` on the module object only
# creates an unused attribute and leaves the whole network trainable.
for param in inception_net.parameters():
    param.requires_grad = False

# New final classifier. It has to be assigned to `fc`, the attribute the
# network's forward pass actually calls; layers stored under other names
# (e.g. fc1/fc2) are never executed, so the model would keep emitting the
# 1000 ImageNet logits. The ReLU keeps the two linear layers from collapsing
# into a single linear map.
inception_net.fc = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(inplace=True),
    nn.Linear(1024, num_classes),
)

# The auxiliary classifier's output is scored against the same targets in
# the training loss, so it needs to predict our classes as well.
inception_net.AuxLogits.fc = nn.Linear(inception_net.AuxLogits.fc.in_features, num_classes)

# Transfer to GPU if available
if use_cuda:
    inception_net = inception_net.to('cuda')

# Newly created layers are trainable by default; this is stated explicitly
# so it is clear which parts of the network are meant to learn.
for head in (inception_net.fc, inception_net.AuxLogits.fc):
    for param in head.parameters():
        param.requires_grad = True

In [None]:
# Cross-entropy loss computed from the raw class scores the network outputs.
criterion = nn.CrossEntropyLoss()

# Optimise both classifier heads. The training loss adds the auxiliary
# classifier's loss (weighted 0.4); unless that head's parameters are handed
# to the optimiser as well, the auxiliary term never updates anything.
# These values were obtained from here https://arxiv.org/ftp/arxiv/papers/1810/1810.10348.pdf
head_params = list(inception_net.fc.parameters()) + list(inception_net.AuxLogits.fc.parameters())
optimizer = optim.SGD(head_params, lr=0.0007)

In [None]:
# A helper function to show classification accuracy
def label_accuracy(model, validation_loader, epoch, use_cuda):
    """Print and return the classification accuracy of ``model`` on a loader.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: network returning one row of class scores per input sample.
        validation_loader: iterable yielding ``(images, targets)`` batches.
        epoch: epoch number, used only in the printed message.
        use_cuda: when true, each batch is moved to the GPU before inference.

    Returns:
        Fraction of correctly classified samples as a float in ``[0, 1]``;
        ``0.0`` when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for images, targets in validation_loader:
            if use_cuda:
                images, targets = images.cuda(), targets.cuda()
            # Predicted class = index of the highest score in each row.
            preds = model(images).argmax(dim=1)
            total += targets.size(0)
            correct += (preds == targets).sum().item()
    # Guard against an empty loader instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print('Accuracy test at epoch {}\tLabel Accuracy : - {}'.format(epoch, round(accuracy, 2)))
    return accuracy

# Training 
----

**N.B Loss calculation is different with InceptionNet**

In training mode it produces two outputs: the standard output and the auxiliary output. The auxiliary output is used to help prevent the vanishing gradient problem that can occur in very deep networks.
Loss is calculated like so: 
```python
outputs, aux_outputs = model(data)
loss1 = criterion(outputs, target)
loss2 = criterion(aux_outputs, target)
loss = loss1 + 0.4*loss2
```

This is described in the pytorch documentation [here](https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html)

In [None]:
# Defining the training function
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and save its weights whenever validation loss improves.

    Args:
        n_epochs: number of passes over the training loader.
        loaders: mapping with ``'train'`` and ``'valid'`` entries, each an
            iterable of ``(data, target)`` batches.
        model: network to train. If it returns a tuple in training mode, the
            first two items are treated as the main and auxiliary outputs.
        optimizer: optimiser stepped once per training batch.
        criterion: loss function called as ``criterion(output, target)``.
        use_cuda: when true, each batch is moved to the GPU first.
        save_path: file that receives ``model.state_dict()`` each time the
            validation loss reaches a new minimum.

    Returns:
        The model as it stands after the final epoch. The weights with the
        lowest validation loss are the ones stored at ``save_path``.
    """
    # Tracker for the minimum validation loss. float('inf') is used because
    # the capitalised np.Inf alias no longer exists in NumPy 2.0.
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # Running means of the per-batch losses for this epoch.
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            # clear the accumulated gradients
            optimizer.zero_grad()

            outputs = model(data)
            if isinstance(outputs, tuple):
                # Inception returns (main, auxiliary) outputs while training.
                # loss calculation https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                outputs, aux_outputs = outputs[0], outputs[1]
                loss = criterion(outputs, target) + 0.4 * criterion(aux_outputs, target)  # 0.4 is weight for auxiliary classifier
            else:
                # A plain tensor must not be tuple-unpacked: that would
                # silently split it along the batch dimension.
                loss = criterion(outputs, target)

            # gradient of the loss with respect to the parameters
            loss.backward()
            # perform the parameter update (update the weights)
            optimizer.step()

            # Incremental mean over batches; .item() keeps the tracker a
            # plain Python float rather than a tensor.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # Validation needs no gradients; skipping them avoids building
        # autograd graphs and reduces memory use.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                # get prediction and calculate the loss
                loss = criterion(model(data), target)
                # update the average validation loss
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

        label_accuracy(model, loaders['valid'], epoch, use_cuda)
    # return trained model
    return model

In [None]:
# Start training: 10 epochs, with the weights saved to disk each time the
# validation loss improves.
model_scratch = train(
    n_epochs=10,
    loaders=loaders,
    model=inception_net,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
    save_path='./model_cancer_detection.pt',
)