## BASE MODEL TO DETECT PARASITES and NON_PARASITES

The data set is downloaded from [kaggle](https://www.kaggle.com/iarunava/cell-images-for-detecting-malaria).
The target dataset is heavily imbalanced, hence we are trying to build a base model which has already learnt the basic features from a similar kind of dataset and can be used in the later with limited data.

In [1]:
# Import the required modules
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import time
import torchvision
import copy
from copy import copy
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms
# from torch.utils.data.sampler import SubsetRandomSampler

torch.cuda.current_device() # Work around for the Bug https://github.com/pytorch/pytorch/issues/20635

0

In [2]:
data_dir = r"C:\ADM_project\cell_images"

In [3]:
dataset_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(50),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

dataset = datasets.ImageFolder(data_dir, dataset_transform)

In [4]:
# Defining dataloaders which is used in creating subsets
dataset_loader = torch.utils.data.DataLoader(dataset, batch_size=16,
                                             shuffle=True, num_workers=4)

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [8]:
# As a pre-trained model we are considering VGG-16 with the pre-trained weights on ImageNet
model_vgg16 = models.vgg16(pretrained=True)

# Cancelling gradient descent calculation for all the layers
for param in model_vgg16.parameters():
    param.requires_grad = False

# Activating last two Convolutional layers of the network
len_features = len(model_vgg16.features)
for i in range(len_features-5, len_features):
    model_vgg16.features[i].requires_grad = True

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_vgg16.classifier[0].in_features
model_vgg16.classifier = nn.Sequential(
    nn.Linear(num_ftrs, 512), 
    nn.ReLU(), 
    nn.Dropout(p=0.2), 
    nn.Linear(512, 2)
)

# Loading the device to CUDA
model_vgg16.to(device)

# Loss Function definition
criterion = nn.CrossEntropyLoss()

# Using Adam as the optimizer for the feature network
optimizer_feature = optim.Adam(model_vgg16.features.parameters(), lr=0.001)
# Using Stochastic Gradient Descent as the optimizer for the classifier network
optimizer_classifier = optim.SGD(model_vgg16.classifier.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_classifier, step_size=7, gamma=0.1)

In [14]:
def train_model(model, criterion, optimizer_fe, optimizer_cl, scheduler, num_epochs=25):
    since = time.time()

#     best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataset_loader:
                # get the inputs
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # zero the parameter gradients
                optimizer_fe.zero_grad()
                optimizer_cl.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer_fe.step()
                    optimizer_cl.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
#                 print(preds==labels.data)
                running_corrects += torch.sum(preds == labels.data)
            print("Running Corrects: ", running_corrects)
            print("Dataset Size: ", len(dataset))
            epoch_loss = running_loss / len(dataset)
            epoch_acc = running_corrects.item() / len(dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
#                 best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
#     model.load_state_dict(best_model_wts)
    return model

In [15]:
model_ft = train_model(model_vgg16, criterion, optimizer_feature, optimizer_classifier, exp_lr_scheduler,
                       num_epochs=10)
# Free up Cached memory
torch.cuda.empty_cache()

Epoch 0/9
----------
Running Corrects:  tensor(1472, device='cuda:0')
Dataset Size:  1800
train Loss: 0.4115 Acc: 0.8178

Epoch 1/9
----------
Running Corrects:  tensor(1470, device='cuda:0')
Dataset Size:  1800
train Loss: 0.4012 Acc: 0.8167

Epoch 2/9
----------
Running Corrects:  tensor(1494, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3620 Acc: 0.8300

Epoch 3/9
----------
Running Corrects:  tensor(1536, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3356 Acc: 0.8533

Epoch 4/9
----------
Running Corrects:  tensor(1519, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3506 Acc: 0.8439

Epoch 5/9
----------
Running Corrects:  tensor(1517, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3429 Acc: 0.8428

Epoch 6/9
----------
Running Corrects:  tensor(1526, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3464 Acc: 0.8478

Epoch 7/9
----------
Running Corrects:  tensor(1529, device='cuda:0')
Dataset Size:  1800
train Loss: 0.3418 Acc: 0.8494

Epoch 8/9
----------
Run

In [13]:
torch.cuda.empty_cache()