## BASE MODEL TO DETECT PARASITES and NON_PARASITES

The data set is downloaded from [kaggle](https://www.kaggle.com/iarunava/cell-images-for-detecting-malaria).
The target dataset is heavily imbalanced, so we build a base model that has already learnt the basic features from a similar dataset and can later be fine-tuned with limited data. We plan to employ Transfer Learning here.

In [1]:
# Import the required modules
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import time
import torchvision
import copy
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms
# from torch.utils.data.sampler import SubsetRandomSampler

# Workaround for https://github.com/pytorch/pytorch/issues/20635: query the
# current CUDA device right after importing torch.
# NOTE(review): this call presumably raises on a machine without a usable CUDA
# device -- confirm before running the notebook on a CPU-only host.
torch.cuda.current_device()

0

In [2]:
# Root folder that contains one sub-folder per split ('train', 'test'), each
# holding one sub-folder per class (the layout ImageFolder is given below).
# Set the MALARIA_DATA_DIR environment variable to point the notebook at a
# different location; the original author's local path is kept as the default.
data_dir = os.environ.get(
    "MALARIA_DATA_DIR",
    r"E:\Class_Notes_Sem2\ADM\Project\malaria-bounding-boxes\malaria\Processed_Images",
)

In [3]:
# Per-channel mean / std used for normalisation (the standard ImageNet
# statistics used with torchvision's pretrained models).
_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Deterministic pipeline shared by every evaluation split: resize, take the
# central 224x224 crop, convert to a tensor and normalise.
_eval_transform = transforms.Compose([
    transforms.Resize(240),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    _normalize,
])

transformormations = {
    # Training images get random augmentation (flip, rotation, colour jitter,
    # random-resized crop) before normalisation.
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(50),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        _normalize,
    ]),
    'test': _eval_transform,
    # NOTE(review): no 'valid' dataset is built below; the entry is kept so the
    # mapping still exposes the same keys as before.
    'valid': _eval_transform,
}

# One ImageFolder dataset per split, read from <data_dir>/<split>/<class>/...
dataset = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), transformormations[x])
    for x in ['train', 'test']
}

# Only the training split is shuffled; evaluation order does not influence the
# averaged metrics, so the test loader is kept deterministic.
dataset_loaders = {
    x: torch.utils.data.DataLoader(
        dataset[x],
        batch_size=16,
        shuffle=(x == 'train'),
        num_workers=4,
    )
    for x in ['train', 'test']
}

In [4]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [17]:
# Load the previously trained base model (a fully pickled nn.Module).
# NOTE(security): torch.load unpickles arbitrary Python objects -- only load
# checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# in which case loading a whole pickled model may additionally need
# weights_only=False -- confirm against the installed version.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved from a GPU session can still be loaded on a CPU-only host.
model = torch.load(r'E:\Class_Notes_Sem2\ADM\Project\first_model.pth', map_location=device)

# Freeze every parameter of the loaded model; only the new head defined below
# (whose fresh layers require gradients by default) will be trained.
for param in model.parameters():
    param.requires_grad = False

# Input width of the existing head.
# Assumes model.classifier is indexable and its first layer exposes
# `in_features` (e.g. nn.Linear) -- TODO confirm for the saved architecture.
num_ftrs = model.classifier[0].in_features

# Replace the head with a fresh two-layer classifier producing 6 outputs.
model.classifier = nn.Sequential(
    nn.Linear(num_ftrs, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 6)
)



In [19]:
# Move the model's parameters and buffers onto the selected device.
model.to(device)

# Loss function: cross-entropy over the classifier's outputs.
criterion = nn.CrossEntropyLoss()

# Adam optimises only the parameters of the classifier head.
optimizer_classifier = optim.Adam(params=model.classifier.parameters(), lr=0.003)

# Decay LR by a factor of 0.1 every 4 scheduler steps.
classifier_lr_scheduler = lr_scheduler.StepLR(
    optimizer=optimizer_classifier,
    step_size=4,
    gamma=0.1,
)

In [20]:
def train_model(model, criterion, optimizer_cl, scheduler2, num_epochs=25):
    """Train ``model`` and return it loaded with its best-scoring weights.

    Every epoch runs a 'train' pass followed by a 'test' pass over the
    module-level ``dataset_loaders``. Per-phase loss and accuracy are averaged
    over ``len(dataset[phase])`` and printed. The weights with the highest
    'test' accuracy seen so far are remembered and restored before returning.

    NOTE(review): the checkpoint is selected on the 'test' split, so an accuracy
    later reported on that same split is an optimistic estimate; consider a
    separate validation split.

    Args:
        model: network to train; called as ``model(inputs)``.
        criterion: loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer_cl: optimizer whose ``zero_grad()`` / ``step()`` are called
            once per training batch.
        scheduler2: learning-rate scheduler, stepped once per epoch *after*
            that epoch's training pass.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the best-scoring state dict loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and an evaluation phase.
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode for 'train', eval mode otherwise

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataset_loaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer_cl.zero_grad()

                # Track gradients only while training; the evaluation pass
                # does not need an autograd graph.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer_cl.step()

                # criterion's result is weighted by the batch size so the
                # epoch figure is normalised by the dataset length below.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            # Step the scheduler only after the optimizer has stepped for this
            # epoch; stepping it first shifts the schedule one epoch early.
            if is_train:
                scheduler2.step()

            epoch_loss = running_loss / len(dataset[phase])
            epoch_acc = running_corrects / len(dataset[phase])

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Remember the weights of the best-performing epoch.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model

In [21]:
def evaluate_model(model, datalaoder, criterion):
    """Compute the average loss and accuracy of ``model`` over ``datalaoder``.

    The model is switched to evaluation mode and left in that mode. Loss and
    accuracy are averaged over the number of samples actually yielded by
    ``datalaoder``, so the figures are correct for any loader, not only the
    'test' split. The result is also printed.

    Args:
        model: network to evaluate; called as ``model(inputs)``.
        datalaoder: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable, invoked as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(test_loss, test_acc)`` of Python floats.

    Raises:
        ValueError: if ``datalaoder`` yields no samples.
    """
    model.train(False)
    running_loss, running_corrects, n_samples = 0.0, 0, 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, labels in datalaoder:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            # Weight the batch loss by its size so the final division yields
            # a per-sample average.
            running_loss += loss.item() * batch_size
            running_corrects += torch.sum(preds == labels).item()
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError('evaluate_model received a dataloader that yielded no samples')

    test_loss = running_loss / n_samples
    test_acc = running_corrects / n_samples
    print('Test Loss: {:.4f} Acc: {:.4f}'.format(test_loss, test_acc))
    return test_loss, test_acc

In [22]:
# Train the new classifier head for 10 epochs; train_model hands back the model
# with the best-scoring weights loaded.
model_ft = train_model(
    model=model,
    criterion=criterion,
    optimizer_cl=optimizer_classifier,
    scheduler2=classifier_lr_scheduler,
    num_epochs=10,
)
# Cached GPU memory can be released afterwards with torch.cuda.empty_cache().

Epoch 0/9
----------
train Loss: 1.9102 Acc: 0.4025
test Loss: 1.3791 Acc: 0.3739

Epoch 1/9
----------
train Loss: 1.3778 Acc: 0.4496
test Loss: 1.3131 Acc: 0.4217

Epoch 2/9
----------
train Loss: 1.3861 Acc: 0.4597
test Loss: 1.1387 Acc: 0.5348

Epoch 3/9
----------
train Loss: 1.2527 Acc: 0.5102
test Loss: 1.1305 Acc: 0.5783

Epoch 4/9
----------
train Loss: 1.2475 Acc: 0.5078
test Loss: 1.1320 Acc: 0.5783

Epoch 5/9
----------
train Loss: 1.2099 Acc: 0.5184
test Loss: 1.1352 Acc: 0.5565

Epoch 6/9
----------
train Loss: 1.1936 Acc: 0.5359
test Loss: 1.1006 Acc: 0.5783

Epoch 7/9
----------
train Loss: 1.1795 Acc: 0.5383
test Loss: 1.0995 Acc: 0.5826

Epoch 8/9
----------
train Loss: 1.1780 Acc: 0.5461
test Loss: 1.1025 Acc: 0.5739

Epoch 9/9
----------
train Loss: 1.1667 Acc: 0.5466
test Loss: 1.1074 Acc: 0.5826

Training complete in 6m 39s
Best val Acc: 0.582609


In [14]:
# Report loss and accuracy of the fine-tuned model on the 'test' loader.
# NOTE(review): the stored output of this cell carries execution count 14,
# i.e. it predates the training run in cell 22, and its accuracy does not
# match that run -- re-execute after training to refresh it.
evaluate_model(model_ft, dataset_loaders['test'], criterion)

Test Loss: 0.1994 Acc: 0.9279


(0.1993926444538231, 0.9279035433070866)

In [16]:
# Free up cached CUDA memory now that training and evaluation are finished.
torch.cuda.empty_cache()