In [57]:
import os
from PIL import Image

import numpy as np
import pandas as pd

from tqdm import tqdm, trange

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from torchvision import datasets, transforms, models

## Hyperparameters

Hypterparamters for the whole `training` and `testing`

In [43]:
TRAIN_DIR = './dataset/TRAIN/'
TEST_DIR = './dataset/TEST/'

DATASET_MEAN = [0.4906, 0.5203, 0.5372]
DATASET_STD = [0.2412, 0.2285, 0.2351]

CROP_SIZE = 256           # Image Crop size
IMAGE_SIZE = 224          # Image size, that will fed to Model

BATCH_SIZE = 32           # Batch Size for Training

LR = 1                    # Learning rate for the Optimizer
EPOCHS = 50               # No of Epochs to be trained

CENTER_CROP_FIVE = True   # Use Five crop method for testing

In [3]:
dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transforms.Compose([transforms.ColorJitter(0.2, 0.2),
                                                                                     transforms.Resize(CROP_SIZE),
                                                                                     transforms.RandomHorizontalFlip(),
                                                                                     transforms.RandomCrop(IMAGE_SIZE),
                                                                                     transforms.ToTensor(),
                                                                                     transforms.Normalize(DATASET_MEAN, DATASET_STD),
                                                                                     transforms.RandomErasing()]))

In [35]:
# Setup Classes
CLASSES = dataset.classes

class_to_idx = dataset.class_to_idx
idx_to_class = {i:c for c, i in class_to_idx.items()}
print(f"No of CLASSES is {len(CLASSES)}")

No of CLASSES is 9


### Dataloaders

In this code section, it creates dataloader for training

In [5]:
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

## CUDA

Checks for GPU for training

In [6]:
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
if torch.cuda.is_available():
    print(f"Using {torch.cuda.get_device_name()} for Training")
else:
    print("Using CPU for Training")

Using GeForce 940MX for Training


## Models

### Densenet Model

This model is based on `densenet161`. It uses the transfer learning feature to fine tune the densenet model. I changed the last layer of the model to match the current requirement

To know more about the Densenet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)

In [7]:
class DensenetModel(nn.Module):
    def __init__(self):
        super(DensenetModel, self).__init__()
        m = models.densenet121(pretrained=True)
        fc = nn.Linear(m.classifier.in_features, len(CLASSES))
        m.classifier = fc
        self.model = m
        
    def forward(self, x):
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x

***After a lot of research, I decided to use the `densenet121` model for the training.***

In [8]:
model = DensenetModel()
model = model.to(device)
model

DensenetModel(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, momentum

## Loss and Optimizer


**Loss:** As the Loss function, I am using the negative log likelihood loss, which is `LogSoftmax + CrossEntropyLoss`

**Optimizer:**: For the optimizer, I am using the `Adam` optimizer with the momentum of `0.5`

In [9]:
# Loss
criterion = nn.NLLLoss().cuda() if torch.cuda.is_available() else nn.NLLLoss()

# Optimizer
# optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=LR, betas=(0.5, 0.999))

## LR scheduler

I created a custom `LR scheduler`, which 

1. starts training at `LR_START`,
2. then increses the LR for `LR_RAMPUP_EPOCHS` upto LR_MAX
3. then it sustains the LR for `LR_SUSTAIN_EPOCHS`
4. lastly it decays the LR exponentilly

In [10]:
LR_START = 1e-5
LR_MAX = 7e-5 
LR_MIN = LR_START
LR_RAMPUP_EPOCHS = 7
LR_SUSTAIN_EPOCHS = 0 # 3
LR_EXP_DECAY = 0.80

def lrfn(epoch):
    if epoch < LR_RAMPUP_EPOCHS:
        lr = LR_START + (epoch * (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS)
    elif epoch < (LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS):
        lr = LR_MAX
    else:
        lr = LR_MIN + (LR_MAX - LR_MIN) * LR_EXP_DECAY ** (epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)
#    print('For epoch', epoch, 'setting lr to', lr)
    return lr

In [11]:
scheduler = lr_scheduler.LambdaLR(optimizer, lrfn, verbose=True)

Adjusting learning rate of group 0 to 1.0000e-05.


In [None]:
train_losses = []

print(f"Training Stated on {datetime.datetime.now().replace(microsecond=0)}")

for epoch in range(1, EPOCHS + 1):
    a = datetime.datetime.now().replace(microsecond=0)
    #Training
    train_loss = 0.0
    
    model.train()
    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * x.size(0)
        
    train_loss /= len(trainloader.dataset)
    train_losses.append(train_loss)
    
    print(f'Saving model for Epoch {epoch}')
    torch.save(model.state_dict(), f'./models/model_{epoch}.pt')
    
    if scheduler is None:
        print("No Schedular found. LR will not change")
    else:
        if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
            scheduler.step(test_loss)
        else:
            scheduler.step()
    
    b = datetime.datetime.now().replace(microsecond=0)    
    print(f"[Epoch: {epoch}/{EPOCHS}] Time Taken: {b-a} training loss: {train_loss}")
    
print("End of training!!")

### Training Stats

In [None]:
#Cheaks the training and testing losses

plt.plot(train_losses)

# Testing

All this codebase is for testing the model and writing the `output.csv` file.

In [13]:
test_filenames = os.listdir(TEST_DIR)

In [47]:
if not CENTER_CROP_FIVE:
    test_transform = transforms.Compose([transforms.Resize(CROP_SIZE),
                                         transforms.CenterCrop(IMAGE_SIZE),
                                         transforms.ToTensor(),
                                         transforms.Normalize(DATASET_MEAN, DATASET_STD)])
else:
    test_transform = transforms.Compose([transforms.Resize(CROP_SIZE),
                                         transforms.FiveCrop(IMAGE_SIZE),
                                         transforms.Lambda(lambda crops: torch.stack([transforms.Compose([
                                             transforms.ToTensor(),
                                             transforms.Normalize(DATASET_MEAN, DATASET_STD)])(crop) for crop in crops]))
                                        ])
print(test_transform)

Compose(
    Resize(size=256, interpolation=PIL.Image.BILINEAR)
    FiveCrop(size=(224, 224))
    Lambda()
)


In [62]:
def load_and_infarence(epoch):
    model.load_state_dict(torch.load(f'./models/model_{epoch}.pt'))

    y_test_pred = np.empty(0)

    model.eval()
    with torch.no_grad():
        for test_filename in tqdm(test_filenames):
            img = Image.open(os.path.join(TEST_DIR, test_filename)).convert('RGB')
            img = test_transform(img).to(device)
            if CENTER_CROP_FIVE:
                out = model(img)
                out = torch.exp(out)
                out = out.argmax(1).median()
                y_test_pred = np.append(y_test_pred, out.cpu().numpy())
            else:
                out = model(img.unsqueeze(0))
                out = torch.exp(out).squeeze(0)
                out = out.argmax()
                y_test_pred = np.append(y_test_pred, out.cpu().numpy())

        d = {'Filename': test_filenames, 'Class': y_test_pred.astype(int)}
        pd.DataFrame(d).replace({'Class': idx_to_classes}).to_csv(f'./output_{epoch}.csv', index=False)

In [63]:
for i in trange(1, 31):
    print(f"Working for epoch {i}")
    load_and_infarence(i)

  0%|                                                                                           | 0/30 [00:00<?, ?it/s]

Working for epoch 1





FileNotFoundError: [Errno 2] No such file or directory: './models/model_1.pt'