# Baseline Transfer Learning Model for TrashNet Classification
Our baseline model will include a pretrained DenseNet feature extractor with a shallow and wide CNN head. This model will have a homogenous learning rate. We are going to use K-Fold CV as well as F1 score and multi-class AUC to validate our model.
This model acts as a stepping stone / template for future experiments.

In [1]:
import os
import pkbar
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from PIL import Image
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data
from torch.utils.data.distributed import DistributedSampler
from torchvision import transforms, utils
from torchvision import models
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from pathlib import Path

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ROOT = Path('../asun/Smart-Trash/data/isbnet/')

## Data Pre-processing
For the baseline model, we will not be applying any data augmentation or color manipulation.
- Get the index CSV file that includes all files their respective directory and labels.

In [3]:
!pwd

/home/jovyan/work


### Trash Dataset
Dataset object to handle various sets of data that we will be dealing with including: TrashNet, ISBNet, and ISBNet extended.

In [4]:
def split(length, split, shuffle=True):
    """
    :returns: random samplers for both the training dataset and the validation dataset.
    """
    indicies = list(range(length))
    split = int(np.floor(split * length))
    
    if shuffle:
        np.random.shuffle(indicies)
    
    train_indicies, val_indicies = indicies[split:], indicies[:split]
    train_sampler = data.SubsetRandomSampler(train_indicies)
    val_sampler = data.SubsetRandomSampler(val_indicies)
    
    return train_sampler, val_sampler

A split function is defined to split the dataset after it is defined as a `DataSet` object. This makes it really easy to handle, because after splitting it, we are creating a sampler object. There is no need to modify the dataset object itself.

In [5]:
class TrashDataset(Dataset):
    def __init__(self, metadata: pd.DataFrame, directory: Path, transform=None):
        """
        metadata: DataFrame that contains information about each image and their labels.
        directory: the directory where the trash data is kept
        root_dir: path to the `directory`
        transform: optional augmentations that are to be applied onto the images
        """
        self.images_folder = directory
        self.meta = metadata
        self.transform = transform
        self.label_dict = {
            'cans': 0,
            'landfill': 1,
            'paper': 2,
            'plastic': 3,
            'tetrapak': 4
        }
  
    def __len__(self):
        return len(self.meta)
  
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = Image.open(self.meta.iloc[idx, 0])
        labels = [0] * 5
        labels[self.label_dict[self.meta.iloc[idx, 1]]] = 1
        sample = {'image': image,
                  'path': self.meta.iloc[idx, 0],
                  'label': torch.tensor(self.label_dict[self.meta.iloc[idx, 1]], dtype=torch.float)}

        if self.transform:
              sample['image'] = self.transform(sample['image'])
        return sample

Because we are using cross entropy loss we can express the loss as simply a scalar. This scalar is between [0-numclasses]. 

## Model and Training Setup
- VGG16 pretrained with ImageNet
- Wide and shallow CNN with fully connected and log-softmax activation
- CrossEntropy loss and Adam optimizer.

### Model Definition

In [6]:
class VGG16BN(nn.Module):
    def __init__(self):
        super(VGG16BN, self).__init__()
        self.head = models.densenet169(pretrained=True)
        self.head.requires_grad = False
        # Remove classification layers so that we are able to add our own CNN layers
        self.head.classifier = nn.Sequential(
                                    nn.Linear(1664, 1024, bias=True),
                                    nn.BatchNorm1d(1024),
                                    nn.ReLU(),
                                    nn.Dropout(0.05),
                                    nn.Linear(1024, 512, bias=True),
                                    nn.BatchNorm1d(512),
                                    nn.ReLU(),
                                    nn.Dropout(0.10),
                                    nn.Linear(512, 5, bias=True),
                                    nn.ReLU())
    def freeze(self, n_top, freeze_head=True):
        """
        Freeze head layers.
        """
        for index, layer in enumerate(self.head.children()):
            if index == len(list(self.head.children())) - 1:
                return
            else:
                layer.requires_grad = False
#         self.head.features.requires_grad = not freeze_head
#         for head in range(n_top):
#             self.head.classifier[head].requires_grad = False
        
    def forward(self, x):
        return self.head(x)
  
    def num_flat_features(self, x):
        """
        https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py
        """
        size = x.size()[1:]  # get all dimensions except for batch size
        features = 1
        for s in size:
            features *= s
        return features

### Define Constants

In [7]:
FOLDS = 5
EPOCHS = 50
BATCH_SIZE = 32

### KFold Training and CV
* KFold setup with `StratifiedKFold`
* Creating Dataloaders in training loop.
* Using Adam and CrossEntropy Loss
* Center crop on images to make them 224x224 so VGG will be able to take them.

In [8]:
train_transform = transforms.Compose([
                                transforms.Resize(256),
                                transforms.RandomResizedCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406],
                                                     [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406],
                                                     [0.229, 0.224, 0.225])
])

In [9]:
metadata = pd.read_csv(ROOT / 'metadata.csv')
TRAIN_VAL = TrashDataset(metadata, ROOT, transform)
metadata

Unnamed: 0,filepath,category,categorial_time,trashcan_id,trashcan_time,trashcan_location,landmarks
0,../asun/Smart-Trash/data/isbnet/cans/9A/202001...,cans,"(0, 0, 0, 1, 1, 1, 0, 0, 0, 0)",9A,"(0, 0, 0, 0, 0, 0, 0, 1, 1, 1)","[(24.07, 7.791), (2.209, 7.907), (8.372, 6.047...","['theater', 'printer', 'stairwell', 'bathroom'..."
1,../asun/Smart-Trash/data/isbnet/cans/9A/202001...,cans,"(0, 0, 0, 1, 1, 1, 0, 0, 0, 0)",9A,"(0, 0, 0, 0, 0, 0, 0, 1, 1, 1)","[(24.07, 7.791), (2.209, 7.907), (8.372, 6.047...","['theater', 'printer', 'stairwell', 'bathroom'..."
2,../asun/Smart-Trash/data/isbnet/cans/9A/202001...,cans,"(0, 0, 0, 1, 1, 1, 0, 0, 0, 0)",9A,"(0, 0, 0, 0, 0, 0, 0, 1, 1, 1)","[(24.07, 7.791), (2.209, 7.907), (8.372, 6.047...","['theater', 'printer', 'stairwell', 'bathroom'..."
3,../asun/Smart-Trash/data/isbnet/cans/6B/cans29...,cans,"(0, 0, 0, 1, 1, 1, 0, 0, 0, 0)",6B,"(0, 1, 0, 1, 0, 0, 1, 0, 0, 0)","[(0.465, 7.442), (15.116, 6.744), (10.349, 7.5...","['bathroom', 'stairwell', 'couch_area']"
4,../asun/Smart-Trash/data/isbnet/cans/6B/IMG_73...,cans,"(0, 0, 0, 1, 1, 1, 0, 0, 0, 0)",6B,"(0, 1, 0, 1, 0, 0, 1, 0, 0, 0)","[(0.465, 7.442), (15.116, 6.744), (10.349, 7.5...","['bathroom', 'stairwell', 'couch_area']"
...,...,...,...,...,...,...,...
883,../asun/Smart-Trash/data/isbnet/tetrapak/8A/tr...,tetrapak,"(0, 1, 1, 1, 1, 1, 0, 1, 1, 0)",8A,"(0, 0, 0, 1, 1, 1, 1, 1, 0, 0)","[(17.558, 31.977), (7.558, 1.86), (9.186, 13.8...","['cafeteria', 'stairwell', 'bathroom', 'librar..."
884,../asun/Smart-Trash/data/isbnet/tetrapak/8A/tr...,tetrapak,"(0, 1, 1, 1, 1, 1, 0, 1, 1, 0)",8A,"(0, 0, 0, 1, 1, 1, 1, 1, 0, 0)","[(17.558, 31.977), (7.558, 1.86), (9.186, 13.8...","['cafeteria', 'stairwell', 'bathroom', 'librar..."
885,../asun/Smart-Trash/data/isbnet/tetrapak/8A/20...,tetrapak,"(0, 1, 1, 1, 1, 1, 0, 1, 1, 0)",8A,"(0, 0, 0, 1, 1, 1, 1, 1, 0, 0)","[(17.558, 31.977), (7.558, 1.86), (9.186, 13.8...","['cafeteria', 'stairwell', 'bathroom', 'librar..."
886,../asun/Smart-Trash/data/isbnet/tetrapak/8A/tr...,tetrapak,"(0, 1, 1, 1, 1, 1, 0, 1, 1, 0)",8A,"(0, 0, 0, 1, 1, 1, 1, 1, 0, 0)","[(17.558, 31.977), (7.558, 1.86), (9.186, 13.8...","['cafeteria', 'stairwell', 'bathroom', 'librar..."


### Model Definition

In [10]:
#model = VGG16BN()
#model.freeze(5)
# model = nn.DataParallel(model, device_ids=[0,1,2,3])
#model = model.to(0)

### Loss Function and Optimizer

In [11]:
#celoss = nn.CrossEntropyLoss(weight=torch.tensor([6.0241, 3.6496, 2.4390, 1.0823, 4.3860]).to(0, dtype=torch.float))
#optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)

### Training and Validation

In [10]:
#train_sampler, val_sampler = split(len(TRAIN_VAL), 0.13)
#train_loader = DataLoader(TRAIN_VAL, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4)
#valid_loader = DataLoader(TRAIN_VAL, batch_size=BATCH_SIZE, sampler=val_sampler, num_workers=4)

trash_labels = metadata.iloc[:,1].values
#print(trash_labels)

s = StratifiedKFold(n_splits=FOLDS, shuffle=True).split(metadata, trash_labels)

In [None]:
for fold, (train_idx, test_idx) in enumerate(s):
    
    model = VGG16BN()
    model.to(device)
    
    celoss = nn.CrossEntropyLoss(weight=torch.tensor([6.0241, 3.6496, 2.4390, 1.0823, 4.3860]).to(0, dtype=torch.float))
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
    
    max_f1 = 0
    
    train = TrashDataset(metadata.iloc[train_idx,:], ROOT, train_transform)
    test = TrashDataset(metadata.iloc[test_idx,:], ROOT, val_transform)
    
    train_loader = torch.utils.data.DataLoader(train, 
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=4) 
    test_loader = torch.utils.data.DataLoader(test, 
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=4)

    for epoch in range(EPOCHS):
        print(f'Fold: {fold+1} Epoch: {epoch+1}/{EPOCHS} Max F1: {max_f1}')
        pbar = pkbar.Kbar(target=len(train_loader), width=15)
        # Training 
        model.train()
        for batch_num, inputs in enumerate(train_loader):
            images = inputs['image'].to(0, dtype=torch.float)
            labels = inputs['label'].to(0, dtype=torch.long)

            # Forward Feeding
            optimizer.zero_grad()
            outputs = model(images)
            loss_value = celoss(outputs, labels)
            loss_value.backward()
            optimizer.step()

            # Generate Metrics and Update Progress Bar Every 10~20 Batches
            predictions = torch.max(outputs, 1)[1].cpu().detach().numpy()
            metric_label = labels.cpu().detach().numpy()
            f1 = f1_score(metric_label, predictions, average='macro')
            accuracy = accuracy_score(metric_label, predictions)

            # Update Progress Bar
            pbar.update(batch_num, values=[('CELoss', loss_value.item()), ('F1_Score', f1),
                                           ('Accuracy', accuracy)])

            # Free up CUDA memory
            del images, labels
            torch.cuda.empty_cache()

        val_loss, val_f1, val_acc = [], [], []
        model.eval()
        for inputs in test_loader:
            images = inputs['image'].to(0, dtype=torch.float)
            labels = inputs['label'].to(0, dtype=torch.long)

            # Forward Feeding
            outputs = model(images)
            predictions = torch.max(outputs, 1)[1].cpu().detach().numpy()
            metric_label = labels.cpu().detach().numpy()

            # Metric Calculation
            val_loss.append(celoss(outputs, labels).item())
            val_f1.append(f1_score(metric_label, predictions, average='macro'))
            val_acc.append(accuracy_score(metric_label, predictions))

        pbar.add(1, values=[('val_CELoss', sum(val_loss)/len(val_loss)),
                            ('val_F1_Score', sum(val_f1)/len(val_f1)),
                            ('val_Accuracy', sum(val_acc)/len(val_acc))])
        if sum(val_f1)/len(val_f1) > max_f1:
            max_f1 = sum(val_f1)/len(val_f1)
            torch.save(model.state_dict(), f'../asun/Smart-Trash/models/kfold-nometa-densenet169/model{fold}-{epoch}.pth')

Fold: 1 Epoch: 1/50 Max F1: 0
Fold: 1 Epoch: 2/50 Max F1: 0.5320718606012724
Fold: 1 Epoch: 3/50 Max F1: 0.7368379311320488
Fold: 1 Epoch: 4/50 Max F1: 0.8102994814674114
Fold: 1 Epoch: 5/50 Max F1: 0.835318701360368
Fold: 1 Epoch: 6/50 Max F1: 0.8407265995994155
Fold: 1 Epoch: 7/50 Max F1: 0.8469244661318699
Fold: 1 Epoch: 8/50 Max F1: 0.889987671346367
Fold: 1 Epoch: 9/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 10/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 11/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 12/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 13/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 14/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 15/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 16/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 17/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 18/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 19/50 Max F1: 0.8983689472883847
Fold: 1 Epoch: 20/50 Max F1: 0.9002408344852587
Fold: 1 Epoch: 21/50 Max F1: 0.9002408344852587
Fold: 1 Epoch: 22/50

In [None]:
# def validation(model, validation_loader, loss, device):
#     model = model.to(0)
#     model.eval()
#     loss_log, acc_log, f1_log = [], [], []
#     for batch_num, inputs in enumerate(validation_loader):
#         # Load data onto device: GPU or CPU
#         images = inputs['image'].to(0, dtype=torch.float)
#         labels = inputs['label'].to(0, dtype=torch.long)
        
#         # Forward Feeding
#         outputs = model(images)
#         loss_value = loss(outputs, labels).mean()
#         preds = torch.max(outputs, 1)[1].cpu().detach().numpy()
#         loss_log.append(loss_value)
        
#         # Metric Calculation
#         acc = accuracy_score(preds, labels.cpu().detach().numpy())
#         f1 = f1_score(labels.cpu().detach().numpy(), preds, average='macro')
#         acc_log.append(acc)
#         f1_log.append(f1)
#         # Free up memory
#         del images, labels
#         torch.cuda.empty_cache()
        
#     return sum(loss_log) / len(loss_log), sum(acc_log) / len(acc_log), sum(f1_log) / len(f1_log)

In [None]:
# def train(epochs, model, batchsize, train_loader, validation_loader, 
#           loss, optimizer, device):
#     model = model.to(0)
#     model.train()
#     for epoch in range(epochs):
#         print(f'Epochs: {epoch+1}/{EPOCHS}')
#         kbar = pkbar.Kbar(target=len(train_loader), width=10)
#         for batch_num, inputs in enumerate(train_loader):
#             # Load data onto device: GPU or CPU
#             images = inputs['image'].to(0, dtype=torch.float)
#             labels = inputs['label'].to(0, dtype=torch.long)
#             # Zero the optimizer
#             optimizer.zero_grad()
#             # Forward Feeding
#             outputs = model(images)
#             loss_value = loss(outputs, labels)
#             # Backpropagation
#             loss_value.mean().backward()
#             optimizer.step()
#             # Metric Calculation
#             preds = torch.max(outputs, 1)[1].cpu().detach().numpy()
#             acc = accuracy_score(preds, labels.cpu().detach().numpy())
#             train_f1 = f1_score(labels.cpu().detach().numpy(), preds, average='macro')
#             # Update progress bar
#             kbar.update((batch_num+1), values=[('loss', loss_value), 
#                                                ('acc', acc), 
#                                                ('f1', train_f1)])
#             # Free up memory
#             del images, labels
#             torch.cuda.empty_cache()
#         l, ac, f1 = validation(model, validation_loader, loss, device)
#         kbar.add(1, values=[('val_loss', l), 
#                             ('val_acc', ac),
#                             ('val_f1', f1)])

In [None]:
# # 0.083
# model = nn.DataParallel(model, device_ids=[0,1,2,3])
# train(EPOCHS, model, BATCH_SIZE, train_loader, valid_loader,
#       celoss, optimizer, device)