In [1]:
import pandas as pd
from skimage import io

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, ToTensor

from tqdm.auto import tqdm

### Create a model

Here, we have the network model class definition. As defined in the exercises section, your task is to update the network architecture defined in this class such that the network will return the highest accuracy for the given training, validation, and testing data.

In [2]:
class ClassifierDNNModel(nn.Module):
    '''
    Classifier DNN Class
    Values:
        input_dim: number of channels in the images
        hidden_dim: inner dimension for conv layers
        linear_dim: dimenstion for the linear layer
        n_classes: number of output classes
    '''
    def __init__(self, input_dim=(128, 128, 3), hidden_dim=25, linear_dim=2048, n_classes=3, **kwargs):
        super(ClassifierDNNModel, self).__init__()

        (h, w, c) = input_dim

        # Build our model
        self.model = nn.Sequential(
            # input is           (c) x h      x w
            # output is (hidden_dim) x h / 2  x w / 2
            self.make_conv_block(c, hidden_dim),
            
            # input is  (hidden_dim)     x h / 2  x w / 2
            # output is (hidden_dim x 2) x h / 4  x w / 4
            self.make_conv_block(hidden_dim, hidden_dim * 2),

            nn.Flatten(),
            # dimensions after flatten layer is the image size * dimensions.
            self.make_linear_block(hidden_dim * 2 * h // 4 * w // 4, linear_dim),
            self.make_linear_block(linear_dim, n_classes, last_layer=True),
        )
    
    def make_conv_block(self, input_channels, output_channels):
        '''
        Returns a sequence corresponding to the convolutional layers in our DNN model.
        Parameters:
            input_channels: number of input channels to this block
            output_channels: number of output channels from this block
            batch_norm: if batch normalization should be used or not.
        '''
        return nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

    def make_linear_block(self, input_channels, output_channels, last_layer=False):
        '''
        Returns a sequence corresponding to the linear layers in our DNN model.
        Parameters:
            input_channels: number of input channels to this block
            output_channels: number of output channels from this block
            batch_norm: if batch normalization should be used or not.
            last_layer: if this is the final layers in our model
        '''
        if last_layer:
            return nn.Sequential(
                nn.Linear(input_channels, output_channels, bias=False),
                nn.Softmax(dim=1)
            )
        else:
            return nn.Sequential(
                nn.Linear(input_channels, output_channels, bias=False),
                nn.BatchNorm1d(output_channels),
                nn.ReLU()
            )

    def forward(self, image):
        '''
        Function for completing a forward pass: Given an image, 
        returns predicted class.
        Parameters:
            image: an image tensor with dimension (input_dim)
        '''
        return self.model(image)


### Setup network hyperparameters

We import the network hyperparameters and build a simple cnn by calling the class introduced in the previous cell.

In [3]:
hparam = {
    'input_dim': (128, 128, 3),
    'hidden_dim': 25,
    'linear_dim': 2048,
    'n_classes': 3,
    'device': 'cuda',
    'lr': 0.001,
    'n_epochs': 5,
}

model = ClassifierDNNModel(**hparam).to(hparam['device'])
model_opt = torch.optim.Adam(model.parameters(), lr=hparam['lr'])

### ETL

Extract, transform load the dataset.

In [4]:
class ArtificialFlyingObjectsDataset(Dataset):
    '''
    Artificial flying objects dataset.
    Values:
        root_dir: root directory where images can be found.
        split: If it should return training, validation, testing split
        fineGrained: If we should use the fine grained classes
        transforms: List of optional tensorvision transforms.
    '''
    def __init__(self, root_dir='../data/FlyingObjectDataset_10K', split='training', fineGrained=False, transforms=None):
        import glob
        self.split = split
        self.datadir = root_dir
        filenames = glob.glob(f'{root_dir}/{split}/image/*.png')
        categories = []
        for filename in filenames:
            seg = filename.split('_')

            if fineGrained:
                categories.append(seg[2] + "_" + seg[3])
            else:
                categories.append(seg[2])

        self.labels = pd.get_dummies(categories)
        self.filenames = list(filenames)
        
        if transforms:
            self.transforms = Compose(transforms.append(ToTensor()))
        else:
            self.transforms = Compose([ToTensor()])

    def __len__(self):
        '''
        Length of dataset
        '''
        return len(self.filenames)
    
    def __getitem__(self, idx):
        '''
        Get individual item.
        '''
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.filenames[idx]
        X = io.imread(img_name)
        y = torch.tensor(self.labels.iloc[idx, 0], dtype=torch.long)

        X = self.transforms(X)
        X = X.clone().detach()
        return (X, y)
    
    def __repr__(self):
        head = "Dataset " + self.__class__.__name__ + " : " + self.split
        body = [f"Number of images: {self.__len__()}"]
        body.append(f"Categories: {len(set(self.labels))}")
        body.append(f"Root location: {self.datadir}")
        if hasattr(self, "transforms") and self.transforms is not None:
            body += [f'Transforms: ' + repr(self.transforms)]
        lines = [head] + [" " * 4 + line for line in body]
        return '\n'.join(lines)

In [5]:
# Add optional augmentation as a list of transforms
# See: https://pytorch.org/docs/stable/torchvision/transforms.html
transform = None

train_ds = ArtificialFlyingObjectsDataset(transforms=transform)
val_ds = ArtificialFlyingObjectsDataset(split='validation', transforms=transform)
test_ds = ArtificialFlyingObjectsDataset(split='testing', transforms=transform)

train_dl = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    drop_last=True)

val_dl = DataLoader(
    val_ds,
    batch_size=32,
    shuffle=False,
    drop_last=True)

test_dl = DataLoader(
    test_ds,
    batch_size=32,
    shuffle=False,
    drop_last=True)

### Print statistics

Print some statistics about the dataset

In [6]:
print(train_ds)
print(val_ds)
print(test_ds)

Dataset ArtificialFlyingObjectsDataset : training
    Number of images: 10817
    Categories: 3
    Root location: ../data/FlyingObjectDataset_10K
    Transforms: Compose(
    ToTensor()
)
Dataset ArtificialFlyingObjectsDataset : validation
    Number of images: 2241
    Categories: 3
    Root location: ../data/FlyingObjectDataset_10K
    Transforms: Compose(
    ToTensor()
)
Dataset ArtificialFlyingObjectsDataset : testing
    Number of images: 2220
    Categories: 3
    Root location: ../data/FlyingObjectDataset_10K
    Transforms: Compose(
    ToTensor()
)


### Training

Setup training hparams and execute training of dataset.

In [7]:
n_epochs = hparam['n_epochs']
cur_step = 0
mean_loss = 0
mean_accuracy = 0
device = hparam['device']

loss_fn = nn.CrossEntropyLoss()

for epoch in range(n_epochs):
    # Dataloader returns the batches

    model.train()
    for x, y in tqdm(train_dl, desc='Train'):
        x, y = x.to(device), y.to(device)

        ## Update model ##
        model_opt.zero_grad()
        yh = model(x)
        loss = loss_fn(yh, y)
        loss.backward()
        model_opt.step()

    # Do verification each epoch
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for x, y in val_dl:
            x, y = x.to(device), y.to(device)
            yh = model(x)
            _, predicted = torch.max(yh.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    print(f'Validation accuracy {correct / total:.2f}')

# Calculate final test accuracy
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for x, y in test_dl:
        x, y = x.to(device), y.to(device)
        yh = model(x)
        _, predicted = torch.max(yh.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()

print(f'Test accuracy {correct / total:.2f}')

HBox(children=(IntProgress(value=0, description='Train', max=338, style=ProgressStyle(description_width='initi…


Validation accuracy 0.51


HBox(children=(IntProgress(value=0, description='Train', max=338, style=ProgressStyle(description_width='initi…


Validation accuracy 0.60


HBox(children=(IntProgress(value=0, description='Train', max=338, style=ProgressStyle(description_width='initi…


Validation accuracy 0.54


HBox(children=(IntProgress(value=0, description='Train', max=338, style=ProgressStyle(description_width='initi…


Validation accuracy 0.52


HBox(children=(IntProgress(value=0, description='Train', max=338, style=ProgressStyle(description_width='initi…


Validation accuracy 0.59
Test accuracy 0.61
