## Convolutional Neural Networks

### Understanding CNNs
Certain types of NNs, particularly CNNs, can automatically learn features from raw datasets that are most useful for the task at hand.

It is common to consider the early layers of a CNN as feature extractors, while the later layers are usually fully connected (i.e. an MLP) and use the features extracted by the convolutional layers to perform a regression or classification task.

### Loss Functions for Classification

**Binary Cross Entropy** is the loss function for binary classification while **Categorical Cross-Entropy** is the loss function for multiclass classification.

For Binary Classification: 
1. `BCELoss` : pass in class probabilities
2. `BCEWithLogitsLoss` : pass in the logits

For Multiclass Classification:
1. `NLLLoss` (Negative Log Likelihood) : pass in log probabilities
2. `CrossEntropyLoss` : preferred that logits are passed in due to numerical stability.

In [None]:
import torch
import torch.nn as nn
import platform as pl

use_gpu = True

# CUDA is only attempted on Linux, as in the original notebook.  Every other
# combination (GPU disabled, CUDA unavailable, or a non-Linux platform) falls
# back to the CPU so that `device` is always defined for the cells below.
if use_gpu and pl.system().lower() == "linux" and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device {device}")

In [None]:
# Binary cross-entropy: BCELoss consumes probabilities, BCEWithLogitsLoss
# consumes raw logits; both must report the same value.
logits = torch.tensor([0.8])
target = torch.tensor([1.0])
probas = torch.sigmoid(logits)
bce_loss_fn = nn.BCELoss()
bce_logits_loss_fn = nn.BCEWithLogitsLoss()
print(f"BCE (w/probas) = {bce_loss_fn(probas, target):.4f}")
print(f"BCE (w/logits) = {bce_logits_loss_fn(logits, target):.4f}")

# Categorical cross-entropy: NLLLoss consumes log-probabilities,
# CrossEntropyLoss consumes raw logits; both must report the same value.
logits = torch.tensor([[1.5, 0.8, 2.1]])
# Class-index targets must be integer (long) tensors; float targets are rejected.
target = torch.tensor([2], dtype=torch.long)
# log_softmax is the numerically stable form of log(softmax(x)).
log_probas = torch.log_softmax(logits, dim=1)
cce_loss_fn = nn.NLLLoss()
cce_loss_logits_fn = nn.CrossEntropyLoss()
print(f"CCE (w/log-probas) = {cce_loss_fn(log_probas, target):.4f}")
print(f"CCE (w/logits) = {cce_loss_logits_fn(logits, target):.4f}")

### Loading and Preprocessing Data

In [None]:
import torchvision
from torchvision import transforms

In [None]:
# Fetch the MNIST training split into the current directory (downloaded on
# first use) and convert every image to a tensor when it is accessed.
image_path = "./"
transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)
mnist_dataset = torchvision.datasets.MNIST(
    root=image_path,
    train=True,
    download=True,
    transform=transform,
)

In [None]:
from torch.utils.data import Subset
# Hold out the first 10,000 training images for validation and train on the
# remainder; the official test split is loaded from the files already on disk.
mnist_val = Subset(dataset=mnist_dataset, indices=torch.arange(10000))
mnist_train = Subset(
    dataset=mnist_dataset,
    indices=torch.arange(10000, len(mnist_dataset)),
)
mnist_test = torchvision.datasets.MNIST(
    root=image_path,
    train=False,
    download=False,
    transform=transform,
)

In [None]:
from torch.utils.data import DataLoader
batch_size = 64
# Seed the global RNG before building the shuffled loader so runs are repeatable.
torch.manual_seed(42)
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
# Validation must read the held-out split (mnist_val), not the training data;
# otherwise the reported validation accuracy is just training accuracy.
val_loader = DataLoader(mnist_val, batch_size=batch_size, shuffle=False)

It is important to note that PyTorch expects image batches in *NCHW* format (num_batch_img x num_channel x height x width).

In [None]:
class CNN(nn.Module):
    """Two-stage convolutional feature extractor with an optional MLP head.

    The convolutional stack is ``Conv2d(1 -> 32) -> ReLU -> MaxPool2d(2) ->
    Conv2d(32 -> 64) -> ReLU -> MaxPool2d(2)``.  Both convolutions use
    ``kernel_size=5`` with ``padding=2``, so only the pooling layers shrink the
    spatial size (each halves it).  For ``1 x 28 x 28`` inputs the output is
    ``64 x 7 x 7``, i.e. 3136 features once flattened.

    Args:
        add_flatten: When true, append ``nn.Flatten`` after the convolutional
            stack so fully connected layers can be attached with
            :meth:`add_fully_connected`.
    """

    def __init__(self, add_flatten=False):
        super().__init__()
        layers = [
            # first convolution stage
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            # second convolution stage
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]

        if add_flatten:
            layers.append(nn.Flatten())

        self.layers = nn.Sequential(*layers)

    def add_fully_connected(self, input_size, output_size, hidden_sizes):
        """Append a fully connected head to the end of ``self.layers``.

        Every entry of ``hidden_sizes`` becomes a ``Linear -> ReLU ->
        Dropout(p=0.5)`` group, followed by a final ``Linear`` that maps to
        ``output_size``.  With a single hidden size the modules are named
        ``FC1``, ``ReLU``, ``Dropout`` and ``FC2``; additional hidden layers
        get numbered names (``FC2``, ``ReLU2``, ``Dropout2``, ...).  An empty
        ``hidden_sizes`` yields a single ``Linear(input_size, output_size)``.

        Args:
            input_size: Number of features produced by the preceding layers.
            output_size: Number of outputs of the final linear layer.
            hidden_sizes: List (or tuple) of hidden layer widths.

        Raises:
            TypeError: If ``hidden_sizes`` is not a list or tuple.
        """
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not isinstance(hidden_sizes, (list, tuple)):
            raise TypeError("Hidden Sizes not a list.")

        in_features = input_size
        for idx, width in enumerate(hidden_sizes, start=1):
            # The first group keeps the un-numbered "ReLU"/"Dropout" names so
            # single-hidden-layer models keep their existing module names.
            suffix = "" if idx == 1 else str(idx)
            self.layers.add_module(f"FC{idx}", nn.Linear(in_features, width))
            self.layers.add_module(f"ReLU{suffix}", nn.ReLU())
            self.layers.add_module(f"Dropout{suffix}", nn.Dropout(p=0.5))
            in_features = width

        self.layers.add_module(
            f"FC{len(hidden_sizes) + 1}", nn.Linear(in_features, output_size)
        )

    def forward(self, x):
        """Run ``x`` through every layer currently registered in ``self.layers``."""
        return self.layers(x)

In [None]:
# Push a dummy batch (4 single-channel 28x28 images) through the convolutional
# stack alone to read off the shape of the resulting feature maps.
x = torch.ones(4, 1, 28, 28)
model = CNN(add_flatten=False)
model(x).shape

In [None]:
# Rebuild the network with a trailing nn.Flatten so fully connected layers can
# be attached afterwards; the displayed shape gives the flattened feature count
# that the first fully connected layer must accept.
model = CNN(add_flatten=True)
model(x).shape

In [None]:
# Attach the classifier head: 64 channels x 7 x 7 flattened features ->
# 1024 hidden units -> 10 outputs (one per digit class).
model.add_fully_connected(
    input_size=64 * 7 * 7,
    hidden_sizes=[1024],
    output_size=10,
)

In [None]:
# Multiclass objective on raw logits, optimised with Adam.  The optimizer is
# built after the fully connected head is attached so it sees every parameter.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
)

In [None]:
def train(model, train_loader, val_loader, loss_fn, optimizer, num_epochs=20, log_idx=1, verbose=False):
    """Train ``model`` and track per-epoch loss and accuracy.

    The model and every batch are moved to the notebook-level ``device``.
    Predictions are the arg-max over dimension 1 of the model output.

    Args:
        model: Module mapping a feature batch to per-class logits.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss.
            If it exposes ``reduction == "mean"`` (or has no ``reduction``
            attribute) the batch loss is treated as a per-example mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        log_idx: Print a progress line every ``log_idx`` epochs when
            ``verbose`` is true.
        verbose: Enable progress printing.

    Returns:
        Tuple ``(train_losses, train_accuracies, val_accuracies)``; each is a
        list with one float per epoch.  ``train_losses`` holds the average
        loss per training example.
    """
    model.to(device)

    # A mean-reduced loss is already averaged over the batch, so it must be
    # weighted by the batch size before it is averaged over all examples.
    loss_is_mean = getattr(loss_fn, "reduction", "mean") == "mean"

    train_losses = []
    train_accuracies = []
    val_accuracies = []

    for e in range(num_epochs):
        # train
        train_acc, train_examples, train_loss = 0.0, 0, 0.0
        model.train()
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            num_in_batch = features.shape[0]

            logits = model(features)
            loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # arg-max directly on the logits: squashing them first cannot
            # change the winner and can create ties once it saturates.
            preds = torch.argmax(logits, dim=1)
            batch_loss = loss.item()

            train_acc += (preds == labels).sum().item()
            train_loss += batch_loss * num_in_batch if loss_is_mean else batch_loss
            train_examples += num_in_batch

        train_losses.append(train_loss/train_examples)
        train_accuracies.append(train_acc/train_examples)

        # validation
        val_acc, val_examples = 0.0, 0
        model.eval()
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                preds = torch.argmax(model(features), dim=1)
                val_acc += (preds == labels).sum().item()
                val_examples += features.shape[0]

        val_accuracies.append(val_acc/val_examples)

        if verbose and e % log_idx == 0:
            print(f"Epoch {e}/{num_epochs}: Train Loss = {train_loss/train_examples:.4f} | "
                  f"Train Acc = {train_acc/train_examples:.4f} | "
                  f"Val Acc = {val_acc/val_examples:.4f}")

    return train_losses, train_accuracies, val_accuracies

In [None]:
# Run the full training loop with per-epoch progress output and keep the
# per-epoch histories it returns.
train_losses, train_accuracies, val_accuracies = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    verbose=True,
)