## Ungraded Lab: Activation in Custom Layers

In this lab, we extend our knowledge of building custom layers by adding an activation parameter. The implementation is pretty straightforward as you'll see below.

## Imports

In [1]:
import math
import numpy as np
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torchvision.datasets import MNIST
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [2]:
# Run on the GPU whenever PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
print("device: ", device)

device:  cuda


## Adding an activation layer

To use the built-in activations in PyTorch, we can add an `activation` parameter to the `__init__()` method of our custom layer class and, inside `__init__()`, look up the function with that name in `torch.nn.functional`. Then, in the `forward()` method, we pass the result of the forward computation through this activation before returning it.

In [3]:
class SimpleDense(nn.Module):
    """Fully connected layer computing ``activation(input @ weight.T + bias)``.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
        bias: If ``True``, a learnable bias of shape ``(out_features,)`` is
            added to the output. If ``False``, ``self.bias`` is ``None``.
        activation: ``None`` (no activation), the name of a function in
            ``torch.nn.functional`` such as ``"relu"``, or a callable that is
            applied to the output.
        device: Device on which the parameters are allocated.
        dtype: Data type of the parameters.
    """

    def __init__(self, in_features, out_features, bias=True, activation=None, device=None, dtype=None):
        super().__init__()

        factory_kwargs = {'device': device, 'dtype': dtype}

        # Layer dimensions
        self.in_features = in_features
        self.out_features = out_features

        # Activation: resolve string names against torch.nn.functional,
        # accept callables as they are.
        if activation is None:
            self.activation = None
        elif callable(activation):
            self.activation = activation
        else:
            self.activation = getattr(nn.functional, activation)
        self.apply_activation = self.activation is not None

        # Weight
        self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))

        # Bias
        # NOTE: the ``bias`` flag must not be stored on ``self.bias`` first;
        # a plain attribute with that name makes ``register_parameter`` raise
        # ``KeyError`` when ``bias=False``.
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)

        # Weight and bias initialization
        self._reset_parameters()

    def forward(self, input):
        """Apply the layer to ``input``.

        Args:
            input: Tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``input`` and a last
            dimension of size ``out_features``.

        Raises:
            ValueError: If the last dimension of ``input`` is not
                ``in_features``.
        """
        if input.dim() == 0 or input.shape[-1] != self.in_features:
            raise ValueError(
                f'Wrong Input Features. Please use tensor with {self.in_features} Input Features'
            )

        output = input.matmul(self.weight.t())

        if self.bias is not None:
            output = output + self.bias

        if self.apply_activation:
            return self.activation(output)

        return output

    def _reset_parameters(self):
        """Initialize the weight with Kaiming-uniform and the bias uniformly in ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``."""
        torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight)
            # Guard against a zero fan-in so the bound never divides by zero.
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            torch.nn.init.uniform_(self.bias, -bound, bound)

## Prepare the Dataset

In [4]:
# Image Transform
# Steps run in the listed order: ToTensor first, then Normalize with a
# single-channel mean of 0.5 and standard deviation of 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

In [5]:
# Load Dataset
# Build the training split (train=True) and the test split (train=False) with
# identical settings; both are downloaded into the current directory if missing.
train_data, test_data = (
    MNIST(root='./', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
# DataLoader
# Both loaders share the same settings: batches of 32, reshuffled on every
# pass, two worker processes, and pinned host memory.
train_loader, val_loader = (
    DataLoader(
        dataset=split,
        batch_size=32,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    for split in (train_data, test_data)
)

In [7]:
# Build the Model
# 784 -> 128 dense layer with ReLU, dropout, 128 -> 10 dense layer, then
# log-softmax over the class dimension. `.to(device)` returns the same module,
# so the move can be chained onto the constructor.
model = nn.Sequential(
    SimpleDense(784, 128, activation="relu"),
    nn.Dropout(p=0.2),
    SimpleDense(128, 10),
    nn.LogSoftmax(dim=1),
).to(device)
model

Sequential(
  (0): SimpleDense()
  (1): Dropout(p=0.2, inplace=False)
  (2): SimpleDense()
  (3): LogSoftmax(dim=1)
)

In [8]:
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Loss
# The model already ends with nn.LogSoftmax(dim=1), so its outputs are
# log-probabilities. NLLLoss is the criterion that consumes log-probabilities;
# CrossEntropyLoss expects raw logits and would apply log-softmax a second time.
criterion = torch.nn.NLLLoss()

In [9]:
# Train the Model
EPOCHS = 5

# Enable training-mode behaviour (e.g. dropout) for the whole training run.
model.train()

for epoch in range(EPOCHS):
    running_loss = 0
    correct = 0  # plain Python int so the reported accuracy is an exact ratio

    for images, labels in train_loader:
        # Flatten each image to a vector: (batch, ...) -> (batch, features)
        images = images.flatten(start_dim=1)
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest output per sample. detach()
        # replaces the legacy `.data` access.
        pred = output.detach().argmax(dim=1)
        correct += (pred == labels).sum().item()

        running_loss += loss.item()

    print(f"Epoch: {epoch}, loss: {running_loss/len(train_loader)}, accuracy: {correct/len(train_loader.dataset)}")


# Evaluate Trained Model
running_loss = 0
correct = 0  # plain Python int so the reported accuracy is an exact ratio

# Switch layers such as dropout to inference behaviour.
model.eval()

# No gradients are needed for evaluation; disabling autograd avoids building a
# computation graph for every validation batch.
with torch.no_grad():
    for images, labels in val_loader:
        # Flatten each image to a vector: (batch, ...) -> (batch, features)
        images = images.flatten(start_dim=1)
        images, labels = images.to(device), labels.to(device)

        output = model(images)
        loss = criterion(output, labels)

        # Predicted class = index of the largest output per sample.
        pred = output.argmax(dim=1)
        correct += (pred == labels).sum().item()

        running_loss += loss.item()

print(f"\nValidation - loss: {running_loss/len(val_loader)}, accuracy: {correct/len(val_loader.dataset)}")

Epoch: 0, loss: 0.40303231131831807, accuracy: 0.878350019454956
Epoch: 1, loss: 0.2336706400046746, accuracy: 0.9296500086784363
Epoch: 2, loss: 0.1950585027669867, accuracy: 0.940850019454956
Epoch: 3, loss: 0.1770629089380304, accuracy: 0.945816695690155
Epoch: 4, loss: 0.16116403914218147, accuracy: 0.9504500031471252

Validation - loss: 0.11689013923509434, accuracy: 0.965499997138977
