In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

In [2]:
class CNN(nn.Module):
    """Small convolutional classifier.

    Layout: Conv(3x3, 6 ch) -> ReLU -> Conv(3x3, 16 ch) -> ReLU ->
    MaxPool(2x2, stride 2) -> flatten -> Linear(num_classes).

    Both convolutions use stride 1 and padding 1, so they keep the spatial
    size; the single pooling layer halves it (rounding down).

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of logits produced by the final linear layer.
        input_size: Spatial size of the input images, either a
            ``(height, width)`` pair or a single int for square images.
            Defaults to ``(28, 28)``, which yields the original
            ``16 * 14 * 14`` classifier input.

    Raises:
        ValueError: If ``input_size`` is too small to survive the 2x2 pooling.
    """

    def __init__(self, in_channels, num_classes, input_size=(28, 28)):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=6,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            in_channels=6,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.relu2 = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(
            kernel_size=(2, 2),
            stride=(2, 2)
        )

        # Derive the flattened feature count from the input size instead of
        # hard-coding 14 * 14, so the model is not tied to 28x28 images.
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size
        pooled_height, pooled_width = height // 2, width // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size must be at least 2x2, got {height}x{width}"
            )
        self.fc1 = nn.Linear(16 * pooled_height * pooled_width, num_classes)

        self.initialize_weights()

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        x = self.conv1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.relu2(x)

        x = self.pool(x)
        # Collapse (channels, height, width) into one feature axis per sample.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)

        return x

    def initialize_weights(self):
        """Re-initialise all layers: Kaiming-uniform weights and zero biases.

        BatchNorm2d layers (none are defined in this class at present) would
        be reset to weight 1 and bias 0.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight)

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

In [3]:
# MNIST training split stored under dataset/; each image is converted to a
# tensor by the ToTensor transform when it is accessed.
train_dataset = datasets.MNIST(
    root='dataset/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

In [4]:
# Hyperparameters for this run.
learning_rate = 1.0e-3
in_channels = 1
num_classes = 10
batch_size = 256
num_epochs = 1

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and anything else drawing from it) is repeatable
# after a kernel restart.
seed = 0
torch.manual_seed(seed)

# Use the GPU when one is present, otherwise fall back to the CPU instead of
# failing on an unconditional .cuda() call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CNN(in_channels=in_channels,
            num_classes=num_classes).to(device)

# Print a layer-by-layer summary for a single 28x28 input image.
summary(model=model, input_size=(in_channels, 28, 28), batch_size=-1, device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]              60
              ReLU-2            [-1, 6, 28, 28]               0
            Conv2d-3           [-1, 16, 28, 28]             880
              ReLU-4           [-1, 16, 28, 28]               0
         MaxPool2d-5           [-1, 16, 14, 14]               0
            Linear-6                   [-1, 10]          31,370
Total params: 32,310
Trainable params: 32,310
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.29
Params size (MB): 0.12
Estimated Total Size (MB): 0.41
----------------------------------------------------------------


In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
writer = SummaryWriter(
    f"runs/MNIST/MiniBatchSize_{batch_size}/LearningRate_{learning_rate}"
)


train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

# Move batches to whatever device the model already lives on, so this cell
# works on both GPU and CPU-only machines.
device = next(model.parameters()).device

avg_loss = 0.0
avg_accs = 0.0

classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Index of the batch whose inputs are exported to the embedding projector.
# 230 is the value used for batch_size=256 (60000 / 256 = 234.375, i.e. 235
# batches); it is capped at the last batch so that an embedding is still
# written when a larger batch size yields fewer than 231 batches.
embedding_batch_idx = min(230, len(train_loader) - 1)

step = 0
losses = []
accuracies = []
try:
    for epoch in range(num_epochs):
        # Reset per epoch so the reported averages describe this epoch only
        # rather than accumulating over all previous epochs.
        losses = []
        accuracies = []

        for batch_idx, (data, targets) in enumerate(train_loader):
            data = data.to(device)
            targets = targets.to(device)

            scores = model(data)
            loss = criterion(scores, targets)
            loss_value = loss.item()
            losses.append(loss_value)

            # update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # accuracy of the pre-update predictions on this batch
            _, predictions = scores.max(1)
            num_corrects = (predictions == targets).sum().item()
            running_train_acc = num_corrects / data.shape[0]
            accuracies.append(running_train_acc)

            # tensorboard log: input images of this batch
            image_grid = torchvision.utils.make_grid(data)
            writer.add_image('mnist_images', image_grid, global_step=step)

            # tensorboard log: scalars and the fc1 weight distribution; every
            # entry carries the same global step so the histogram evolves over
            # time instead of being written without a step.
            writer.add_scalar('Training Loss', loss_value, global_step=step)
            writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
            writer.add_histogram('fc1', model.fc1.weight, global_step=step)

            # Export one batch of flattened images to the projector. The
            # metadata labels are the model's predictions for that batch, not
            # the ground-truth targets. Using `step` keeps the entry unique
            # across epochs.
            if batch_idx == embedding_batch_idx:
                features = data.reshape(data.shape[0], -1)
                class_labels = [classes[label] for label in predictions.tolist()]
                writer.add_embedding(features, metadata=class_labels, label_img=data, global_step=step)

            step += 1

        # max(..., 1) guards against an empty loader (no batches this epoch).
        avg_loss = sum(losses) / max(len(losses), 1)
        avg_accs = sum(accuracies) / max(len(accuracies), 1)

        writer.add_hparams({'learning_rate' : learning_rate, 'batch_size' : batch_size},
            {'loss' : avg_loss, 'accuracy' : avg_accs})

        print(f"Epoch [{epoch+1}/{num_epochs}], Avg. Loss: {avg_loss}, Avg. Acc: {avg_accs}")
finally:
    # Flush pending events and release the event file even if training is
    # interrupted.
    writer.close()

Epoch [1/1], Avg. Loss: 0.3501745581943938, Avg. Acc: 0.8991190159574468


In [None]:
# To inspect the logged runs, launch TensorBoard from a shell, or uncomment the line below:
#!tensorboard --logdir runs