[A Complete Guide to Using TensorBoard with PyTorch](https://towardsdatascience.com/a-complete-guide-to-using-tensorboard-with-pytorch-53cb2301e8c3)


In [1]:
import torch
print(torch.__version__)

1.10.2


In [2]:
import torch
import torch.nn as nn
import torch.optim as opt
torch.set_printoptions(linewidth=120)
import torch.nn.functional as F
import torchvision

import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

In [3]:
def get_num_correct(preds, labels):
    ''' get the number of correct labels after training of the model'''
    return preds.argmax(dim=1).eq(labels).sum().item()

### CNN Model

In [4]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)

        return x

### Importing data and creating the train loader

In [5]:
train_set = torchvision.datasets.FashionMNIST(root="./data", train=True, download=True, transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

### Displaying Images And Graphs with TensorBoard

In [6]:
tb = SummaryWriter()
model = CNN()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb.add_image("images", grid)
tb.add_graph(model, images)
tb.close()

In [7]:
# !tendorboard --logdir runs

### Training Loop to visualize Evaluation

In [8]:
device = ("cuda" if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = opt.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

tb = SummaryWriter()

for epoch in range(10):

    total_loss = 0
    total_correct = 0

    # for batch_id, (images, labels) in enumerate(train_loader):
    for iamges, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        preds = model(images)

        loss = criterion(preds, labels)
        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # tb.add_scalar("Loss", total_loss, batch_id)
    tb.add_scalar("Loss", total_loss, epoch)
    tb.add_scalar("Correct", total_correct, epoch)
    tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)

    tb.add_histogram("conv1.bias", model.conv1.bias, epoch)
    tb.add_histogram("conv1.weight", model.conv1.weight, epoch)
    tb.add_histogram("conv2.bias", model.conv2.bias, epoch)
    tb.add_histogram("conv2.weight", model.conv2.weight, epoch)
    # for name, weight in model.named_parameters():
    #     tb.add_histogram(name, weight, epoch)
    #     tb.add_histogram(f"{name}.grad", weight.grad, epoch)

    print("epoch:", epoch, "total_correct:", total_correct, "loss", total_loss)

tb.close()

epoch: 0 total_correct: 5834 loss 1382.1988372802734
epoch: 1 total_correct: 6066 loss 1382.0108075141907
epoch: 2 total_correct: 6062 loss 1381.9321489334106
epoch: 3 total_correct: 6098 loss 1382.0273213386536
epoch: 4 total_correct: 5943 loss 1382.053212404251
epoch: 5 total_correct: 5969 loss 1382.074637413025
epoch: 6 total_correct: 5915 loss 1382.0701925754547
epoch: 7 total_correct: 6014 loss 1382.0029339790344
epoch: 8 total_correct: 6019 loss 1381.9855835437775
epoch: 9 total_correct: 5846 loss 1382.0712904930115


### Hyterparameter Tunning

In [9]:
from itertools import product
parameters = dict(
    lr = [0.01, 0.001],
    batch_size = [32, 64, 128],
    shuffle = [True, False]
)

param_values = [v for v in parameters.values()]
print(param_values)

for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

[[0.01, 0.001], [32, 64, 128], [True, False]]
0.01 32 True
0.01 32 False
0.01 64 True
0.01 64 False
0.01 128 True
0.01 128 False
0.001 32 True
0.001 32 False
0.001 64 True
0.001 64 False
0.001 128 True
0.001 128 False


### Modified Training Loop

In [10]:
for run_id, (lr, batch_size, shuffle) in enumerate(product(*param_values)):
    print("run id:", run_id + 1)
    model = CNN().to(device)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = opt.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    comment = f' batch_size = {batch_size} lr = {lr} shuffle = {shuffle}'
    tb = SummaryWriter(comment=comment)
    for epoch in range(5):
        total_loss = 0
        total_correct = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images)

            loss = criterion(preds, labels)
            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        tb.add_scalar("Loss", total_loss, epoch)
        tb.add_scalar("Correct", total_correct, epoch)
        tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)

        print("batch_size:", batch_size, "lr:", lr, "shuffle:", shuffle)
        print("epoch:", epoch, "total_correct", total_correct, "loss:", total_loss)
    
    print("_______________________________________________________")
        
    tb.add_hparams(
        {"lr":lr, "bsize":batch_size, "shuffle":shuffle}, # hyperparameters
        {
            "accuracy":total_correct/len(train_set),
            "loss":total_loss,
        }, # evaluation metrics
    )

tb.close()

run id: 1
batch_size: 32 lr: 0.01 shuffle: True
epoch: 0 total_correct 47888 loss: 1018.841361835599
batch_size: 32 lr: 0.01 shuffle: True
epoch: 1 total_correct 50730 loss: 780.9257594048977
batch_size: 32 lr: 0.01 shuffle: True
epoch: 2 total_correct 51298 loss: 750.2692514099181
batch_size: 32 lr: 0.01 shuffle: True
epoch: 3 total_correct 51575 loss: 730.8675047084689
batch_size: 32 lr: 0.01 shuffle: True
epoch: 4 total_correct 51598 loss: 730.834707390517
_______________________________________________________
run id: 2
batch_size: 32 lr: 0.01 shuffle: False
epoch: 0 total_correct 46656 loss: 1102.8294258713722
batch_size: 32 lr: 0.01 shuffle: False
epoch: 1 total_correct 49949 loss: 848.2553975880146
batch_size: 32 lr: 0.01 shuffle: False
epoch: 2 total_correct 50205 loss: 824.955286026001
batch_size: 32 lr: 0.01 shuffle: False
epoch: 3 total_correct 50602 loss: 803.3780739232898
batch_size: 32 lr: 0.01 shuffle: False
epoch: 4 total_correct 50680 loss: 794.6867438405752
__________