## PyTorch Tutorial 16 - How To Use The TensorBoard

https://www.youtube.com/watch?v=VJW9wU-1n18

We can use TensorBoard to visualize and experiment with our models. It is developed by the TensorFlow team, but it can be used with PyTorch as well.
https://www.tensorflow.org/tensorboard/

We start with code from tutorial 13 - which does digit classification

On commandline (after pip install tensorboard)
```
$tensorboard --logdir=runs
```
Like this

```
(base) jvsingh: -> tensorboard --logdir=runs
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.3.0 at http://localhost:6006/ (Press CTRL+C to quit)

```



When we open it in the browser, we do not see any data because we have not written anything yet.

<img src = "images/tsboard-no-data.png" width=600>

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [2]:
# TensorBoard reads event files from a log directory; SummaryWriter is the
# object that writes them. This run logs under runs/mnist.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir="runs/mnist")

In [3]:
# Device configuration: use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"device:{device}")

# Hyperparameters
input_size = 784  # each 28 x 28 image is flattened to 784 features
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# MNIST datasets, stored under ./data (downloaded if not already present).
# ToTensor() converts each image to a tensor.
train_dataset = torchvision.datasets.MNIST(root="./data",
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root="./data",
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# Training batches are shuffled.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Grab one training batch to use as example input for TensorBoard.
examples = iter(train_loader)
# Use the built-in next(): the Python-2-style `.next()` method is not
# available on DataLoader iterators in current PyTorch releases.
example_data, example_targets = next(examples)
print(example_data.shape, example_targets.shape)

device:cpu
torch.Size([64, 1, 28, 28]) torch.Size([64])


In [4]:
# An alternative is to plot the first six samples with matplotlib, e.g.
#     plt.subplot(2, 3, i + 1)                      # 2 rows x 3 columns
#     plt.imshow(example_data[i][0], cmap="gray")   # [0]: the only channel
# for i in range(6).

# Rather than plotting, we add our images to TensorBoard: the example batch
# is tiled into a single grid image and written under the tag 'mnist_images'.
image_grid = torchvision.utils.make_grid(tensor=example_data)
writer.add_image(tag='mnist_images', img_tensor=image_grid)
writer.close()  # flushes outputs
# sys.exit()

Running the cell above writes the images into the TensorBoard log folder; refresh TensorBoard to see the images.
<img src="images/ts-board-images-appear.png" width=400>


We will also add to writer as below ( see after setting optimizer below). After running below you can see

<img src="images/ts-board-graphs.png" width=400>

In [5]:
#for i, (images, labels) in enumerate(train_loader):
#    print(f"i={i} images.size={images.size()[0]}")
#
#print(len(train_dataset))

In [6]:
# Sanity check: 937 batches of 64 plus one batch of 32 add up to 60000,
# which is consistent with the 938 steps per epoch reported during training.
937 * 64 + 32

60000

In [7]:
import torch.nn.functional as F


class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The network is Linear -> ReLU -> Linear and returns the raw output of
    the second linear layer; no softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores per sample.
        """
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of l2(relu(l1(x))), without a final softmax."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)
    
# Create the model on the same device the batches are moved to below;
# otherwise the forward pass fails when a GPU is selected.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()  # applies softmax
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For viewing in TensorBoard: record the model graph, traced with one
# flattened example batch placed on the model's device.
writer.add_graph(model, example_data.reshape(-1, 28 * 28).to(device))
writer.close()

# training loop
n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Each batch arrives as (batch, 1, 28, 28); the model expects
        # (batch, 784), where 784 = 28 x 28.
        images = images.reshape(-1, 28 * 28).to(device)  # pushes to GPU if available
        labels = labels.to(device)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward
        loss.backward()
        optimizer.step()
        # Gradients accumulate across backward() calls, so they must be
        # cleared once per iteration. Clearing them here (after step) or at
        # the top of the iteration (before backward) is equivalent; clearing
        # them between backward() and step() would discard the gradients the
        # optimizer needs for this update.
        optimizer.zero_grad()

        if (i + 1) % 100 == 0:
            print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_total_steps} loss={loss.item():.4f}')
            
        
        

epoch 1/5, step 100/938 loss=0.5760
epoch 1/5, step 200/938 loss=0.2235
epoch 1/5, step 300/938 loss=0.2829
epoch 1/5, step 400/938 loss=0.1187
epoch 1/5, step 500/938 loss=0.1124
epoch 1/5, step 600/938 loss=0.0829
epoch 1/5, step 700/938 loss=0.1824
epoch 1/5, step 800/938 loss=0.1426
epoch 1/5, step 900/938 loss=0.2102
epoch 2/5, step 100/938 loss=0.1518
epoch 2/5, step 200/938 loss=0.1374
epoch 2/5, step 300/938 loss=0.1223
epoch 2/5, step 400/938 loss=0.1375
epoch 2/5, step 500/938 loss=0.0711
epoch 2/5, step 600/938 loss=0.0945
epoch 2/5, step 700/938 loss=0.1751
epoch 2/5, step 800/938 loss=0.0765
epoch 2/5, step 900/938 loss=0.0800
epoch 3/5, step 100/938 loss=0.0319
epoch 3/5, step 200/938 loss=0.0652
epoch 3/5, step 300/938 loss=0.0692
epoch 3/5, step 400/938 loss=0.0635
epoch 3/5, step 500/938 loss=0.0381
epoch 3/5, step 600/938 loss=0.1839
epoch 3/5, step 700/938 loss=0.0209
epoch 3/5, step 800/938 loss=0.0759
epoch 3/5, step 900/938 loss=0.1061
epoch 4/5, step 100/938 loss

In [8]:
# A separate log directory so this run appears as its own entry in TensorBoard.
writer = SummaryWriter("runs/mnist1")

# We just add training loss and accuracy logging - nothing else changes.
# Re-create the model, on the same device the batches are moved to below.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# loss and optimizer
criterion = nn.CrossEntropyLoss()  # applies softmax
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)

# Record the model graph, traced with one flattened example batch on the
# model's device.
writer.add_graph(model, example_data.reshape(-1, 28 * 28).to(device))


for epoch in range(num_epochs):
    # Per-epoch accumulators for the TensorBoard scalars.
    running_loss      = 0.0
    running_correct   = 0.0
    running_incorrect = 0.0
    for i, (images, labels) in enumerate(train_loader):
        # Each batch arrives as (batch, 1, 28, 28); the model expects
        # (batch, 784), where 784 = 28 x 28.
        images = images.reshape(-1, 28 * 28).to(device)  # pushes to GPU if available
        labels = labels.to(device)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Accumulate statistics just to show in TensorBoard.
        running_loss += loss.item()
        _, predictions = torch.max(outputs, 1)  # arg-max along dimension 1
        _correct   = (predictions == labels).sum().item()
        _incorrect = predictions.size()[0] - _correct
        running_correct   += _correct
        running_incorrect += _incorrect

        if (i + 1) % 100 == 0:
            print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_total_steps} loss={loss.item():.4f}')

    ############## TENSORBOARD ########################
    # Log the mean per-batch loss for the epoch (the summed loss divided by
    # the number of batches) so the curve does not scale with the number of
    # batches. The print below still reports the summed loss.
    mean_loss = running_loss / n_total_steps
    writer.add_scalar('training loss', mean_loss, epoch * n_total_steps + i)
    running_accuracy = running_correct / (running_correct + running_incorrect)
    writer.add_scalar('accuracy', running_accuracy, epoch * n_total_steps + i)
    print(f'At epoch level : running_loss:{running_loss} running_accuracy:{running_accuracy} ')
    ############## TENSORBOARD ########################

writer.close()

epoch 1/5, step 100/938 loss=0.3006
epoch 1/5, step 200/938 loss=0.1892
epoch 1/5, step 300/938 loss=0.2882
epoch 1/5, step 400/938 loss=0.1905
epoch 1/5, step 500/938 loss=0.2923
epoch 1/5, step 600/938 loss=0.2072
epoch 1/5, step 700/938 loss=0.1655
epoch 1/5, step 800/938 loss=0.0742
epoch 1/5, step 900/938 loss=0.2098
At epoch level : running_loss:241.31286326795816 running_accuracy:0.9269 
epoch 2/5, step 100/938 loss=0.0978
epoch 2/5, step 200/938 loss=0.0917
epoch 2/5, step 300/938 loss=0.1434
epoch 2/5, step 400/938 loss=0.0937
epoch 2/5, step 500/938 loss=0.0685
epoch 2/5, step 600/938 loss=0.1672
epoch 2/5, step 700/938 loss=0.1848
epoch 2/5, step 800/938 loss=0.0952
epoch 2/5, step 900/938 loss=0.0828
At epoch level : running_loss:96.06545485556126 running_accuracy:0.9689333333333333 
epoch 3/5, step 100/938 loss=0.0230
epoch 3/5, step 200/938 loss=0.0171
epoch 3/5, step 300/938 loss=0.0644
epoch 3/5, step 400/938 loss=0.0651
epoch 3/5, step 500/938 loss=0.0887
epoch 3/5, st

We see the scalars in TensorBoard after the above metrics are pushed through **writer**


<img src="images/ts-scalars.png" width=400>

If we do another run with different writer
```python
writer = SummaryWriter("runs/mnist2")
```
then we will see a new line with the "mnist2" tag. This way we can compare the performance of many runs together.



#### Now let us see our model's performance

We can add a precision-recall curve - it makes more sense in binary classification. But if we analyse each class separately, then we do have a kind of binary classification with respect to each class.

In the [tensorboard link](https://pytorch.org/docs/stable/tensorboard.html#), we can search for **add_pr_curve**, which is adding the precision and recall curve. See the description there.

```
add_pr_curve(tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None)
```

In [9]:
# import torch.nn.functional as F
# Evaluate on the test set and write one precision-recall curve per class.
class_labels = []  # ground-truth labels, one tensor per batch
class_preds = []   # class probabilities, one (batch, 10) tensor per batch

# test
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)  # pushes to GPU if available
        labels = labels.to(device)
        outputs = model(images)

        # torch.max returns (value, index); the index is the predicted class
        _, predicted = torch.max(outputs, 1)  # along dimension 1
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # The model returns the raw output of its last linear layer (no
        # softmax; CrossEntropyLoss took care of that when computing the
        # loss). For the PR curves we want actual probabilities, so softmax
        # is applied explicitly across the class dimension.
        class_probs_batch = F.softmax(outputs, dim=1)
        class_preds.append(class_probs_batch)
        # add_pr_curve needs the ground truth, so collect the true labels
        # rather than the model's own predictions.
        class_labels.append(labels)

    # Concatenate the per-batch results over the whole test set.
    class_labels = torch.cat(class_labels)  # 1-D: one label per test sample
    class_preds = torch.cat(class_preds)    # 2-D: one row of 10 probabilities per test sample

    acc = 100.0 * n_correct/n_samples
    print(f'Accuracy of the network on the test images = {acc} %')

    ############## TENSORBOARD ########################
    # One-vs-rest PR curve per digit: the binary target is "true label == i"
    # and the score is the predicted probability of class i.
    classes = range(10)
    for i in classes:
        labels_i = class_labels == i
        preds_i = class_preds[:, i]
        writer.add_pr_curve(str(i), labels_i, preds_i, global_step=0)
    writer.close()
    ############## TENSORBOARD ########################

Accuracy of the network on the test images = 98.06 %
