In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter

# set display option for output
torch.set_printoptions(linewidth = 120)
# on by default
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fedb8068610>

In [2]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST'
    ,train = True
    ,download = True
    ,transform = transforms.Compose([
        transforms.ToTensor()
    ])
)

In [3]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [4]:
# network class
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # conv layers
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5)
        # fc, lin, dense layers
        self.fc1 = nn.Linear(in_features = 12*4*4, out_features = 120)
        self.fc2 = nn.Linear(in_features = 120, out_features = 60)
        self.out = nn.Linear(in_features = 60, out_features = 10)
        
    def forward(self, t):
        # layer 1
        t = t
        
        # layer 2
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2, stride = 2)
        
        # layer 3
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2, stride = 2)
        
        # layer 4
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = F.relu(t)
        
        # layer 5
        t = self.fc2(t)
        t = F.relu(t)
        
        # output
        t = self.out(t)
        
        return(t)
        

In [24]:
from itertools import product

parameters = dict(
    lr = [0.01, 0.001],
    batch_size = [1000, 10000],
    shuffle = [True, False]
)

param_values = [v for v in parameters.values()]

for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

0.01 1000 True
0.01 1000 False
0.01 10000 True
0.01 10000 False
0.001 1000 True
0.001 1000 False
0.001 10000 True
0.001 10000 False


In [25]:
#network = Network()
# batch_size = 100
# lr = 0.01

for lr, batch_size, shuffle in product(*param_values):
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True)
    optimizer = optim.Adam(network.parameters(), lr = lr)

    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(3):
        total_loss = 0
        total_correct = 0

        for batch in train_loader:
            images, labels = batch

            # pass a batch
            preds = network(images)
            # calculate the loss
            loss = F.cross_entropy(preds, labels)

            # zero out accumulated gradient
            optimizer.zero_grad()
            # calculate new gradients
            loss.backward()
            # update network weights
            optimizer.step()

            total_loss += loss.item()*batch_size
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

        print('epoch: ', epoch, 'total_correct: ', total_correct, 'loss: ', total_loss)
    tb.close()

epoch:  0 total_correct:  37813 loss:  57555.59951066971
epoch:  1 total_correct:  48533 loss:  30179.37308549881
epoch:  2 total_correct:  50917 loss:  24442.48142838478
epoch:  0 total_correct:  37330 loss:  58796.29546403885
epoch:  1 total_correct:  47622 loss:  32171.751022338867
epoch:  2 total_correct:  49829 loss:  27324.55202937126
epoch:  0 total_correct:  13320 loss:  126625.98013877869
epoch:  1 total_correct:  29485 loss:  82211.23814582825
epoch:  2 total_correct:  36499 loss:  61504.396200180054
epoch:  0 total_correct:  11535 loss:  129398.63801002502
epoch:  1 total_correct:  29733 loss:  79265.78044891357
epoch:  2 total_correct:  38784 loss:  56915.616393089294
epoch:  0 total_correct:  26772 loss:  99195.48845291138
epoch:  1 total_correct:  42492 loss:  45866.133987903595
epoch:  2 total_correct:  45106 loss:  38869.5964217186
epoch:  0 total_correct:  28324 loss:  98757.77024030685
epoch:  1 total_correct:  43052 loss:  44722.99253940582
epoch:  2 total_correct:  

In [9]:
network.conv1.weight.grad

tensor([[[[-0.0038, -0.0005,  0.0116, -0.0370, -0.0551],
          [-0.0045,  0.0013,  0.0055, -0.0415, -0.0411],
          [-0.0069,  0.0039,  0.0014, -0.0296, -0.0335],
          [ 0.0036,  0.0216,  0.0053, -0.0341, -0.0507],
          [ 0.0130,  0.0154, -0.0206, -0.0401, -0.0630]]],


        [[[-0.0494, -0.0115, -0.0771, -0.0641, -0.1180],
          [-0.0804, -0.0403, -0.0810, -0.0662, -0.1259],
          [-0.0976, -0.0296, -0.0930, -0.0598, -0.0854],
          [-0.1145, -0.0317, -0.0703, -0.0404, -0.0375],
          [-0.1028, -0.0368, -0.0813, -0.0516, -0.0460]]],


        [[[-0.0019, -0.0196,  0.0275,  0.0236, -0.0215],
          [ 0.0008,  0.0089,  0.0207,  0.0479, -0.0037],
          [-0.0067, -0.0024,  0.0134,  0.0423,  0.0285],
          [-0.0107,  0.0114,  0.0205,  0.0293,  0.0373],
          [ 0.0175,  0.0135,  0.0289,  0.0433,  0.0479]]],


        [[[ 0.1908,  0.2078,  0.1680,  0.1956,  0.2211],
          [ 0.1655,  0.1617,  0.1361,  0.1624,  0.2028],
          [ 0.1486,

In [9]:
tb = SummaryWriter()

network = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

In [14]:
# this will not work unless you have updated your gradients !!!! weight.grad will return None
tb.add_scalar('Loss', total_loss, epoch)
tb.add_scalar('Number Correct', total_correct, epoch)
tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

In [6]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images', grid)
tb.add_graph(network, images)

for epoch in range(1):

    total_loss = 0
    total_correct = 0

    for batch in train_loader: # Get Batch

        # Pass Batch
        # Calculate Loss
        # Calculate Gradient
        # Update Weights

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
        tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
        tb.add_histogram(
            'conv1.weight.grad'
            ,network.conv1.weight.grad
            ,epoch
        )

        print(
            "epoch", epoch, 
            "total_correct:", total_correct, 
            "loss:", total_loss
        )

tb.close()

NotImplementedError: Got <class 'NoneType'>, but numpy array, torch tensor, or caffe2 blob name are expected.