In [1]:
# Reload edited modules automatically before each cell executes, so changes to
# imported modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import getpass
import os
import shutil

import numpy as np
import torch
from torch import nn
import torchvision
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from torchinfo import summary
import torch.utils.tensorboard as tb

import models
import mnist

# Seed PyTorch's global RNG so runs are repeatable; the trailing ';' keeps the
# call's return value from being echoed as cell output.
torch.manual_seed(0);

In [3]:
# Per-user TensorBoard log directory under /tmp. Anything left there by a
# previous run is deleted so the directory starts out empty for this session.
#
# getpass.getuser() is used rather than os.getlogin(): the latter requires a
# controlling terminal and can raise OSError when the kernel is started without
# one (JupyterHub, containers, background services).
user = getpass.getuser()
tb_log_dir = f'/tmp/tensorboard/{user}'
if os.path.exists(tb_log_dir):
    shutil.rmtree(tb_log_dir)
    

In [4]:
# TensorBoard SummaryWriter that writes its event files under tb_log_dir.
logger = tb.SummaryWriter(tb_log_dir)

In [5]:
# Instantiate the network and show a per-layer report (input/output shapes,
# parameter counts, kernel shapes) for one training-sized batch of 1x28x28 inputs.
net = models.ConvNet()
summary_kwargs = dict(
    input_size=(mnist.batch_size_train, 1, 28, 28),
    col_names=["input_size", "output_size", "num_params", "kernel_size"],
)
summary(net, **summary_kwargs)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Kernel Shape
ConvNet                                  --                        --                        --                        --
├─Conv2d: 1-1                            [64, 1, 28, 28]           [64, 2, 26, 26]           20                        [1, 2, 3, 3]
├─Conv2d: 1-2                            [64, 2, 13, 13]           [64, 5, 11, 11]           95                        [2, 5, 3, 3]
├─Conv2d: 1-3                            [64, 5, 5, 5]             [64, 10, 3, 3]            460                       [5, 10, 3, 3]
├─Linear: 1-4                            [64, 10]                  [64, 10]                  110                       [10, 10]
Total params: 685
Trainable params: 685
Non-trainable params: 0
Total mult-adds (M): 1.74
Input size (MB): 0.20
Forward/backward pass size (MB): 1.05
Params size (MB): 0.00
Estimated Total Size (MB): 1.26

In [6]:
# Evaluate the freshly initialised (untrained) network to get a baseline.
# Judging by the output below, this prints the average loss and accuracy and
# returns them as a (loss, accuracy) tuple.
mnist.perform_stats(net, tqdm=tqdm)

  0%|          | 0/4 [00:00<?, ?it/s]

Average Loss: 2.322, Accuracy: 10.090%


(2.3219466686248778, 0.1009)

In [7]:
# Optimizer: Adam over all network parameters with learning rate 1e-2.
# The commented-out line below is the plain-SGD alternative (lr=1e-3).
# opt = torch.optim.SGD(net.parameters(), lr=1e-3)
opt = torch.optim.Adam(net.parameters(), lr=1e-2)

In [8]:
# Train for two epochs over mnist.train_loader, evaluating after each epoch.
# The per-batch training loss is written to the TensorBoard `logger` so the
# run can be inspected while it progresses.
for epoch in tqdm(range(2)):
    # perform_stats() runs between epochs and may leave the network in eval
    # mode; switch back to training mode explicitly at the start of each epoch.
    net.train()
    for batch_idx, (X_batch, Y_batch) in tqdm(enumerate(mnist.train_loader),
                                              leave=False, total=len(mnist.train_loader)):
        Y_batch_pred = net(X_batch)
        # NOTE(review): taking .log() here implies the network outputs
        # probabilities and loss_func expects log-probabilities — confirm.
        # An exact 0 in the output would produce -inf.
        loss = mnist.loss_func(Y_batch_pred.log(), Y_batch)
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Global step increases monotonically across epochs so the curve is
        # continuous in TensorBoard.
        global_step = epoch * len(mnist.train_loader) + batch_idx
        logger.add_scalar('loss/train', loss.item(), global_step)
    # Push buffered events to disk so TensorBoard can show them right away.
    logger.flush()
    mnist.perform_stats(net, tqdm=tqdm)
    
    

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Average Loss: 0.356, Accuracy: 88.540%


  0%|          | 0/938 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Average Loss: 0.266, Accuracy: 91.660%


In [9]:
# Re-run the evaluation on the trained network; the figures shown below match
# the ones reported after the final training epoch.
mnist.perform_stats(net, tqdm=tqdm)

  0%|          | 0/4 [00:00<?, ?it/s]

Average Loss: 0.266, Accuracy: 91.660%


(0.26581011712551117, 0.9166)