In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

import torch.cuda as t
import cupy as cu
import numpy as np

from torch.utils.tensorboard import SummaryWriter
import tensorboard

from itertools import product

In [2]:
print("PyTorch v"+torch.__version__)
print("Torchvision v"+torchvision.__version__)
print("Tensorboard v"+tensorboard.__version__)

PyTorch v1.9.0
Torchvision v0.10.0
Tensorboard v2.4.0


In [3]:
# Check if GPU is avail
print(torch.cuda.get_device_name(0))
print(torch.cuda.is_available())

# Support both CPU and GPU, when avail

if torch.cuda.is_available():  
  dev = "cuda:0" 
  cu = cu
else:  
  dev = "cpu"  
  cu = np

device = torch.device(dev)

# Utility function to interchangbly support CPU and GPU when avail
def gpu(t):
    return t.to(device)

GeForce RTX 2080 Ti
True


In [4]:
gpu_cpu_test = gpu(torch.zeros(4,3))
print(gpu_cpu_test)

# Not piped to gpu
t = torch.Tensor()

# uniform number type for tensor
print(t.dtype)

# gpu or cpu?
print(t.device)

# Strided is default for how tensors are laid out in mem
print(t.layout)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], device='cuda:0')
torch.float32
cpu
torch.strided


In [5]:

'''
# NVIDIA GPU Computing Tips with code example: 
When working with #cupy and #pytorch, you might have to do a 
couple things diff to turn a #numpy / cupy array into a tensor. 
First you need to copy the numpy (cupy )array, this will 
allocate new memory for numpy array . This is to prevent  
negative strides within Ndarray which are  the number of 
locations in memory between beginnings of successive array elements. 
Next, we flip it with the Axis in the array, which  
tells which entries are reversed. 
Lastly, we can finally convert the copied array to a tensor. 
dtypes are data type enforcement and cuda:0 is piping data to the 
GPU device which has an index of 0. This provides 7-9x speed up on 
processing in most scenarios for computer vision, and 20x speed up 
for analytics/non-machine vision applications like NLP. 
The following solution performs near 100% of the 
operations on the GPU:
'''

cupy_gpu = torch.tensor(
    cu.flip(cu.copy(
        cu.array([1,2,3,4,5,6])),
            axis=0),
            dtype=torch.float16, 
            device=dev
)

print(cupy_gpu)

tensor([6., 0., 0., 0., 0., 0.], device='cuda:0', dtype=torch.float16)


In [6]:
# CPU Bound Op
numpy_cpu = np.arange(15).reshape(3, 5)
print(numpy_cpu)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


In [7]:
## Start the example using Torch+Tensorboard

def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [8]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        
    def forward(self, t):
       
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        
        t = F.relu(self.fc2(t))
        
        t = self.out(t)
        
        return t

In [9]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [10]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

In [11]:
### Starting out with TensorBoard (Network Graph and Images)

In [12]:
tb = SummaryWriter()

network = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [13]:
### Training Loop Review

In [14]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(1):
    
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader: # Get Batch
        images, labels = batch 
        
        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

epoch 0 total_correct: 47124 loss: 342.5143479704857


In [15]:
# Training Loop with Tensorboard:

In [16]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images', grid)
tb.add_graph(network, images)

for epoch in range(10):
    
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader: # Get Batch
        images, labels = batch 

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)
    
    tb.add_scalar('Loss', total_loss, epoch)
    tb.add_scalar('Number Correct', total_correct, epoch)
    tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
    
    tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
    tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
    tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
    
    print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)
    
tb.close()

epoch 0 total_correct: 47551 loss: 329.9411251246929
epoch 1 total_correct: 51709 loss: 224.40080043673515
epoch 2 total_correct: 52501 loss: 204.56086319684982
epoch 3 total_correct: 52866 loss: 193.04770627617836
epoch 4 total_correct: 52924 loss: 191.4370125681162
epoch 5 total_correct: 53228 loss: 187.44795136898756
epoch 6 total_correct: 53372 loss: 180.5123216882348
epoch 7 total_correct: 53406 loss: 179.26758498698473
epoch 8 total_correct: 53441 loss: 178.64229875802994
epoch 9 total_correct: 53312 loss: 180.6327881887555


In [17]:
### CNN Training Hyperparamters - Neural Networks

In [18]:
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images', grid)
tb.add_graph(network, images)

for epoch in range(5):
    
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader: # Get Batch
        images, labels = batch 

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)
    
    tb.add_scalar('Loss', total_loss, epoch)
    tb.add_scalar('Number Correct', total_correct, epoch)
    tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
    
    #tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
    #tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
    #tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
    
    for name, weight in network.named_parameters():
        tb.add_histogram(name, weight, epoch)
        tb.add_histogram(f'{name}.grad', weight.grad, epoch)
    
    print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)
    
tb.close()

epoch 0 total_correct: 47264 loss: 337.4169496446848
epoch 1 total_correct: 51555 loss: 231.8068321943283
epoch 2 total_correct: 52183 loss: 212.24386222660542
epoch 3 total_correct: 52453 loss: 203.61098565161228
epoch 4 total_correct: 52838 loss: 194.67393566668034


In [19]:
for name, weight in network.named_parameters():
    print(name, weight.shape)

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [20]:
for name, weight in network.named_parameters():
    print(f'{name}.grad', weight.grad.shape)

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])


In [21]:
### Paramterized Hyperparameters

In [22]:
batch_size = 100
lr = 0.01

network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
optimizer = optim.Adam(network.parameters(), lr=lr)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

comment = f' batch_size={batch_size} lr={lr}'
tb = SummaryWriter(comment=comment)
tb.add_image('images', grid)
tb.add_graph(network, images)

for epoch in range(2):
    total_loss = 0
    total_correct = 0
    for batch in train_loader:
        images, labels = batch # Get Batch
        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss
        optimizer.zero_grad() # Zero Gradients
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item() * batch_size
        total_correct += get_num_correct(preds, labels)
    
    tb.add_scalar('Loss', total_loss, epoch)
    tb.add_scalar('Number Correct', total_correct, epoch)
    tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
    
    for name, param in network.named_parameters():
        tb.add_histogram(name, param, epoch)
        tb.add_histogram(f'{name}.grad', param.grad, epoch)
    
    print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)  
tb.close()

epoch 0 total_correct: 46122 loss: 36384.994104504585
epoch 1 total_correct: 50953 loss: 24450.16450881958


In [23]:
batch_size_list = [100, 1000, 10000]
lr_list = [.01, .001, .0001, .00001]

for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
        optimizer = optim.Adam(network.parameters(), lr=lr)

        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)

        comment=f' batch_size={batch_size} lr={lr}'
        tb = SummaryWriter(comment=comment)
        tb.add_image('images', grid)
        tb.add_graph(network, images)

        for epoch in range(1):
            total_loss = 0
            total_correct = 0
            for batch in train_loader:
                images, labels = batch # Get Batch
                preds = network(images) # Pass Batch
                loss = F.cross_entropy(preds, labels) # Calculate Loss
                optimizer.zero_grad() # Zero Gradients
                loss.backward() # Calculate Gradients
                optimizer.step() # Update Weights

                total_loss += loss.item() * batch_size
                total_correct += get_num_correct(preds, labels)

            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

            for name, param in network.named_parameters():
                tb.add_histogram(name, param, epoch)
                tb.add_histogram(f'{name}.grad', param.grad, epoch)

            print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)  
        tb.close()

epoch 0 total_correct: 46806 loss: 35112.37597465515
epoch 0 total_correct: 41893 loss: 47429.14132773876
epoch 0 total_correct: 32811 loss: 80122.57377505302
epoch 0 total_correct: 7595 loss: 137384.78164672852
epoch 0 total_correct: 39205 loss: 54641.52693748474
epoch 0 total_correct: 30477 loss: 91507.25823640823
epoch 0 total_correct: 9390 loss: 137259.95635986328
epoch 0 total_correct: 6146 loss: 138244.85564231873
epoch 0 total_correct: 13004 loss: 130699.30791854858
epoch 0 total_correct: 7904 loss: 137979.51936721802
epoch 0 total_correct: 6057 loss: 138276.40771865845
epoch 0 total_correct: 6050 loss: 138321.31147384644


In [24]:
parameters = dict(
    lr = [.01, .001]
    ,batch_size = [10, 100, 1000]
    ,shuffle = [True, False]
)

In [25]:
param_values = [v for v in parameters.values()]
param_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [26]:
for lr, batch_size, shuffle in product(*param_values): 
    print (lr, batch_size, shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [None]:
for lr, batch_size, shuffle in product(*param_values): 
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)
    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)
    for epoch in range(25):
        total_loss = 0
        total_correct = 0
        for batch in train_loader:
            images, labels = batch # Get Batch
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            total_loss += loss.item() * batch_size
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        for name, param in network.named_parameters():
            tb.add_histogram(name, param, epoch)
            tb.add_histogram(f'{name}.grad', param.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)  
    tb.close()

epoch 0 total_correct: 46106 loss: 36976.01890651742
epoch 1 total_correct: 48232 loss: 32253.95067493315
epoch 2 total_correct: 48629 loss: 31062.413288153766
epoch 3 total_correct: 48735 loss: 31493.25378237292
epoch 4 total_correct: 48902 loss: 31069.599554706365
epoch 5 total_correct: 48856 loss: 31361.52362899622
epoch 6 total_correct: 49162 loss: 30502.29696228774
epoch 7 total_correct: 49308 loss: 31002.20635057427
epoch 8 total_correct: 49323 loss: 30967.16022528708
epoch 9 total_correct: 49470 loss: 30315.25373094715
epoch 10 total_correct: 49339 loss: 30997.397101920797
epoch 11 total_correct: 48822 loss: 32337.963016331196
