In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth = 120) 
torch.set_grad_enabled(True) 

from torch.utils.tensorboard import SummaryWriter #allows to send data to tensorboard files 

In [12]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [13]:
class Network(nn.Module):
        
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)


    def forward(self, t):
        
        # (1) input layer
        t = t
        
        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2, stride =2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size = 2, stride =2)

        # (4) hidden linear layer
        t = t.reshape(-1, 12*4*4)
        t = self.fc1(t)
        t = F.relu(t)
       
        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t) 

        # (6) ouput layer
        t = self.out(t)
        return t

In [14]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'  
    ,train=True    
    ,download=True 
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [15]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle = True)

# Paramter lists

In [16]:
batch_size_list = [100, 1000, 10000]
lr_list = [0.01, 0.001, 0.0001, 0.00001]

# Nested iteration

In [31]:
for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()

        train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
        optimizer = optim.Adam(network.parameters(), lr=0.01)

        images,labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)

        comment = f'batch_size={batch_size} lr={lr}' #create a string called as comment nd pass it as a comment to summarywriter

        tb = SummaryWriter(comment=comment)
        tb.add_image('images', grid)
        tb.add_graph(network, images)

        for epoch in range(2):
    
            total_loss = 0
            total_correct = 0
    
#batch = next(iter(train_loader)) # Get a single batch

            for batch in train_loader: #get batch
                images, labels = batch #unpack the batch

                preds = network(images) #pass batch
                loss = F.cross_entropy(preds, labels) #calculate loss
    
                optimizer.zero_grad() #pytorch accumulates the grad after each pass of the batch so we make sure it's zero
                loss.backward() #calc gradients 
                optimizer.step() #update weights #each time we pass a batch the weights are updates, so if we have 100 batches, it updates 100 times and takes 100 steps towards the minimum of the loss function

########account loss calc for the batch size 
                total_loss += loss.item()*batch_size 
                total_correct += get_num_correct(preds, labels)

            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct/len(train_set), epoch)

    #tb.add_histogram('con1.bias',network.conv1.bias,epoch)
    #tb.add_histogram('con1.weight',network.conv1.weight,epoch)
    #tb.add_histogram('con1.weight.grad',network.conv1.weight.grad,epoch)

    #############this below code works for all layers in the network
        for name,weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

            print('epoch :', epoch, 'total_correct:', total_correct, 'loss :', total_loss)

        tb.close()

epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51704 loss : 22485.33167243004
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 23778.030727803707
epoch : 1 total_correct: 51185 loss : 237

In [8]:
for name,weight in network.named_parameters():
    print(name, weight.shape)

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [28]:
for name,weight in network.named_parameters():
    print(f'{name}.grad', weight.grad.shape)  #name of the layer, and add .grad to it 

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])


In [None]:
## But here we're using too many for loops 

In [25]:
from itertools import product  #computes cartesian product given multiple list inputs 

In [21]:
parameters = dict(lr = [0.01,0.001], 
                 batch_size = [10,100,1000], 
                 shuffle = [True, False])

In [22]:
param_values = [v for v in parameters.values()]
param_values 

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [26]:
for lr, batch_size, shuffle in product(*param_values): #star indicates each value of the list as an argument opposed to treating list itself as an argument
    print(lr, batch_size, shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


The above allows us to work in a single loop no matter how many parameters we have 

In [30]:
for lr, batch_size, shuffle in product(*param_values):
    comment = f'batch_size = {batch_size} lr = {lr} shuffle={shuffle}'
