In [1]:
import torch 
import torchvision 
import torchvision.transforms as transforms
import torch.nn.functional as F 
import torch.nn as nn 
import torch.optim as optim

from torch.utils.tensorboard import SummaryWriter

In [2]:
def get_num_correct(prediction,labelss):
    """Count the rows of ``prediction`` whose highest-scoring column equals the label.

    Args:
        prediction: 2-D tensor of per-class scores, one row per sample.
        labelss: tensor of class indices compared element-wise against the
            arg-max of each row.

    Returns:
        The number of matching rows as a plain Python ``int``.
    """
    predicted_classes = prediction.argmax(dim=1)
    hits = predicted_classes.eq(labelss)
    return hits.sum().item()

In [3]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The flatten size ``12 * 4 * 4`` matches single-channel 28x28 inputs
    (28 -> 24 -> 12 after stage one, 12 -> 8 -> 4 after stage two). The final
    layer emits 10 raw scores per sample; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 5x5 kernels, default stride/padding.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Dense head.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self,t):
        """Run a batch ``t`` through the network and return the raw class scores."""
        # Each conv stage: convolve, ReLU, then 2x2 max-pool with stride 2.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten the pooled feature maps to one row per sample.
        t = t.reshape(-1, 12 * 4 * 4)

        # Two hidden dense layers with ReLU, then the un-activated output layer.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        return self.out(t)

In [4]:
# FashionMNIST training split; downloaded into the given root if not already
# present, with every image converted to a tensor on access.
train_set = torchvision.datasets.FashionMNIST(
    root="",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [5]:
# Shuffled mini-batches of 100 samples drawn from the training set.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=100,
    shuffle=True,
)

# Starting TensorBoard

In [8]:
# Open a writer using the default log directory.
tb = SummaryWriter()

# Fresh model plus one batch of training data to preview.
network = Network()
images, labels = next(iter(train_loader))

# Log the batch as a single image grid and the model graph traced on that
# batch, then close the writer.
grid = torchvision.utils.make_grid(images)
tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

In [11]:
# Single training run: 10 epochs of Adam (lr=0.01) over shuffled batches of 100,
# with per-epoch scalars and conv1 histograms written to TensorBoard.
network = Network()
train_loader = torch.utils.data.DataLoader(
                       train_set,
                       batch_size=100,shuffle=True)
optimizer = optim.Adam(network.parameters(),lr=0.01)

# One batch is pulled up front only to log an image grid and trace the graph.
images , labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
# try/finally: the writer is closed even if training raises or the cell is
# interrupted; previously close() was only reached on a clean finish.
try:
    tb.add_image('images',grid)
    tb.add_graph(network,images)

    for epoch in range(10):

        total_loss = 0
        total_correct = 0

        for batch in train_loader:

            images , labels = batch

            preds = network(images)
            loss = F.cross_entropy(preds,labels)

            optimizer.zero_grad()  # PyTorch accumulates gradients, so clear the previous step's first
            loss.backward()        # compute gradients
            optimizer.step()       # update weights

            # Sum of per-batch losses (not scaled by batch size) and running hit count.
            total_loss += loss.item()
            total_correct += get_num_correct(preds,labels)

        tb.add_scalar('Loss',total_loss,epoch)
        tb.add_scalar('Number Correct',total_correct,epoch)
        tb.add_scalar('Accuracy',total_correct/len(train_set),epoch)

        # The gradient histogram reflects the last batch of the epoch.
        tb.add_histogram('conv1.bias',network.conv1.bias,epoch)
        tb.add_histogram('conv1.weight',network.conv1.weight,epoch)
        tb.add_histogram('conv1.weight.grad',network.conv1.weight.grad,epoch)

        print(f'EPOCH {epoch} total_correct {total_correct} loss {total_loss}')
finally:
    tb.close()
    

EPOCH 0 total_correct 46595 loss 352.69832211732864
EPOCH 1 total_correct 51407 loss 232.48688957095146
EPOCH 2 total_correct 52118 loss 212.7272652387619
EPOCH 3 total_correct 52524 loss 203.45418843626976
EPOCH 4 total_correct 52808 loss 193.5994174927473
EPOCH 5 total_correct 53079 loss 189.41920086741447
EPOCH 6 total_correct 53131 loss 187.23474073410034
EPOCH 7 total_correct 53310 loss 182.19186797738075
EPOCH 8 total_correct 53508 loss 178.517046071589
EPOCH 9 total_correct 53379 loss 180.32867415249348


# Hyperparameters

In [15]:
from itertools import product

In [16]:
# Hyperparameter grid: each key maps to the list of candidate values to try.
parameters = {
    'lr': [.01, .001],
    'batch_size': [10, 100, 1000],
    'shuffle': [True, False],
}

In [17]:
# Candidate-value lists in the grid's key order, ready to be unpacked into product().
param_value = list(parameters.values())
param_value

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [18]:
# Walk the Cartesian product of the candidate lists, one combination per line.
for lr, batch_size, shuffle in product(*param_value):
    print(f'{lr} {batch_size} {shuffle}')

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [21]:
# Values swept by the grid search: every batch size is paired with every learning rate.
batch_size_list = [100, 1_000, 10_000]
lr_list = [1e-2, 1e-3, 1e-4, 1e-5]

In [22]:
# Grid search: train a fresh network for 2 epochs for every (batch_size, lr)
# pair, writing each run to its own TensorBoard log tagged with the settings.
for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()
        train_loader = torch.utils.data.DataLoader(
                               train_set,
                               batch_size=batch_size,shuffle=True)
        optimizer = optim.Adam(network.parameters(),lr=lr)

        # One batch is pulled up front only to log an image grid and trace the graph.
        images , labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)

        # The comment is appended to the run's log directory name so runs can
        # be told apart in TensorBoard.
        comment = f'batch_size={batch_size} lr={lr}'
        tb = SummaryWriter(comment=comment)

        # Every run owns its writer, so close it when the run ends, including
        # on error or interrupt. Previously a single close() after both loops
        # only closed the final run's writer and left the rest open.
        try:
            tb.add_image('images',grid)
            tb.add_graph(network,images)

            for epoch in range(2):

                total_loss = 0
                total_correct = 0

                for batch in train_loader:

                    images , labels = batch

                    preds = network(images)
                    loss = F.cross_entropy(preds,labels)

                    optimizer.zero_grad()  # PyTorch accumulates gradients, so clear the previous step's first
                    loss.backward()        # compute gradients
                    optimizer.step()       # update weights

                    # cross_entropy returns the batch mean; scale by the number
                    # of samples actually in this batch so totals are comparable
                    # across batch sizes, and stay correct if the last batch is
                    # smaller than batch_size.
                    total_loss += loss.item() * len(labels)
                    total_correct += get_num_correct(preds,labels)

                tb.add_scalar('Loss',total_loss,epoch)
                tb.add_scalar('Number Correct',total_correct,epoch)
                tb.add_scalar('Accuracy',total_correct/len(train_set),epoch)

                # Histogram every parameter and its gradient; the gradients
                # are those left by the last batch of the epoch.
                for name , weight in network.named_parameters():
                    tb.add_histogram(name,weight,epoch)
                    tb.add_histogram(f'{name}.grad',weight.grad,epoch)

                print(f'EPOCH {epoch} total_correct {total_correct} loss {total_loss}')
        finally:
            tb.close()
    

EPOCH 0 total_correct 45732 loss 36904.51911687851
EPOCH 1 total_correct 50204 loss 25856.083171069622
EPOCH 0 total_correct 42590 loss 45871.03310525417
EPOCH 1 total_correct 48859 loss 29622.759974002838
EPOCH 0 total_correct 29324 loss 89340.07390737534
EPOCH 1 total_correct 43671 loss 42613.14267218113
EPOCH 0 total_correct 9805 loss 137713.30435276031
EPOCH 1 total_correct 16553 loss 132185.2979183197
EPOCH 0 total_correct 37538 loss 58923.51734638214
EPOCH 1 total_correct 47744 loss 31697.937190532684
EPOCH 0 total_correct 30107 loss 90586.90059185028
EPOCH 1 total_correct 42550 loss 45181.77509307861
EPOCH 0 total_correct 10856 loss 136693.22800636292
EPOCH 1 total_correct 21593 loss 125096.25470638275
EPOCH 0 total_correct 6000 loss 138280.2619934082
EPOCH 1 total_correct 6000 loss 138246.4828491211
EPOCH 0 total_correct 14025 loss 126959.14268493652
EPOCH 1 total_correct 31238 loss 80008.64267349243


KeyboardInterrupt: 

In [13]:
# List every named parameter of the current network with its tensor shape.
for name, param in network.named_parameters():
    param_shape = param.shape
    print(name, param_shape)

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [14]:
# List the gradient shape of every named parameter. Reads `.grad.shape`
# directly, so it expects gradients to be populated (a backward pass has run).
for name, param in network.named_parameters():
    grad_shape = param.grad.shape
    print(f'{name}.grad', grad_shape)

conv1.weight.grad torch.Size([6, 1, 5, 5])
conv1.bias.grad torch.Size([6])
conv2.weight.grad torch.Size([12, 6, 5, 5])
conv2.bias.grad torch.Size([12])
fc1.weight.grad torch.Size([120, 192])
fc1.bias.grad torch.Size([120])
fc2.weight.grad torch.Size([60, 120])
fc2.bias.grad torch.Size([60])
out.weight.grad torch.Size([10, 60])
out.bias.grad torch.Size([10])
