# Using optimizers

In [1]:
# Setting seeds to try and ensure we have the same results - this is not guaranteed across PyTorch releases.
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

import numpy as np
np.random.seed(0)

In [2]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean, std = (0.5,), (0.5,)

# Create a transform and normalise data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean, std)
                              ])

# Download FMNIST training dataset and load training data
trainset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download FMNIST test dataset and load test data
testset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [3]:
class FMNIST(nn.Module):
  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 128)
    self.fc2 = nn.Linear(128,64)
    self.fc3 = nn.Linear(64,10)
    
  def forward(self, x):
    x = x.view(x.shape[0], -1)
    
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    x = F.log_softmax(x, dim=1)
    
    return x
    
#model = FMNIST()   

In [4]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

In [5]:
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)

In [6]:
from torch import optim

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)


In [7]:
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)
        

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-0.0004, -0.0004, -0.0004,  ..., -0.0007, -0.0006, -0.0004],
        [ 0.0069,  0.0069,  0.0069,  ...,  0.0072,  0.0070,  0.0069],
        [-0.0015, -0.0015, -0.0015,  ..., -0.0016, -0.0015, -0.0015],
        ...,
        [ 0.0018,  0.0018,  0.0018,  ...,  0.0017,  0.0017,  0.0018],
        [ 0.0019,  0.0019,  0.0019,  ...,  0.0019,  0.0019,  0.0019],
        [ 0.0017,  0.0017,  0.0017,  ...,  0.0016,  0.0017,  0.0017]])


In [8]:
optimizer.step() # updating the weights using the optimizer

In [9]:
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0151, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0296,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0221, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0285,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-0.0004, -0.0004, -0.0004,  ..., -0.0007, -0.0006, -0.0004],
        [ 0.0069,  0.0069,  0.0069,  ...,  0.0072,  0.0070,  0.0069],
        [-0.0015, -0.0015, -0.0015,  ..., -0.0016, -0.0015, -0.0015],
        ...,
        [ 0.0018,  0.0018,  0.0018,  ...,  0.0017,  0.0017,  0.0018],
        [ 0.0019,  0.0019,  0.0019,  ...,  0.0019,  0.0019,  0.0019],
        [ 0.0017,  0.0017,  0.0017,  ...,  0.0016,  0.0017,  0.0017]])


In [10]:
optimizer.zero_grad() # zeroing the gradient after each calculation

In [11]:
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0151, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0296,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0221, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0285,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [13]:
model = FMNIST()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1

for i in range(num_epochs):
    cum_loss = 0
    bs_num = 0

    for bs_num, (images, labels) in enumerate(trainloader, 1):
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        cum_loss += loss.item()
        print(f'Batch: {bs_num}, Loss : {loss.item()}')
     
    print(f"Training loss: {cum_loss/len(trainloader)}")

Batch: 1, Loss : 2.3273115158081055
Batch: 2, Loss : 2.293184280395508
Batch: 3, Loss : 2.3112661838531494
Batch: 4, Loss : 2.3118538856506348
Batch: 5, Loss : 2.2956676483154297
Batch: 6, Loss : 2.293297529220581
Batch: 7, Loss : 2.2968060970306396
Batch: 8, Loss : 2.270711660385132
Batch: 9, Loss : 2.2949740886688232
Batch: 10, Loss : 2.294889450073242
Batch: 11, Loss : 2.3100788593292236
Batch: 12, Loss : 2.2990283966064453
Batch: 13, Loss : 2.2964749336242676
Batch: 14, Loss : 2.2953503131866455
Batch: 15, Loss : 2.266622543334961
Batch: 16, Loss : 2.2846601009368896
Batch: 17, Loss : 2.2752439975738525
Batch: 18, Loss : 2.261687755584717
Batch: 19, Loss : 2.293064832687378
Batch: 20, Loss : 2.27963924407959
Batch: 21, Loss : 2.255924701690674
Batch: 22, Loss : 2.262437343597412
Batch: 23, Loss : 2.254652976989746
Batch: 24, Loss : 2.2678794860839844
Batch: 25, Loss : 2.2677180767059326
Batch: 26, Loss : 2.240208387374878
Batch: 27, Loss : 2.2844810485839844
Batch: 28, Loss : 2.252

Batch: 221, Loss : 1.3204445838928223
Batch: 222, Loss : 1.2740366458892822
Batch: 223, Loss : 1.3130650520324707
Batch: 224, Loss : 1.2673839330673218
Batch: 225, Loss : 1.2436292171478271
Batch: 226, Loss : 1.226189374923706
Batch: 227, Loss : 1.2443219423294067
Batch: 228, Loss : 1.3343570232391357
Batch: 229, Loss : 1.1850758790969849
Batch: 230, Loss : 1.3133430480957031
Batch: 231, Loss : 1.318487524986267
Batch: 232, Loss : 1.4127391576766968
Batch: 233, Loss : 1.3631834983825684
Batch: 234, Loss : 1.316428780555725
Batch: 235, Loss : 1.2542226314544678
Batch: 236, Loss : 1.312017560005188
Batch: 237, Loss : 1.2638838291168213
Batch: 238, Loss : 1.2465494871139526
Batch: 239, Loss : 1.2897800207138062
Batch: 240, Loss : 1.2794121503829956
Batch: 241, Loss : 1.194608211517334
Batch: 242, Loss : 1.2498332262039185
Batch: 243, Loss : 1.1584415435791016
Batch: 244, Loss : 1.1943132877349854
Batch: 245, Loss : 1.2157005071640015
Batch: 246, Loss : 1.2400010824203491
Batch: 247, Loss 

Batch: 437, Loss : 0.7536025047302246
Batch: 438, Loss : 0.7344563007354736
Batch: 439, Loss : 0.7935022115707397
Batch: 440, Loss : 0.8040387034416199
Batch: 441, Loss : 0.8050840497016907
Batch: 442, Loss : 0.8211210370063782
Batch: 443, Loss : 0.8516119122505188
Batch: 444, Loss : 0.7371495366096497
Batch: 445, Loss : 0.6923138499259949
Batch: 446, Loss : 0.8441256880760193
Batch: 447, Loss : 0.7863665223121643
Batch: 448, Loss : 0.7977224588394165
Batch: 449, Loss : 0.8043186068534851
Batch: 450, Loss : 0.6792334318161011
Batch: 451, Loss : 0.8397294282913208
Batch: 452, Loss : 0.8697363138198853
Batch: 453, Loss : 0.771031379699707
Batch: 454, Loss : 0.8126727342605591
Batch: 455, Loss : 0.6270816326141357
Batch: 456, Loss : 0.8570786118507385
Batch: 457, Loss : 0.842613697052002
Batch: 458, Loss : 0.8717027306556702
Batch: 459, Loss : 0.7391253709793091
Batch: 460, Loss : 0.7095186710357666
Batch: 461, Loss : 0.842334508895874
Batch: 462, Loss : 0.647293210029602
Batch: 463, Loss

Batch: 653, Loss : 0.7317698001861572
Batch: 654, Loss : 0.7479687929153442
Batch: 655, Loss : 0.8091865181922913
Batch: 656, Loss : 0.7208266854286194
Batch: 657, Loss : 0.6284978985786438
Batch: 658, Loss : 0.6035612225532532
Batch: 659, Loss : 0.6831122040748596
Batch: 660, Loss : 0.6508500576019287
Batch: 661, Loss : 0.8517587184906006
Batch: 662, Loss : 0.708644449710846
Batch: 663, Loss : 0.7897759675979614
Batch: 664, Loss : 0.7139507532119751
Batch: 665, Loss : 0.6474398970603943
Batch: 666, Loss : 0.6890150904655457
Batch: 667, Loss : 0.7021757960319519
Batch: 668, Loss : 0.7152019143104553
Batch: 669, Loss : 0.6328343749046326
Batch: 670, Loss : 0.788378894329071
Batch: 671, Loss : 0.6253613829612732
Batch: 672, Loss : 0.6547462940216064
Batch: 673, Loss : 0.6393874287605286
Batch: 674, Loss : 0.8240779042243958
Batch: 675, Loss : 0.7207798361778259
Batch: 676, Loss : 0.7777431011199951
Batch: 677, Loss : 0.7428613305091858
Batch: 678, Loss : 0.7353305816650391
Batch: 679, Lo

Batch: 869, Loss : 0.527176558971405
Batch: 870, Loss : 0.49753671884536743
Batch: 871, Loss : 0.5303326845169067
Batch: 872, Loss : 0.702815592288971
Batch: 873, Loss : 0.550304651260376
Batch: 874, Loss : 0.5853625535964966
Batch: 875, Loss : 0.8411471247673035
Batch: 876, Loss : 0.5865780115127563
Batch: 877, Loss : 0.6469947099685669
Batch: 878, Loss : 0.5588973164558411
Batch: 879, Loss : 0.49517878890037537
Batch: 880, Loss : 0.5952535271644592
Batch: 881, Loss : 0.6198689341545105
Batch: 882, Loss : 0.6277535557746887
Batch: 883, Loss : 0.9258030652999878
Batch: 884, Loss : 0.646431028842926
Batch: 885, Loss : 0.6362239122390747
Batch: 886, Loss : 0.8353440761566162
Batch: 887, Loss : 0.7380415797233582
Batch: 888, Loss : 0.8363584280014038
Batch: 889, Loss : 0.5739362835884094
Batch: 890, Loss : 0.6972647905349731
Batch: 891, Loss : 0.6996551752090454
Batch: 892, Loss : 0.7093980312347412
Batch: 893, Loss : 0.7814003229141235
Batch: 894, Loss : 0.6296308636665344
Batch: 895, Lo