# Using optimizers

In [1]:
# Fix the RNG seeds up front so repeated runs can produce the same numbers.
# Identical results are still not guaranteed across PyTorch releases.
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)

# Pin the cuDNN flags: benchmark off, deterministic on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Settings shared by the training and test splits.
DATA_ROOT = '~/.pytorch/FMNIST/'
BATCH_SIZE = 64

# One-element tuples: a single mean / std value handed to Normalize.
mean, std = (0.5,), (0.5,)

# Preprocessing pipeline: convert to tensor, then normalise with the values above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split (download enabled), served in shuffled mini-batches.
trainset = datasets.FashionMNIST(DATA_ROOT, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)

# Test split (download enabled), served without shuffling.
testset = datasets.FashionMNIST(DATA_ROOT, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)

In [3]:
class FMNIST(nn.Module):
  """Fully connected classifier: 784 -> 128 -> 64 -> 10, log-softmax output.

  The three linear layers are exposed as ``fc1``, ``fc2`` and ``fc3``.
  """

  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 10)

  def forward(self, x):
    """Return per-class log-probabilities for a batch.

    Args:
      x: tensor whose first dimension is the batch; the remaining
        dimensions are flattened and must multiply to 784 per sample.

    Returns:
      Tensor of shape (batch, 10) holding log-softmax scores over dim 1.
    """
    # reshape instead of view: view raises on non-contiguous inputs
    # (e.g. a transposed batch), reshape copies only when it has to.
    x = x.reshape(x.shape[0], -1)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)

    return F.log_softmax(x, dim=1)
    
# model = FMNIST()  # left commented out: the class is instantiated later, in the training cell

In [4]:
# The 784 -> 128 -> 64 -> 10 stack as a plain nn.Sequential, ending in LogSoftmax.
# Layers are created in the same order as they appear in the network.
layers = [
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
]
model = nn.Sequential(*layers)

In [5]:
# Take the first mini-batch from the training loader and flatten every
# image into one row, keeping the batch dimension.
batch_iterator = iter(trainloader)
images, labels = next(batch_iterator)
images = images.view(images.size(0), -1)

In [6]:
from torch import optim

# Negative log-likelihood loss for the model's log-softmax outputs.
criterion = nn.NLLLoss()

# SGD over all of the model's parameters with learning rate 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


In [7]:
# One forward and backward pass on the sample batch.
output = model(images)
loss = criterion(output, labels)
loss.backward()

# Inspect the first layer: its weights and the gradient backward() just stored.
first_layer = model[0]
print('Initial weights : ', first_layer.weight)
print('Initial weights gradient : ', first_layer.weight.grad)
        

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-7.2154e-04, -7.2154e-04, -7.2154e-04,  ..., -7.1856e-04,
         -7.2154e-04, -7.2154e-04],
        [ 1.5117e-03,  1.5168e-03,  1.5292e-03,  ...,  1.5526e-03,
          1.5154e-03,  1.5168e-03],
        [-4.5585e-04, -4.5585e-04, -4.5585e-04,  ..., -4.5585e-04,
         -4.5585e-04, -4.5585e-04],
        ...,
        [-9.6583e-05, -9.6583e-05, -9.6583e-05,  ..., -1.5217e-04,
         -9.6583e-05, -9.6583e-05],
        [-5.6446e-04, -5.6559e-04, -5.5394e-0

In [9]:
# Apply one optimizer update using the gradients stored by loss.backward().
optimizer.step()

In [10]:
# These values are read after optimizer.step(), so they are the updated
# weights, not the initial ones. The stored gradient is printed again for
# comparison with the values shown before the step.
print('Updated weights : ', model[0].weight)
print('Weights gradient (after step) : ', model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0232, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0065,  0.0125,  ...,  0.0285,  0.0349, -0.0106]],
       requires_grad=True)
Initial weights gradient :  tensor([[-7.2154e-04, -7.2154e-04, -7.2154e-04,  ..., -7.1856e-04,
         -7.2154e-04, -7.2154e-04],
        [ 1.5117e-03,  1.5168e-03,  1.5292e-03,  ...,  1.5526e-03,
          1.5154e-03,  1.5168e-03],
        [-4.5585e-04, -4.5585e-04, -4.5585e-04,  ..., -4.5585e-04,
         -4.5585e-04, -4.5585e-04],
        ...,
        [-9.6583e-05, -9.6583e-05, -9.6583e-05,  ..., -1.5217e-04,
         -9.6583e-05, -9.6583e-05],
        [-5.6446e-04, -5.6559e-04, -5.5394e-0

In [11]:
# Clear the gradients held by the optimizer's parameters
# (the recorded output of the next cell shows them as zeros).
optimizer.zero_grad()

In [12]:
# Read after optimizer.zero_grad(): the labels now say so instead of "Initial".
# NOTE(review): depending on the PyTorch version, zero_grad() may set .grad to
# None instead of a zero tensor; confirm against the installed release.
print('Weights after zero_grad : ', model[0].weight)
print('Weights gradient after zero_grad : ', model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0232, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0065,  0.0125,  ...,  0.0285,  0.0349, -0.0106]],
       requires_grad=True)
Initial weights gradient :  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [14]:
# Fresh FMNIST network with its own loss function and SGD optimizer.
model = FMNIST()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1

for epoch in range(num_epochs):
    cum_loss = 0
    batch_num = 0

    # Batches are numbered from 1 in the log lines.
    for batch_num, batch in enumerate(trainloader, start=1):
        images, labels = batch

        # Standard step: reset grads, forward, loss, backward, update.
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and reuse it for the total and the log.
        batch_loss = loss.item()
        cum_loss += batch_loss
        print(f'Batch: {batch_num}, Loss: {batch_loss}')

    # Mean of the per-batch losses over the epoch.
    mean_loss = cum_loss / len(trainloader)
    print(f"Training loss: {mean_loss}")

Batch: 1, Loss: 2.288628339767456
Batch: 2, Loss: 2.3080620765686035
Batch: 3, Loss: 2.3098013401031494
Batch: 4, Loss: 2.284043312072754
Batch: 5, Loss: 2.291738986968994
Batch: 6, Loss: 2.3061680793762207
Batch: 7, Loss: 2.2870423793792725
Batch: 8, Loss: 2.276984453201294
Batch: 9, Loss: 2.2776780128479004
Batch: 10, Loss: 2.3024075031280518
Batch: 11, Loss: 2.270155906677246
Batch: 12, Loss: 2.288559913635254
Batch: 13, Loss: 2.2820968627929688
Batch: 14, Loss: 2.2649171352386475
Batch: 15, Loss: 2.274873733520508
Batch: 16, Loss: 2.2665631771087646
Batch: 17, Loss: 2.2486894130706787
Batch: 18, Loss: 2.25093150138855
Batch: 19, Loss: 2.2826104164123535
Batch: 20, Loss: 2.2532405853271484
Batch: 21, Loss: 2.234309434890747
Batch: 22, Loss: 2.2593002319335938
Batch: 23, Loss: 2.248821496963501
Batch: 24, Loss: 2.2512760162353516
Batch: 25, Loss: 2.257256507873535
Batch: 26, Loss: 2.2401793003082275
Batch: 27, Loss: 2.214656352996826
Batch: 28, Loss: 2.226203441619873
Batch: 29, Loss

Batch: 230, Loss: 1.2443780899047852
Batch: 231, Loss: 1.2140682935714722
Batch: 232, Loss: 1.3427790403366089
Batch: 233, Loss: 1.115907907485962
Batch: 234, Loss: 1.2040200233459473
Batch: 235, Loss: 1.4018675088882446
Batch: 236, Loss: 1.3054977655410767
Batch: 237, Loss: 1.1856940984725952
Batch: 238, Loss: 1.2555208206176758
Batch: 239, Loss: 1.1556650400161743
Batch: 240, Loss: 1.1539603471755981
Batch: 241, Loss: 1.1964763402938843
Batch: 242, Loss: 1.1661659479141235
Batch: 243, Loss: 1.3382189273834229
Batch: 244, Loss: 1.0965142250061035
Batch: 245, Loss: 1.173755168914795
Batch: 246, Loss: 1.282906174659729
Batch: 247, Loss: 1.1114957332611084
Batch: 248, Loss: 1.2254294157028198
Batch: 249, Loss: 1.1858657598495483
Batch: 250, Loss: 1.1011098623275757
Batch: 251, Loss: 1.23004949092865
Batch: 252, Loss: 1.1434880495071411
Batch: 253, Loss: 1.0658663511276245
Batch: 254, Loss: 1.0187327861785889
Batch: 255, Loss: 1.2080317735671997
Batch: 256, Loss: 1.1579567193984985
Batch:

Batch: 464, Loss: 0.81292724609375
Batch: 465, Loss: 0.8566833734512329
Batch: 466, Loss: 0.7959210872650146
Batch: 467, Loss: 0.8799583315849304
Batch: 468, Loss: 0.8677557706832886
Batch: 469, Loss: 0.8151605129241943
Batch: 470, Loss: 0.7727620005607605
Batch: 471, Loss: 0.7839208841323853
Batch: 472, Loss: 0.7476712465286255
Batch: 473, Loss: 0.8931441307067871
Batch: 474, Loss: 0.7824521660804749
Batch: 475, Loss: 0.7838314175605774
Batch: 476, Loss: 0.7332865595817566
Batch: 477, Loss: 0.7414096593856812
Batch: 478, Loss: 0.7301003336906433
Batch: 479, Loss: 0.864790141582489
Batch: 480, Loss: 0.7664608359336853
Batch: 481, Loss: 0.6764508485794067
Batch: 482, Loss: 0.826641857624054
Batch: 483, Loss: 0.815302848815918
Batch: 484, Loss: 0.8251277208328247
Batch: 485, Loss: 0.6247996687889099
Batch: 486, Loss: 0.7449228167533875
Batch: 487, Loss: 0.7727482914924622
Batch: 488, Loss: 0.6419260501861572
Batch: 489, Loss: 0.8319789171218872
Batch: 490, Loss: 0.7450084686279297
Batch:

Batch: 690, Loss: 0.7255526781082153
Batch: 691, Loss: 0.7232828140258789
Batch: 692, Loss: 0.722409188747406
Batch: 693, Loss: 0.7636272311210632
Batch: 694, Loss: 0.7034024000167847
Batch: 695, Loss: 0.7502437829971313
Batch: 696, Loss: 0.8505018353462219
Batch: 697, Loss: 0.6978367567062378
Batch: 698, Loss: 0.7250263094902039
Batch: 699, Loss: 0.8582481741905212
Batch: 700, Loss: 0.8512320518493652
Batch: 701, Loss: 0.6327056884765625
Batch: 702, Loss: 0.6065342426300049
Batch: 703, Loss: 0.6422102451324463
Batch: 704, Loss: 0.6038904786109924
Batch: 705, Loss: 0.6565707921981812
Batch: 706, Loss: 0.6937658786773682
Batch: 707, Loss: 0.6600814461708069
Batch: 708, Loss: 0.7112292051315308
Batch: 709, Loss: 0.7098615765571594
Batch: 710, Loss: 0.5268065929412842
Batch: 711, Loss: 0.6000372171401978
Batch: 712, Loss: 0.5753609538078308
Batch: 713, Loss: 0.687133252620697
Batch: 714, Loss: 0.8461347818374634
Batch: 715, Loss: 0.8451234102249146
Batch: 716, Loss: 0.5139820575714111
Bat

Batch: 914, Loss: 0.6200928688049316
Batch: 915, Loss: 0.7076222896575928
Batch: 916, Loss: 0.6444756984710693
Batch: 917, Loss: 0.7690625190734863
Batch: 918, Loss: 0.6698958873748779
Batch: 919, Loss: 0.827814519405365
Batch: 920, Loss: 0.728588342666626
Batch: 921, Loss: 0.5826833248138428
Batch: 922, Loss: 0.5327664017677307
Batch: 923, Loss: 0.6287207007408142
Batch: 924, Loss: 0.5486977100372314
Batch: 925, Loss: 0.7654973864555359
Batch: 926, Loss: 0.5996663570404053
Batch: 927, Loss: 0.6182467341423035
Batch: 928, Loss: 0.6924936175346375
Batch: 929, Loss: 0.4430564045906067
Batch: 930, Loss: 0.7466434836387634
Batch: 931, Loss: 0.5913838148117065
Batch: 932, Loss: 0.5907655358314514
Batch: 933, Loss: 0.6467644572257996
Batch: 934, Loss: 0.5608280897140503
Batch: 935, Loss: 0.4923921823501587
Batch: 936, Loss: 0.6768632531166077
Batch: 937, Loss: 0.4640061855316162
Batch: 938, Loss: 0.5052637457847595
Training loss: 1.0306918401199618


In [15]:
# Training samples divided by batch size, read from the objects themselves
# rather than hard-coded. A fractional result means the final batch is partial.
len(trainset) / trainloader.batch_size

937.5