In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5),(0.5)),
                               ])
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [5]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10))
criterion = nn.CrossEntropyLoss()
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)
logits = model(images)
loss = criterion(logits, labels)
print(loss)

tensor(2.3234, grad_fn=<NllLossBackward>)


In [13]:
model = nn.Sequential(nn.Linear(784,128),
                     nn.ReLU(),
                     nn.Linear(128,64),
                     nn.ReLU(),
                     nn.Linear(64,10),
                     nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()

images,labels = next(iter(trainloader))
images = images.view(images.shape[0],-1)
logits = model(images)
loss = criterion(logits,labels)
print(loss)

tensor(2.3421, grad_fn=<NllLossBackward>)


In [14]:
x = torch.randn(2,2,requires_grad=True)
print(x)

tensor([[-0.2614, -1.6814],
        [ 0.3950, -1.3915]], requires_grad=True)


In [15]:
y = x**2
print(y)

tensor([[0.0683, 2.8273],
        [0.1561, 1.9364]], grad_fn=<PowBackward0>)


In [17]:
print(iter)

<built-in function iter>


In [18]:
print(next)

<built-in function next>


In [19]:
print(y.grad_fn)

<PowBackward0 object at 0x7f5db881beb0>


In [20]:
p = y.mean()
print(p)

tensor(1.2470, grad_fn=<MeanBackward0>)


In [21]:
p.backward()
print(x.grad)
print(x/2)

tensor([[-0.1307, -0.8407],
        [ 0.1975, -0.6958]])
tensor([[-0.1307, -0.8407],
        [ 0.1975, -0.6958]], grad_fn=<DivBackward0>)


In [25]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)
logits = model(images)
loss = criterion(logits, labels)

In [26]:
print('Before backward pass: \n', model[0].weight.grad)
loss.backward()
print('After backward pass: \n', model[0].weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[-1.0361e-03, -1.0361e-03, -1.0361e-03,  ..., -1.0361e-03,
         -1.0361e-03, -1.0361e-03],
        [-6.0623e-05, -6.0623e-05, -6.0623e-05,  ..., -6.0623e-05,
         -6.0623e-05, -6.0623e-05],
        [-1.4973e-03, -1.4973e-03, -1.4973e-03,  ..., -1.4973e-03,
         -1.4973e-03, -1.4973e-03],
        ...,
        [ 7.7462e-06,  7.7462e-06,  7.7462e-06,  ...,  7.7462e-06,
          7.7462e-06,  7.7462e-06],
        [-5.2127e-04, -5.2127e-04, -5.2127e-04,  ..., -5.2127e-04,
         -5.2127e-04, -5.2127e-04],
        [ 6.2140e-04,  6.2140e-04,  6.2140e-04,  ...,  6.2140e-04,
          6.2140e-04,  6.2140e-04]])


In [27]:
# Training the network
from torch import optim
optimizer = optim.SGD(model.parameters(), lr = 0.01)


In [33]:
print(model[0].weight)
images, labels = next(iter(trainloader))
images.resize_(64, 784)

optimizer.zero_grad()
output =model(images)
loss = criterion(output, labels)
loss.backward()
print(model[0].weight.grad)

Parameter containing:
tensor([[ 0.0191,  0.0279,  0.0125,  ..., -0.0194,  0.0042,  0.0076],
        [-0.0182, -0.0089, -0.0232,  ...,  0.0235,  0.0255,  0.0159],
        [-0.0251, -0.0054, -0.0063,  ..., -0.0026, -0.0145,  0.0051],
        ...,
        [ 0.0272, -0.0195,  0.0225,  ...,  0.0215, -0.0240, -0.0084],
        [ 0.0110,  0.0165,  0.0074,  ..., -0.0256,  0.0208, -0.0005],
        [-0.0106, -0.0063, -0.0246,  ...,  0.0010,  0.0326, -0.0346]],
       requires_grad=True)
tensor([[-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        [ 0.0008,  0.0008,  0.0008,  ...,  0.0008,  0.0008,  0.0008],
        [ 0.0016,  0.0016,  0.0016,  ...,  0.0016,  0.0016,  0.0016],
        ...,
        [-0.0015, -0.0015, -0.0015,  ..., -0.0015, -0.0015, -0.0015],
        [-0.0015, -0.0015, -0.0015,  ..., -0.0015, -0.0015, -0.0015],
        [ 0.0006,  0.0006,  0.0006,  ...,  0.0006,  0.0006,  0.0006]])


In [34]:
optimizer.step()
print('Updated weights - ', model[0].weight)

Updated weights -  Parameter containing:
tensor([[ 0.0191,  0.0279,  0.0125,  ..., -0.0194,  0.0042,  0.0076],
        [-0.0182, -0.0089, -0.0232,  ...,  0.0235,  0.0255,  0.0159],
        [-0.0251, -0.0054, -0.0063,  ..., -0.0026, -0.0145,  0.0051],
        ...,
        [ 0.0272, -0.0195,  0.0226,  ...,  0.0215, -0.0239, -0.0084],
        [ 0.0110,  0.0165,  0.0074,  ..., -0.0256,  0.0208, -0.0004],
        [-0.0106, -0.0063, -0.0246,  ...,  0.0010,  0.0326, -0.0346]],
       requires_grad=True)


In [37]:
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)
epochs = 10
for e in range(epochs):
    running_loss = 0
    for images,labels in trainloader:
        images = images.view(images.shape[0],-1)
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss +=loss.item()
    else:
        print({running_loss/len(trainloader)})

{2.008348879815419}
{2.5641349546436563}
{2.540095566686537}
{2.5631399907028753}
{2.5963198050761274}
{2.654329817686508}
{2.6800663743191944}
{2.730483985658902}
{2.864411003554045}
{2.908825506533641}
