In [59]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [60]:
# Experiment configuration: which dataset to load and the basic training hyperparameters.
dataset = 'mnist'
batch_size = 64
epochs = 10
lr = 0.01

if dataset == 'mnist':
    # ToTensor converts each image to a float tensor in [0, 1];
    # Normalize with mean 0.5 and std 0.5 then computes (x - 0.5) / 0.5 on the single channel.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    # Both splits are downloaded into the current directory on first use.
    trainset = datasets.MNIST(root='.', train=True, download=True, transform=data_transform)
    testset = datasets.MNIST(root='.', train=False, download=True, transform=data_transform)
else:
    # Fail here with a clear message instead of a NameError on `trainset` further down.
    raise ValueError("Unsupported dataset: {!r}".format(dataset))

# Shuffle only the training data; the test set is iterated in a fixed order.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader  = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)

In [61]:
# Pull a single batch out of the training loader so its layout can be inspected.
# The recorded shape is (64, 1, 28, 28): 64 images per batch, each 1 x 28 x 28.
# For the fully connected model the batch dimension is kept and the remaining
# 28 x 28 x 1 = 784 values of every image are flattened with `view`.
batch_iterator = iter(train_loader)
images, labels = next(batch_iterator)
images.shape

torch.Size([64, 1, 28, 28])

In [62]:
# Flatten every image in the batch into one feature vector.
# The batch dimension is read from the tensor itself rather than hard-coded as 64,
# so this keeps working if batch_size changes or the batch is smaller than usual.
images = images.view(images.shape[0], -1)

In [63]:
# Define the classifier: a fully connected network that maps a flattened
# 784-value image to 10 class scores through two hidden layers (128 and 64 units).
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Assemble the layers in order, then wrap them in a Sequential container.
first_hidden, second_hidden = hidden_sizes
layers = [
    nn.Linear(input_size, first_hidden),
    nn.ReLU(),
    nn.Linear(first_hidden, second_hidden),
    nn.ReLU(),
    nn.Linear(second_hidden, output_size),
    # Log-softmax over dim 1 (the class dimension): the model outputs log-probabilities.
    nn.LogSoftmax(dim=1),
]
model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax()
)


In [64]:
# Test run 1: push the inspected batch through the model and compute its loss.
predicted_y = model(images)
# The model ends in LogSoftmax, so predicted_y already holds log-probabilities.
# NLLLoss consumes log-probabilities directly; CrossEntropyLoss expects raw logits
# and would apply log-softmax a second time on top of the model's own.
loss_func = nn.NLLLoss()
loss = loss_func(predicted_y, labels)

In [65]:
# (Progress note: got up to this part this morning.)

# Inspect the gradient stored on the first layer's weights before and after backpropagation.
first_layer_weight = model[0].weight

# In the recorded run this prints None: no backward pass had populated .grad yet.
print('Before backward pass: \n', first_layer_weight.grad)

# Backpropagate the test-run loss; this fills .grad on the model's parameters.
loss.backward()

print('After backward pass: \n', first_layer_weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[-0.0036, -0.0036, -0.0036,  ..., -0.0036, -0.0036, -0.0036],
        [-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        [-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        ...,
        [ 0.0027,  0.0027,  0.0027,  ...,  0.0027,  0.0027,  0.0027],
        [ 0.0044,  0.0044,  0.0044,  ...,  0.0044,  0.0044,  0.0044],
        [ 0.0033,  0.0033,  0.0033,  ...,  0.0033,  0.0033,  0.0033]])


In [69]:
# One manual backward pass has been run above; now train properly with SGD.
# NOTE(review): lr=10 is far larger than the lr = 0.01 set in the configuration cell. The
# recorded losses stall near 2.38 after the first epoch, so this looks like a deliberate
# "learning rate too high" experiment — confirm before reusing this cell.
optimizer = optim.SGD(model.parameters(), lr=10)
optimizer.zero_grad()  # clear any gradient left over from the earlier manual backward pass
epochs = 10
n_batches = len(train_loader)  # number of batches per epoch, used to average the loss

for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        # Keep the batch dimension and flatten each MNIST image into a 784-long vector
        images = images.view(images.size(0), -1)

        # Gradients accumulate across backward calls, so reset them for every batch
        optimizer.zero_grad()

        # Forward pass and loss for this batch
        predicted_y = model(images)
        loss = loss_func(predicted_y, labels)

        # Backpropagate, then let the optimizer update the weights
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of this epoch
    mean_loss = running_loss / n_batches
    print("Epoch {} - Training loss: {}".format(e, mean_loss))

Epoch 0 - Training loss: 5451.524821356098
Epoch 1 - Training loss: 2.379009628346734
Epoch 2 - Training loss: 2.377739315348139
Epoch 3 - Training loss: 2.3781830232534835
Epoch 4 - Training loss: 2.3774945209783787
Epoch 5 - Training loss: 2.382479196418323
Epoch 6 - Training loss: 2.3773455121623934
Epoch 7 - Training loss: 2.3840213221019266
Epoch 8 - Training loss: 2.3793262812628675
Epoch 9 - Training loss: 2.3806432137357145


In [None]:
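# When we set the learning rate to 10, the training loss drops quickly from 5451.52 to about 2.38 after the
# first epoch, but then stays roughly constant around 2.37-2.38 and sometimes increases slightly.
# A loss near ln(10) ≈ 2.30 is what uniform guessing over 10 classes gives, so the model is not learning.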

# CIFAR10 Data and Model

In [None]:
# Configuration and data loading for this section.
# NOTE(review): the section heading says CIFAR10 but this still selects MNIST —
# set dataset = 'cifar10' if that is the intent.
dataset = 'mnist'
batch_size = 64
epochs = 10
lr = 0.01

if dataset == 'mnist':
    # Single-channel images: convert to tensors, then normalize with mean 0.5 / std 0.5.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    trainset = datasets.MNIST(root='.', train=True, download=True, transform=data_transform)
    testset = datasets.MNIST(root='.', train=False, download=True, transform=data_transform)
elif dataset == 'cifar10':
    # CIFAR10 images have three colour channels, so Normalize needs one mean/std per channel.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = datasets.CIFAR10(root='.', train=True, download=True, transform=data_transform)
    testset = datasets.CIFAR10(root='.', train=False, download=True, transform=data_transform)
else:
    # Fail here with a clear message instead of a NameError on `trainset` further down.
    raise ValueError("Unsupported dataset: {!r}".format(dataset))

# Shuffle only the training data; the test set is iterated in a fixed order.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader  = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)