In [18]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from numpy.random import rand, randn
import torchvision.datasets as dset
import torchvision.transforms as transforms

transform = transforms.Compose([transforms.ToTensor()])

train_set = dset.MNIST(root='', train=True, download=True, transform=transform)
test_set = dset.MNIST(root='', train=False, download=True, transform=transform)

batch_size = 128

train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)
print('==>>> total trainning batch number: {}'.format(len(train_loader)))
print('==>>> total testing batch number: {}'.format(len(test_loader)))

==>>> total trainning batch number: 469
==>>> total testing batch number: 79


In [22]:
class FCNet(nn.Module):
    def __init__(self):
        
        super(FCNet, self).__init__()
        
        self.fc1 = nn.Linear(28*28, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x))
        return x

In [23]:
net = FCNet()

In [24]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0)

for epoch in range(50):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / len(train_loader.dataset)))
    running_loss = 0.0
            
            #convnet.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = net(data)
            test_loss += criterion(output, target) # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    print(torch.mean(torch.abs(net.fc1.weight)), torch.mean(torch.abs(net.fc1.weight.grad)), torch.std(net.fc1.weight.grad))
    print(torch.mean(torch.abs(net.fc2.weight)), torch.mean(torch.abs(net.fc2.weight.grad)), torch.std(net.fc2.weight.grad))

print('Finished Training')



[1] loss: 0.018

Test set: Average loss: 0.0181, Accuracy: 3627/10000 (36%)

tensor(0.0179, grad_fn=<MeanBackward1>) tensor(0.0001) tensor(0.0002)
tensor(0.0350, grad_fn=<MeanBackward1>) tensor(0.0005) tensor(0.0007)
[2] loss: 0.018

Test set: Average loss: 0.0179, Accuracy: 3900/10000 (39%)

tensor(0.0179, grad_fn=<MeanBackward1>) tensor(0.0001) tensor(0.0002)
tensor(0.0360, grad_fn=<MeanBackward1>) tensor(0.0007) tensor(0.0011)
[3] loss: 0.018

Test set: Average loss: 0.0175, Accuracy: 4051/10000 (41%)

tensor(0.0180, grad_fn=<MeanBackward1>) tensor(0.0002) tensor(0.0003)
tensor(0.0384, grad_fn=<MeanBackward1>) tensor(0.0014) tensor(0.0022)
[4] loss: 0.017

Test set: Average loss: 0.0168, Accuracy: 5333/10000 (53%)

tensor(0.0181, grad_fn=<MeanBackward1>) tensor(0.0002) tensor(0.0005)
tensor(0.0423, grad_fn=<MeanBackward1>) tensor(0.0021) tensor(0.0034)
[5] loss: 0.016

Test set: Average loss: 0.0159, Accuracy: 5377/10000 (54%)

tensor(0.0182, grad_fn=<MeanBackward1>) tensor(0.0002) 

KeyboardInterrupt: 