In [1]:
import numpy as np
import torch
from torchvision import datasets, transforms
import random
import time

In [2]:
seed = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_num_threads(8)
np.random.seed(seed)
torch.manual_seed(seed)
dtype = np.float32

In [3]:
# Build and transform the dataset
kwargs = {"num_workers": 1, "pin_memory": True}
trains = datasets.MNIST('./data', train=True, download=True,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            ((0.1307,), (0.3081,)),
                        ]))
tests = datasets.MNIST('./data', train=False,
                       transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,)),
                        ]))
batch_size = 64
train_loader = torch.utils.data.DataLoader(trains, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(tests, batch_size=batch_size, shuffle=True, **kwargs)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


 99%|█████████▉| 9805824/9912422 [00:24<00:00, 509413.19it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz



0it [00:00, ?it/s][A

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz



  0%|          | 0/28881 [00:00<?, ?it/s][A
 57%|█████▋    | 16384/28881 [00:01<00:00, 63300.34it/s][A
32768it [00:01, 29928.45it/s]                           [A
0it [00:00, ?it/s][A

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



  0%|          | 0/1648877 [00:00<?, ?it/s][A
  1%|          | 16384/1648877 [00:00<00:29, 55319.94it/s][A
  3%|▎         | 49152/1648877 [00:01<00:23, 66705.22it/s][A
  6%|▌         | 98304/1648877 [00:01<00:18, 83145.55it/s][A
  9%|▉         | 155648/1648877 [00:01<00:15, 97672.49it/s][A
 13%|█▎        | 221184/1648877 [00:02<00:12, 114859.96it/s][A
 17%|█▋        | 278528/1648877 [00:02<00:10, 134933.97it/s][A
 21%|██▏       | 352256/1648877 [00:02<00:08, 160295.04it/s][A
 25%|██▌       | 417792/1648877 [00:02<00:06, 182064.88it/s][A
 30%|██▉       | 491520/1648877 [00:03<00:06, 188062.86it/s][A
 35%|███▍      | 573440/1648877 [00:03<00:05, 199539.88it/s][A
 39%|███▉      | 647168/1648877 [00:03<00:04, 219910.92it/s][A
 45%|████▍     | 737280/1648877 [00:04<00:03, 246370.29it/s][A
 50%|████▉     | 819200/1648877 [00:04<00:02, 310849.44it/s][A
 53%|█████▎    | 868352/1648877 [00:04<00:02, 308544.45it/s][A
 56%|█████▌    | 917504/1648877 [00:04<00:02, 268782.53it/s][A

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz




  0%|          | 0/4542 [00:00<?, ?it/s][A[A

8192it [00:00, 15608.03it/s]            [A[A

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [4]:
train_data = [x.view(28*28, -1).numpy() for x in trains.train_data]



In [5]:
train_labels = trains.train_labels.numpy()



In [6]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_prime(x):
    dx = sigmoid(x)
    return dx * (1.0 - dx)

In [7]:
class FNN(object):
    """The fully connected neural network"""
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])]

    def feedforward(self, a):
        """Feedforward to compute loss"""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def backprop(self, x, y):
        """Backpropagate to compute gradients"""
        grad_b = [np.zeros(b.shape) for b in self.biases]
        grad_w = [np.zeros(w.shape) for w in self.weights]
        activations = []
        # the input will act as the activation of the weight in the first layer
        activations.append(x)
        zs = []
        l = 0
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activations[l]) + b
            zs.append(z)
            activations.append(sigmoid(z))
            l = l + 1
        delta = (activations[-1] - y) * (sigmoid_prime(zs[-1]))
        grad_b[-1] = delta
        grad_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in range(2, self.num_layers):
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sigmoid_prime(zs[-l])
            grad_b[-l] = delta
            grad_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return zip(grad_b, grad_w)

    def update(self, data, lr):
        """Update weights and biases with gradients derived by backpropagation"""
        grad_b_sum = [np.zeros(b.shape) for b in self.biases]
        grad_w_sum = [np.zeros(w.shape) for w in self.weights]
        for x, y in data:
            l = 0
            for grad_b, grad_w in self.backprop(x, y):
                grad_b_sum[l] = grad_b_sum[l] + grad_b
                grad_w_sum[l] = grad_w_sum[l] + grad_w
                l = l + 1
        self.biases = [b - lr / len(data) * grad_b for b, grad_b in zip(self.biases, grad_b_sum)]
        self.weights = [w - lr / len(data) * grad_w for w, grad_w in zip(self.weights, grad_w_sum)]

    def grad_descent(self, dataloader, lr):
        """Batch gradient descent algorithm"""
        for i, (images, labels) in enumerate(dataloader):
            x_list = [image.view(28*28, -1).numpy() for image in images]
            y_list = []
            for y_value in labels.numpy():
                y = np.zeros((10, 1))
                y[y_value] = 1.0
                y_list.append(y)
            data = list(zip(x_list, y_list))
            self.update(data, lr)

In [8]:
num_epochs = 10
test_data = list(zip(train_data, train_labels))
fnn = FNN([784, 30, 10])
start_time = time.time()
for epoch in range(num_epochs):
    epoch_start_time = time.time()
    fnn.grad_descent(train_loader, 3)
    count = 0
    for (x, y) in test_data:
        if np.argmax(fnn.feedforward(x)) == y:
            count = count + 1
    print('Epochs {}: {}% time: {}s'.format(epoch + 1, format(count / len(test_data) * 100.0, '.2f'), time.time() - epoch_start_time))
print('total time cost: {}s'.format(time.time() - start_time))

TypeError: Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 99, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 99, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/anaconda3/lib/python3.6/site-packages/torchvision/datasets/mnist.py", line 95, in __getitem__
    img = self.transform(img)
  File "/anaconda3/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 61, in __call__
    img = t(img)
TypeError: 'tuple' object is not callable


9920512it [00:40, 509413.19it/s]                             
1654784it [00:23, 405815.73it/s]                             [A