In [1]:


!wget www.di.ens.fr/~lelarge/MNIST.tar.gz
!tar -zxvf MNIST.tar.gz

from torchvision.datasets import MNIST
from torchvision import transforms as transform

train_data = MNIST(root = './', train=True, download=True, transform=transform)
test_data = MNIST(root = './', train=False, download=True, transform=transform)

--2021-03-20 19:05:56--  http://www.di.ens.fr/~lelarge/MNIST.tar.gz
Resolving www.di.ens.fr (www.di.ens.fr)... 129.199.99.14
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.di.ens.fr/~lelarge/MNIST.tar.gz [following]
--2021-03-20 19:05:56--  https://www.di.ens.fr/~lelarge/MNIST.tar.gz
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘MNIST.tar.gz’

MNIST.tar.gz            [         <=>        ]  33.20M  16.6MB/s    in 2.0s    

2021-03-20 19:05:59 (16.6 MB/s) - ‘MNIST.tar.gz’ saved [34813078]

MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNIST/raw/tra

**Activation Function**


In [2]:
train_input = train_data.data.view(-1, 1, 28, 28).float()
train_target = train_data.targets
test_input = test_data.data.view(-1, 1, 28, 28).float()
test_target = test_data.targets

In [3]:
import torch

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!pwd

/content


In [9]:
#from dlc_practical_prologue import *
import dlc_practical_prologue as p

In [11]:
def sigma(x):
    return torch.tanh(x)

In [12]:
def dsigma(x):
    t = torch.tanh(x)
    return (1 - (torch.pow(t, 2)))

**Loss**

In [13]:
def loss(v, t):
    y = v-t
    return torch.sum(torch.mul(y, y))

In [14]:
def dloss(v, t):
    return 2 * (v - t)

**Forward and backward passes**

In [15]:
def forward_pass(w1, b1, w2, b2, x):
    x0 = x
    s1 = torch.mm(w1, x0)  + b1
    x1 = sigma(s1)
    s2 = torch.mm(w2, x1) + b2
    x2 = sigma(s2)
    return (x0, s1, x1, s2, x2)

In [16]:
def backward_pass(w1, b1, w2, b2, t, x, s1, x1, s2, x2, dl_dw1, dl_db1, dl_dw2, dl_db2):

    dl_dw1 += torch.mm(torch.mul(torch.mm(torch.t(w2), torch.mul(dloss(x2, t), dsigma(s2))), dsigma(s1)), torch.t(x))

    dl_dw2 += torch.mm(torch.mul(dloss(x2, t), dsigma(s2)), torch.t(x1))

    dl_db1 += torch.mul(torch.mm(torch.t(w2), torch.mul(dloss(x2, t), dsigma(s2))), dsigma(s1))

    dl_db2 += torch.mul(dloss(x2, t), dsigma(s2))
    

In [17]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./
    Split: Test
    StandardTransform
Transform: <module 'torchvision.transforms' from '/usr/local/lib/python3.7/dist-packages/torchvision/transforms/__init__.py'>

**Training the network**

In [19]:
def error(test, target, w1, b1, w2, b2):
    n = test.shape[0]
    err = 0
    for i in range(n):
        x = test[i].resize(test[i].shape[0], 1)
        t = torch.argmax(target[i])
        pred = forward_pass(w1, b1, w2, b2, x)[-1]
        p = torch.argmax(pred, 0)
        if t != p:
            err += 1
    return err/n

In [22]:
def grad_des():
    train_input, train_target, test_input, test_target = \
        p.load_data(cifar=False, one_hot_labels=True, normalize=True)
    test_target *= 0.9
    train_target *= 0.9
    ps = 0.000001
    print(ps)
    w1 = torch.zeros(50, 784).normal_(0, ps)
    b1 = torch.zeros(50, 1).normal_(0, ps)
    w2 = torch.zeros(10, 50).normal_(0, ps)
    b2 = torch.zeros(10, 1).normal_(0, ps)
    alpha = 0.1/train_input.shape[0]
    for i in range(1000):
        dl_dw1 = torch.zeros(50, 784)
        dl_db1 = torch.zeros(50, 1)
        dl_dw2 = torch.zeros(10, 50)
        dl_db2 = torch.zeros(10, 1)
        for j in range(1000):
            x = train_input[j].resize_(train_input[j].shape[0], 1)
            t = train_target[j].resize_(train_target[j].shape[0], 1)
            x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, x)
            backward_pass(w1, b1, w2, b2, t, x, s1, x1, s2, x2, dl_dw1, dl_db1, dl_dw2, dl_db2)
        w1 = w1 - alpha * dl_dw1
        w2 = w2 - alpha * dl_dw2
        b1 = b1 - alpha * dl_db1
        b2 = b2 - alpha * dl_db2
    print("Training Error: ", error(train_input, train_target, w1, b1, w2, b2))
    print("Test Error: ", error(test_input, test_target, w1, b1, w2, b2))


grad_des()

* Using MNIST
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples
1e-06
Training Error:  0.002
Test Error:  0.162


