In [23]:
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from tqdm.notebook import tqdm

<img src = "LeNet5_architecture.png">

In this implementation, we use the paper's original architecture. 

In [24]:
mnist = datasets.MNIST(root = "../data", download = True)

In [25]:
device = "cuda" if torch.cuda.is_available() else "cpu"
bs = 100

In [26]:
class MNISTTrainingDataset(torch.utils.data.Dataset):
    def __init__(self, data, targets, mean, std):
        super().__init__()
        train = data.reshape((-1,1,28,28))
        flat = train.flatten().to(torch.float32)

        self.train = ((train - mean)/ std).to(device)
        self.label = targets.to(torch.long).to(device)
        
    def __getitem__(self, i):
        return self.train[i], self.label[i]
    
    def __len__(self):
        return self.train.shape[0]

In [27]:
def train_test_datasets():
    #we have to use the training mean on the test set
    train = mnist.data[:50000].reshape((-1,1,28,28))
    flat = train.flatten().to(torch.float32)
    mean = flat.mean()
    std = flat.std()
    
    return MNISTTrainingDataset(mnist.data[:50000], mnist.targets[:50000], mean,std),\
        MNISTTrainingDataset(mnist.data[50000:], mnist.targets[50000:], mean, std)   

In [28]:
mnist_train, mnist_test = train_test_datasets()

In [29]:
train_dataloader = torch.utils.data.DataLoader(mnist_train, batch_size = bs, shuffle = True)
test_dataloader = torch.utils.data.DataLoader(mnist_train, batch_size = bs, shuffle = False)

In [30]:
class SubSampling(nn.Module):
    def __init__(self, kernel_size, stride = None):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(1, requires_grad = True, dtype = torch.float32, device = device))
        self.b = nn.Parameter(torch.tensor(1, requires_grad = True, dtype = torch.float32, device = device))
        
        self.kernel_size = kernel_size
        self.stride = stride if stride else kernel_size
            
    def forward(self, x):
        with torch.no_grad():
            channels = x.shape[1]

        kernel = torch.ones(channels, channels, self.kernel_size, self.kernel_size).type(torch.float32).to(device)
        
        x = F.conv2d(x, kernel, stride = self.stride)
        return self.a * x + self.b 
    

In [31]:
class LeNetOriginal(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, padding = 2)
        self.subsampling1 = SubSampling(2 , 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.subsampling2 = SubSampling(2,2)
        self.conv3 = nn.Conv2d(16,120,5)
        self.f1 = nn.Linear(120, 84)
        self.f2 = nn.Linear(84,10)
        
    def forward(self, x):
        C1 = self.conv1(x)
        S2 = F.relu(self.subsampling1(C1))
        C3 = self.conv2(S2)
        S4 = F.relu(self.subsampling2(C3))
        C5 = F.relu(self.conv3(S4).squeeze())
        F6 = F.relu(self.f1(C5))
        return self.f2(F6)
        

In [32]:
lenet = LeNetOriginal().cuda()

In [33]:
loss = nn.CrossEntropyLoss()
optim = torch.optim.SGD(lenet.parameters(), lr=0.003)

In [34]:
scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optim, lambda e: 0.95)

In [35]:
def get_accuracy():
    accumulated_accuracy = []
    for batch, truth in test_dataloader:
        weighted_rate = (lenet(batch).argmax(dim = 1) == truth).sum().cpu().numpy()
        accumulated_accuracy.append(weighted_rate)
    return sum(accumulated_accuracy)/bs/len(test_dataloader)

In [36]:
accuracy = []

In [37]:
#training
epochs = 30

for epoch in tqdm(range(epochs)):
    temp = get_accuracy()
    accuracy.append(temp)
    print(temp)
    for batch, preds in train_dataloader:
        y_pred = lenet(batch)
        
        l = loss(y_pred, preds)
        
        l.backward()
        
        optim.step()
        
        optim.zero_grad()


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

0.09888
0.11342
0.8363400000000001
0.90716
0.9264199999999999
0.9347000000000001
0.94496
0.95078
0.9476
0.95462
0.9557599999999999
0.95128
0.9597
0.95378
0.96236
0.96402
0.96716
0.95814
0.9691000000000001
0.9694400000000001
0.96858
0.9653999999999999
0.9722000000000001
0.9706
0.97176
0.9749
0.97276
0.97536
0.97282
0.97474



In [38]:
accuracy

[0.09888,
 0.11342,
 0.8363400000000001,
 0.90716,
 0.9264199999999999,
 0.9347000000000001,
 0.94496,
 0.95078,
 0.9476,
 0.95462,
 0.9557599999999999,
 0.95128,
 0.9597,
 0.95378,
 0.96236,
 0.96402,
 0.96716,
 0.95814,
 0.9691000000000001,
 0.9694400000000001,
 0.96858,
 0.9653999999999999,
 0.9722000000000001,
 0.9706,
 0.97176,
 0.9749,
 0.97276,
 0.97536,
 0.97282,
 0.97474]

We are stuck in a local minimum :(

In [None]:
mnist["test"]

In [26]:
foo.shape

torch.Size([100, 1, 28, 28])

In [28]:
foo=next(iter(train_dataloader))[0].type(torch.float32).cpu()

In [29]:
kernel = torch.tensor([[[0,0],[0,0]] for _ in range(5)]).unsqueeze(1).type(torch.float32)

In [30]:
kernel = torch.tensor([[[1,1],[1,1]] for _ in range(5)]).unsqueeze(1).type(torch.float32)

In [31]:
kernel.shape

torch.Size([5, 1, 2, 2])

In [32]:
F.conv2d(foo, kernel).shape

torch.Size([100, 5, 27, 27])

In [46]:
foo


tensor([[[[-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          ...,
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241]]],


        [[[-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          ...,
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241]]],


        [[[-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
          [-0.4241, -0.424

In [47]:
import matplotlib.pyplot as plt

In [47]:
list(list(lenet.modules())[2].parameters())

[Parameter containing:
 tensor(0.8417, device='cuda:0', requires_grad=True), Parameter containing:
 tensor(1.0038, device='cuda:0', requires_grad=True)]

In [40]:
lenet.subsampling1.parameters()

<generator object Module.parameters at 0x000002AB911543C8>

AttributeError: 'generator' object has no attribute 'data'