In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch_size = 256
learning_rate = 1e-3
epochs = 5

In [3]:
training_data = datasets.MNIST(
    root="../data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.MNIST(
    root="../data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Create data loaders.
train_dl = DataLoader(training_data, batch_size=batch_size)
test_dl = DataLoader(test_data, batch_size=1)

In [4]:
for x, y in test_dl:
    print(x.shape, y.shape)
    break

torch.Size([1, 1, 28, 28]) torch.Size([1])


In [5]:
class LeNet(nn.Module):

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(6),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.conv3 = nn.Sequential(
           nn.Conv2d(16, 10, kernel_size=5),
        #    nn.ReLU(),
           nn.Flatten()
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # out = F.softmax(x, dim=1)
        return x

net = LeNet().to(device)

In [6]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [7]:
def train_loop(dataloader, net, loss_fn, optim, device="cuda"):
    for batch_i, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        pred = net(X)
        loss = loss_fn(pred, y)
        
        optim.zero_grad()
        loss.backward()
        optim.step()
        
        if batch_i % 100 == 0:
            loss, current = loss.item(), batch_i * len(X)
            print(f"Batch: {batch_i}, loss: {loss}")

def test_loop(dataloader, model, loss_fn, device="cuda"):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for data, label in dataloader:
            data = data.to(device)
            label = label.to(device)
            pred = model(data)
            test_loss += loss_fn(pred, label).item()
            correct += (pred.argmax(1) == label).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(
        f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    return correct*100

In [8]:
for i in range(epochs):
    print(f"==== Epoch: {i + 1} ====")
    train_loop(train_dl, net, loss_fn, optimizer)
    test_loop(test_dl, net, loss_fn)

==== Epoch: 1 ====
Batch: 0, loss: 2.540403366088867
Batch: 100, loss: 0.17764760553836823
Batch: 200, loss: 0.09950587153434753
Test Error: 
 Accuracy: 95.4%, Avg loss: 0.154946 

==== Epoch: 2 ====
Batch: 0, loss: 0.1157233864068985
Batch: 100, loss: 0.0815146416425705
Batch: 200, loss: 0.06381628662347794
Test Error: 
 Accuracy: 95.1%, Avg loss: 0.146579 

==== Epoch: 3 ====
Batch: 0, loss: 0.07881902903318405
Batch: 100, loss: 0.05939766764640808
Batch: 200, loss: 0.05568619444966316
Test Error: 
 Accuracy: 94.2%, Avg loss: 0.166889 

==== Epoch: 4 ====
Batch: 0, loss: 0.061107881367206573
Batch: 100, loss: 0.0452219694852829
Batch: 200, loss: 0.04969136789441109
Test Error: 
 Accuracy: 93.8%, Avg loss: 0.178978 

==== Epoch: 5 ====
Batch: 0, loss: 0.04921777918934822
Batch: 100, loss: 0.037676747888326645
Batch: 200, loss: 0.04183826968073845
Test Error: 
 Accuracy: 93.3%, Avg loss: 0.188400 



In [10]:
# torch.save(net.state_dict(), "params.pth")

In [31]:
for x, y in test_dl:
    pred = net(x.to(device)[0].unsqueeze(0))
    pred_index = pred.argmax()
    print(pred_index)
    print(y[0])
    torch.save(x[0].unsqueeze(0), 'test_data')
    break

tensor(7, device='cuda:0')
tensor(7)


In [32]:
net(torch.load("test_data").to(device))

tensor([[ -3.7362,  -2.7334,   1.5859,   2.5465,  -3.9135,  -5.2472, -14.1808,
          13.9857,  -2.0783,   2.9147]], device='cuda:0',
       grad_fn=<ReshapeAliasBackward0>)

In [13]:
param = torch.load("params.pth")
param.keys()

odict_keys(['conv1.0.weight', 'conv1.0.bias', 'conv1.2.weight', 'conv1.2.bias', 'conv1.2.running_mean', 'conv1.2.running_var', 'conv1.2.num_batches_tracked', 'conv2.0.weight', 'conv2.0.bias', 'conv2.2.weight', 'conv2.2.bias', 'conv2.2.running_mean', 'conv2.2.running_var', 'conv2.2.num_batches_tracked', 'conv3.0.weight', 'conv3.0.bias'])