In [1]:
!python --version

Python 3.8.3


In [2]:
import torch

In [3]:
from pathlib import Path
import requests

In [4]:
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

In [5]:
PATH.mkdir(parents=True, exist_ok=True)

In [6]:
url = "https://github.com/pytorch/tutorials/raw/master/_static/"
filename = "mnist.pkl.gz"

In [7]:
if not (PATH / filename).exists():
        content = requests.get(url + filename).content
        (PATH / filename).open("wb").write(content)

In [8]:
import pickle
import gzip

In [9]:
with gzip.open((PATH / filename).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")


In [10]:
x_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [11]:
x_train.shape

(50000, 784)

In [12]:
from matplotlib import pyplot
import numpy as np


In [13]:
import torch

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([5, 0, 4,  ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)


In [None]:
import math

weights = torch.normal(0, 1, size=(784, 10)) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

In [None]:
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    return log_softmax(xb @ weights + bias)

In [None]:
bs = 64  # batch size

xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
preds[0], preds.shape
print(preds[0], preds.shape)

In [None]:
def nll(input, target):
    return -input[range(target.shape[0]), target].mean()

loss_func = nll

In [25]:
y_train[0:32]

tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
        1, 2, 4, 3, 2, 7, 3, 8])

In [None]:
yb = y_train[0:bs]
print(loss_func(preds, yb))


In [None]:
def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

In [None]:
print(accuracy(preds, yb))

In [None]:
#from IPython.core.debugger import set_trace

lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        #         set_trace()
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

In [None]:
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

In [16]:
# the typical loop
# model, opt = get_model()

# for epoch in range(epochs):
#     for xb, yb in train_dl:
#         pred = model(xb)
#         loss = loss_func(pred, yb)

#         loss.backward()
#         opt.step()
#         opt.zero_grad()

# print(loss_func(model(xb), yb))

In [14]:
import torch.nn.functional as F
import torch.nn as nn

In [15]:
class resNetBlock(nn.Module):
    def __init__(self, channels_in, channels_out, ds):
        super().__init__()
        #downsample means stride == 2, else 1
        if ds:
            self.conv1 = nn.Conv2d(channels_in, channels_out, 3, stride=2, padding=1)
            self.shortcut = nn.Sequential(
                nn.Conv2d(channels_in, channels_out, 1, stride=2),
                nn.BatchNorm2d(channels_out)
            )
        else:
            self.conv1 = nn.Conv2d(channels_in, channels_out, 3, stride=1, padding=1)
            self.shortcut = nn.Identity()
            
        self.conv2 = nn.Conv2d(channels_out, channels_out, 3, stride = 1, padding = 1)
        self.bn1 = nn.BatchNorm2d(channels_out)
        self.bn2 = nn.BatchNorm2d(channels_out)
    
    def forward(self, x):
        shortcut = self.shortcut(x)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
#         print(x.shape)
#         print(shortcut.shape)
        x = x + shortcut
        return F.relu(self.bn2(x))

In [16]:
class resNet(nn.Module):
    def __init__(self, channels_in, rnb_class, output_dim=10):
        super().__init__()
        self.l0 = nn.Sequential(
            nn.Conv2d(channels_in, 64, 7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.l1 = nn.Sequential(
            rnb_class(64, 64, False),
            rnb_class(64, 64, False)
        )
        
        self.l2 = nn.Sequential(
            rnb_class(64, 128, True),
            rnb_class(128, 128, False)
        )
        self.l3 = nn.Sequential(
            rnb_class(128, 256, True),
            rnb_class(256, 256, False)
        )
        self.l4 = nn.Sequential(
            rnb_class(256, 512, True),
            rnb_class(512, 512, False)
        )
        #Adaptive
        self.gap = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = torch.nn.Linear(256, output_dim)
        
    def forward(self, x):
        x = self.l0(x)
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
#         x = self.l4(x)
        x = self.gap(x)
#         print(x.shape)
        x = torch.flatten(x, start_dim=1)
#         print(x.shape)
        x = self.fc(x)
        return x

In [17]:
myRN = resNet(1, resNetBlock, 10)

In [18]:
from torchsummary import summary
summary(myRN, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 14, 14]           3,200
         MaxPool2d-2             [-1, 64, 7, 7]               0
       BatchNorm2d-3             [-1, 64, 7, 7]             128
              ReLU-4             [-1, 64, 7, 7]               0
          Identity-5             [-1, 64, 7, 7]               0
            Conv2d-6             [-1, 64, 7, 7]          36,928
       BatchNorm2d-7             [-1, 64, 7, 7]             128
            Conv2d-8             [-1, 64, 7, 7]          36,928
       BatchNorm2d-9             [-1, 64, 7, 7]             128
      BatchNorm2d-10             [-1, 64, 7, 7]             128
      resNetBlock-11             [-1, 64, 7, 7]               0
         Identity-12             [-1, 64, 7, 7]               0
           Conv2d-13             [-1, 64, 7, 7]          36,928
      BatchNorm2d-14             [-1, 6

In [19]:
x_train = torch.reshape(x_train, (50000, 1,28,28)).to(torch.float32)

In [20]:
x_valid = torch.reshape(x_valid, (x_valid.shape[0], 1,28,28)).to(torch.float32)

In [21]:
print(x_train.shape)
x_valid.shape

torch.Size([50000, 1, 28, 28])


torch.Size([10000, 1, 28, 28])

In [22]:
from torch.utils.data import TensorDataset as TDS
from torch.utils.data import DataLoader

train_ds = TDS(x_train[:32*60], y_train[:32*60])
val_ds = TDS(x_valid[:32*10], y_valid[:32*10])

train_ds = DataLoader(train_ds, batch_size=32)
val_ds = DataLoader(val_ds, batch_size=32)

# our model
myRN

opt = torch.optim.AdamW(myRN.parameters())
loss_func = F.cross_entropy

In [None]:
# numepochs = 10
# for i in range(numepochs):
#     myRN.train()
#     tls = 0
#     for xb, yb in train_ds:
# #         print(xb.shape)
#         preds = myRN(xb)
# #         _, preds = torch.max(out.data, 1)
# #         print(preds.shape)
#         loss = loss_func(preds, yb)
#         tls += loss.item()
#         loss.backward()
#         opt.step()
#         opt.zero_grad()
#     print("training loss")
#     print(tls)
#     # val
#     myRN.eval()
#     with torch.no_grad(): # xbv, ybv in val_ds:))
#         loss2 = sum(loss_func(myRN(xbv), ybv) for xbv, ybv in val_ds)
#         print("Val loss epoch", i, ":", loss2)

In [23]:
numepochs = 10
for i in range(numepochs):
    myRN.train()
    tls = 0
    for xb, yb in train_ds:
#         print(xb.shape)
        preds = myRN(xb)
#         _, preds = torch.max(out.data, 1)
#         print(preds.shape)
        loss = loss_func(preds, yb)
        tls += loss.item()
        loss.backward()
        opt.step()
        opt.zero_grad()
    print("training loss")
    print(tls)
    # val
    myRN.eval()
    with torch.no_grad(): # xbv, ybv in val_ds:))
        valpreds, actual = zip(*[(myRN(xbv), ybv) for xbv, ybv in val_ds])
        valpreds = torch.argmax(torch.cat(valpreds), dim=1)
        print("Val acc epoch", i, ":")        
        print(sum([p == a for p,a in zip(valpreds, torch.cat(actual))])/(val_ds.batch_size * len(val_ds)))
        loss2 = sum(loss_func(myRN(xbv), ybv) for xbv, ybv in val_ds)
        print("Val loss epoch", i, ":", loss2)

training loss
35.18148212879896
Val acc epoch 0 :
tensor(0.9219)
Val loss epoch 0 : tensor(5.2100)
training loss
10.280327830463648
Val acc epoch 1 :
tensor(0.9375)
Val loss epoch 1 : tensor(2.9550)
training loss
6.072369011119008
Val acc epoch 2 :
tensor(0.8906)
Val loss epoch 2 : tensor(4.0395)
training loss
4.167758401017636
Val acc epoch 3 :
tensor(0.8875)
Val loss epoch 3 : tensor(3.5296)
training loss
3.679632104933262
Val acc epoch 4 :
tensor(0.8969)
Val loss epoch 4 : tensor(3.9238)
training loss
2.9543117783032358
Val acc epoch 5 :
tensor(0.9187)
Val loss epoch 5 : tensor(2.8310)
training loss
2.290273612132296
Val acc epoch 6 :
tensor(0.9563)
Val loss epoch 6 : tensor(1.9087)
training loss
1.2824750032741576
Val acc epoch 7 :
tensor(0.9344)
Val loss epoch 7 : tensor(1.9509)
training loss
0.8547915040398948
Val acc epoch 8 :
tensor(0.9500)
Val loss epoch 8 : tensor(1.7844)
training loss
0.9159827973344363
Val acc epoch 9 :
tensor(0.8969)
Val loss epoch 9 : tensor(3.1752)


In [63]:
with torch.no_grad(): # xbv, ybv in val_ds:))
    valpreds, actual = zip(*[(myRN(xbv), ybv) for xbv, ybv in val_ds])
    valpreds = torch.argmax(torch.cat(valpreds), dim=1)
    print(sum([p == a for p,a in zip(valpreds, torch.cat(actual))])/(val_ds.batch_size * len(val_ds)))


tensor(0.6969)


In [43]:
print(torch.argmax(valpreds[0],dim=1))

tensor([3, 8, 4, 4, 4, 4, 4, 3, 8, 4, 5, 4, 3, 8, 4, 8, 4, 5, 0, 5, 9, 7, 4, 1,
        4, 4, 0, 4, 2, 4, 4, 4])


In [55]:
valpreds.shape

torch.Size([320])

In [46]:
torch.cat(valpreds).shape

torch.Size([320, 10])

In [54]:
torch.cat(actual).shape

torch.Size([320])

In [62]:
val_ds.batch_size * len(val_ds)

320