In [6]:
import torch
from torch import nn

from torch.optim import lr_scheduler
from torchvision import datasets, transforms


In [7]:

class LeNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.Lenet = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
            nn.BatchNorm2d(6),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.BatchNorm2d(16),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5),
            nn.BatchNorm2d(120),
            nn.Flatten(),
            nn.Linear(120, 84),
            nn.BatchNorm1d(84), # 1d
            nn.Linear(84, 10)
        )

    def forward(self, x):
        return self.Lenet(x)


In [8]:
import torch.utils
import torch.utils.data


transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

print( f'train size = {len(train_dataset)}' )
print( f'test size = {len(test_dataset)}' )

batch_size = 4
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train size = 60000
test size = 10000


In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
model = LeNet().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [10]:
def training(dataloader, model, loss_fn, optimizer):
    
    model.train()

    loss, correct= 0.0, 0.0
    n = 0

    for X,y in dataloader:
    
        X,y = X.to(device), y.to(device)
        output = model(X)

        cur_loss = loss_fn(output, y)
    
        _, pred = torch.max(output, axis = 1)
        print(output,y,pred,sep='\n----------\n')
        break
        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()

        loss += cur_loss * len(y)
        correct += torch.sum(pred==y).item()
        n += len(y)
    return loss / n, correct / n


In [11]:
def val(dataloader, model, loss_fn):
    
    model.eval()

    with torch.no_grad():

        loss, correct= 0.0, 0.0
        n = 0 

        for X,y in dataloader:

            X,y = X.to(device), y.to(device)
            output = model(X)
            _, pred = torch.max(output, axis = 1)
            
            cur_loss = loss_fn(output, y)
            correct += torch.sum(pred==y).item()
            loss += cur_loss * len(y)
            n += len(y)
            
    return loss/n, correct/n

In [12]:
epochs = 10
max_acc = 0

params = model.state_dict()

for epoch in range(epochs):

    loss, acc = training(train_loader, model, loss_fn, optimizer)
    print(f'epoch {epoch + 1}/{epochs}: loss = {loss} acc = {acc}')
    break
    loss, acc = val(test_loader, model, loss_fn)
    print(f'Validation: loss = {loss} acc = {acc}\n')

    # if acc > max_acc:
    #     max_acc = acc
    #     params = model.state_dict()
    #     print('saved local best')

tensor([[ 1.1656e+00,  9.2382e-03,  1.3240e+00, -4.0105e-01,  8.0792e-02,
         -3.3268e-02, -2.3275e-01, -4.3888e-01, -4.3683e-01,  4.2631e-01],
        [ 1.3845e-01,  6.8420e-01, -7.3093e-01,  7.8701e-04, -7.4541e-01,
          8.9024e-02, -4.5218e-01,  5.6884e-02,  5.2930e-01,  4.0540e-01],
        [-1.0075e+00,  1.2790e-03,  3.1477e-01, -2.0390e-01,  6.8482e-01,
          3.5114e-01,  4.6164e-01, -6.7865e-03, -3.4495e-01, -7.0498e-01],
        [-2.8345e-01, -8.4720e-01, -1.0443e+00,  4.5808e-01,  3.7140e-01,
         -3.2739e-01,  6.2033e-01,  4.4460e-01,  2.3321e-01, -3.5589e-01]],
       device='cuda:0', grad_fn=<AddmmBackward0>)
----------
tensor([7, 7, 3, 9], device='cuda:0')
----------
tensor([2, 1, 4, 6], device='cuda:0')


ZeroDivisionError: float division by zero

In [18]:
import torch.nn.functional as F

In [19]:
x = [[ 1.1656e+00,  9.2382e-03,  1.3240e+00, -4.0105e-01,  8.0792e-02,
         -3.3268e-02, -2.3275e-01, -4.3888e-01, -4.3683e-01,  4.2631e-01],
        [ 1.3845e-01,  6.8420e-01, -7.3093e-01,  7.8701e-04, -7.4541e-01,
          8.9024e-02, -4.5218e-01,  5.6884e-02,  5.2930e-01,  4.0540e-01],
        [-1.0075e+00,  1.2790e-03,  3.1477e-01, -2.0390e-01,  6.8482e-01,
          3.5114e-01,  4.6164e-01, -6.7865e-03, -3.4495e-01, -7.0498e-01],
        [-2.8345e-01, -8.4720e-01, -1.0443e+00,  4.5808e-01,  3.7140e-01,
         -3.2739e-01,  6.2033e-01,  4.4460e-01,  2.3321e-01, -3.5589e-01]]

x = torch.tensor(x)
fn = nn.CrossEntropyLoss()

y = torch.tensor([7, 7, 3, 9])

fn(x,y)

tensor(2.6871)

In [25]:
yy = F.one_hot(y, num_classes=10)

yy = yy * 1.
fn(x,yy)

tensor(2.6871)