In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms

In [2]:
import torch.nn


class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super().__init__()

        #  1x40x40
        self.conv1 = torch.nn.Conv2d(
            in_channels=1, out_channels=32, kernel_size=3, padding='same')
        # 32x40x40
        self.maxpl = torch.nn.MaxPool2d(kernel_size=2)
        # 16x20x20
        self.conv2 = torch.nn.Conv2d(
            in_channels=32, out_channels=64, kernel_size=3, padding='same')
        # 64x20x20
        self.avgpl = torch.nn.AvgPool2d(kernel_size=4)
        # 64x5x5
        self.flatt = torch.nn.Flatten()
        # 1600
        self.line1 = torch.nn.Linear(in_features=1600, out_features=128)
        # 128
        self.activ = torch.nn.ReLU()
        # 128
        self.feats = torch.nn.Linear(in_features=128, out_features=10)
        # 10

    def forward(self, x: torch.Tensor):
        x = self.conv1(x)
        x = self.maxpl(x)
        x = self.conv2(x)
        x = self.avgpl(x)
        x = self.flatt(x)
        x = self.line1(x)
        x = self.activ(x)
        x = self.feats(x)
        return x

In [36]:
def traindata(device, model, epochs, optimizer, loss_function, train_loader, valid_loader):
    # Early stopping
    last_loss = 100
    patience = 2
    triggertimes = 0

    for epoch in range(1, epochs+1):
        model.train()

        for times, data in enumerate(train_loader, 1):
            input = data[0].to(device)
            label = data[1].to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward and backward propagation
            output = model(input)
            loss = loss_function(output, label)
            loss.backward()
            optimizer.step()

            # Show progress
            if times % 100 == 0 or times == len(train_loader):
                print('[{}/{}, {}/{}] loss: {:.8}'.format(epoch, epochs, times, len(train_loader), loss.item()))

        # Early stopping
        current_loss = validation(model, device, valid_loader, loss_function)
        print('The Current Loss:', current_loss)

        if current_loss > last_loss:
            trigger_times += 1
            print('Trigger Times:', trigger_times)

            if trigger_times >= patience:
                print('Early stopping!\nStart to test process.')
                return model

        else:
            print('trigger times: 0')
            trigger_times = 0

        last_loss = current_loss

    return model

In [4]:
def validation(model, device, valid_loader, loss_function):

    model.eval()
    loss_total = 0

    # Test validation data
    with torch.no_grad():
        for data in valid_loader:
            input = data[0].to(device)
            label = data[1].to(device)

            output = model(input)
            loss = loss_function(output, label)
            loss_total += loss.item()

    return loss_total / len(valid_loader)

In [22]:
def test(device, model, test_loader):

    model.eval()
    total = 0
    correct = 0

    with torch.no_grad():
        for data in test_loader:
            input = data[0].to(device)
            label = data[1].to(device)

            output = model(input)
            from matplotlib import pyplot as plt
            plt.imshow(input[1][0].cpu(), cmap = 'gray')
            # print(model(input[0].float()).argmax(dim = 1)[0])
            # plt.set
            predicted = output.argmax(dim = 1)

            print(predicted)
            print(predicted == label)

            total += label.size(0)
            correct += (predicted == label).sum()

    print('Accuracy:', correct / total)

In [33]:
def main():
    # GPU device
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    print('Device state:', device)

    epochs = 1
    batch_size = 66
    lr = 0.004
    loss_function = nn.NLLLoss()
    model = SimpleCNN().to(device)
    model.load_state_dict(torch.load('./cuda_data/cuda_model_dict.pt'))
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Transform
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Resize(size=(40, 40), antialias=True)]
    )

    # Data
    trainset = datasets.MNIST(root='MNIST', download=True, train=True, transform=transform)
    testset = datasets.MNIST(root='MNIST', download=True, train=False, transform=transform)
   
    trainset_size = int(len(trainset) * 0.8)
    validset_size = len(trainset) - trainset_size
    trainset, validset = data.random_split(trainset, [trainset_size, validset_size])

    trainloader = data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    testloader = data.DataLoader(testset, batch_size=batch_size, shuffle=False)
    validloader = data.DataLoader(validset, batch_size=batch_size, shuffle=True)

    # Train
    model = traindata(device, model, epochs, optimizer, loss_function, trainloader, validloader)

    # Test
    test(device, model, testloader)

    # torch.save(model.state_dict(), './100_early_stopping_model_dict.pt')

In [37]:
if __name__ == '__main__':
    main()

Device state: cuda:0


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x25 and 1600x128)