In [5]:
from os import listdir
from os.path import join
import os.path

from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchsummary import summary

import numpy as np
import matplotlib.pyplot as plt

import random

# Dataset
## Kaggle Handwritten math symbols

https://www.kaggle.com/datasets/xainano/handwrittenmathsymbols

In [6]:
import gdown
import patoolib

# google drive link
url       = 'https://drive.google.com/uc?id='
file_id   = '1iVt97n4fpFKUeTW93kNGMzbadRbHCjUf'

# save path
data_path = '../data/'
data_dir  = 'extracted_images'
data_file = 'math_data.rar'

if os.path.isdir(data_path+data_dir):
    print('already exist')

# file download
elif not os.path.isfile(data_path+data_file):
    gdown.download(url + file_id, data_path+data_file, quiet=False)
    patoolib.extract_archive(data_path+data_file, outdir='../data/')
    
else:
    patoolib.extract_archive(data_path+data_file, outdir='../data/')


already exist


In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'using {device}')

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

training_epochs = 25
batch_size = 256

using cuda


In [8]:
# image preprocessing
# grayscale, 
# option: resize
trans = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor() 
    ])
trainset = torchvision.datasets.ImageFolder(root = data_path + 'extracted_images',
                                            transform = trans)
classes = trainset.classes
len(trainset.classes)

print(trainset.classes[63])
print(len(trainset.targets))

pi
375974


In [9]:
train_size = int(0.8 * len(trainset))
test_size = len(trainset) - train_size

train_data, test_data = torch.utils.data.random_split(trainset, [train_size, test_size])

train_loader = DataLoader(train_data,
                         batch_size = batch_size,
                         shuffle = True,
                         drop_last = True)

test_loader = DataLoader(test_data,
                         batch_size = batch_size,
                         shuffle = False,
                         drop_last = True)

print(f'train size = {train_size}\ntest size = {test_size}')
print('')
print(f'training data set = {len(train_loader)}\ntest data set = {len(test_loader)}')


images, labels = next(iter(train_loader))
images.shape, labels.shape

train size = 300779
test size = 75195

training data set = 1174
test data set = 293


(torch.Size([256, 1, 45, 45]), torch.Size([256]))

In [10]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        # output = ( input - filter(kernel) + 2 * padding) / stride ) + 1
        self.conv1 = nn.Conv2d(1, 64, 3, 1) # input, output, kernel_size, stride
        self.conv2 = nn.Conv2d(64, 128, 3, 1)
        self.dropout = nn.Dropout()
 
        # w*h*output
        self.fc1 = nn.Linear(9*9*128, 1000)
        self.fc2 = nn.Linear(1000, 82)
        

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.dropout(x)
        x = x.view(-1, 9*9*128)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [11]:
model = Net()
model.to(device)
summary(model, input_size=(1, 45, 45))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 43, 43]             640
            Conv2d-2          [-1, 128, 19, 19]          73,856
           Dropout-3            [-1, 128, 9, 9]               0
            Linear-4                 [-1, 1000]      10,369,000
            Linear-5                   [-1, 82]          82,082
Total params: 10,525,578
Trainable params: 10,525,578
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.34
Params size (MB): 40.15
Estimated Total Size (MB): 41.50
----------------------------------------------------------------


In [12]:
# define the negative log-likelihood loss
loss_func = nn.NLLLoss(reduction='sum')

# define the Adam optimizer
opt = optim.Adam(model.parameters(), lr=1e-4)


In [13]:
# define a helper function to compute the loss value per mini-batch
def loss_batch(loss_func, xb, yb, yb_h, opt=None):
    # obtain loss
    loss = loss_func(yb_h, yb)
    # obtain performance metric
    metric_b = metrics_batch(yb, yb_h)
    if opt is not None:
        loss.backward() # compute gradient
        opt.step() # update parameters
        opt.zero_grad() # set gradients to zero
    return loss.item(), metric_b
    
# define a helper function to compute the accurary per mini-batch
def metrics_batch(target, output):
    # optain output class
    pred = output.argmax(dim=1, keepdim=True)
    # compare output class with target class
    corrects = pred.eq(target.view_as(pred)).sum().item()
    
    return corrects
    
# define a helper fuction to compute the loss and metric values for a dataset
def loss_epoch(model, loss_func, dataset_dl, opt=None):
    loss = 0.0
    metric = 0.0
    len_data = len(dataset_dl.dataset)
    
    for xb, yb in dataset_dl:
        xb = xb.type(torch.float).to(device)
        yb = yb.to(device)
        # obtain model output
        yb_h = model(xb)
        
        loss_b, metric_b = loss_batch(loss_func, xb, yb, yb_h, opt)
        loss += loss_b
        if metric_b is not None:
            metric += metric_b
            
    loss /= len_data
    metric /= len_data
    return loss, metric

In [14]:
# define train_val function
def train_val(epochs, model, loss_func, opt, train_dl, val_dl):
    for epoch in range(epochs):
        model.train() # convert to train mode
        train_loss, train_metric = loss_epoch(model, loss_func, train_dl, opt)
        model.eval() # convert to evaluation mode
        with torch.no_grad():
            val_loss, val_metric = loss_epoch(model, loss_func, val_dl)
        accuracy = 100 * val_metric
        print('epoch: %d, train loss: %.6f, val loss: %.6f, accuracy: %.2f' %(epoch, train_loss, val_loss, accuracy))

In [15]:
train_val(training_epochs, model, loss_func, opt, train_loader, test_loader)

epoch: 0, train loss: 1.051639, val loss: 0.476066, accuracy: 86.75
epoch: 1, train loss: 0.408563, val loss: 0.322830, accuracy: 90.44
epoch: 2, train loss: 0.296978, val loss: 0.253427, accuracy: 91.97
epoch: 3, train loss: 0.236852, val loss: 0.206183, accuracy: 93.63
epoch: 4, train loss: 0.195981, val loss: 0.170838, accuracy: 94.62
epoch: 5, train loss: 0.166162, val loss: 0.145606, accuracy: 95.45
epoch: 6, train loss: 0.142217, val loss: 0.126099, accuracy: 95.88
epoch: 7, train loss: 0.123769, val loss: 0.110348, accuracy: 96.51
epoch: 8, train loss: 0.108115, val loss: 0.095225, accuracy: 96.76
epoch: 9, train loss: 0.097351, val loss: 0.085227, accuracy: 97.11
epoch: 10, train loss: 0.086848, val loss: 0.078987, accuracy: 97.37
epoch: 11, train loss: 0.079232, val loss: 0.075723, accuracy: 97.37
epoch: 12, train loss: 0.073872, val loss: 0.066854, accuracy: 97.68
epoch: 13, train loss: 0.067967, val loss: 0.062936, accuracy: 97.89
epoch: 14, train loss: 0.063951, val loss: 0

In [126]:
from datetime import datetime

now = datetime.now()

torch.save(model.state_dict(), f"../model/model_{now.year}{now.month}{now.day}{now.hour}{now.minute}{now.second}.pth")

# Reference

https://deep-learning-study.tistory.com/459