In [1]:
import sys
import os
import re
import cv2
import glob
from google.colab.patches import cv2_imshow
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from unicodedata import normalize
from sklearn.model_selection import train_test_split

In [2]:
paths = glob.glob('/content/drive/My Drive/PerspectiveImages/' + '*')
classes = {'0' : 0, '1' : 1, '2' : 2, '3' : 3, '4' : 4, '5' : 5, '6' : 6, '7' : 7, '8' : 8, '9' : 9, 
           '가' : 10, '나' : 11, '다' : 12, '라' : 13, '마' : 14, '거' : 15, '너' : 16, '더' : 17, '러' : 18, '머' : 19, 
           '버' : 20, '서' : 21, '어' : 22, '저' : 23, '고' : 24, '노' : 25, '도' : 26, '로' : 27, '모' : 28, '보' : 29, 
           '소' : 30, '오' : 31, '조' : 32, '구' : 33, '누' : 34, '두' : 35, '루' : 36, '무' : 37, '부' : 38, '수' : 39, 
           '우' : 40, '주' : 41, '하' : 42, '호' : 43}


data_exclusion는 영엽용 번호판과 초록색 번호판을 제외하고 흰색과 검정색으로 구성된 이미지와 그 이미지의 이름을 반환한다.

In [3]:
def data_exclusion(paths):
    re_data = []
    for path in paths:
        name = path[-12 : -4]

        if name[0].isdigit():   # 영업용(한글로 시작) 제거
            # 초록색 제거
            cur_img = cv2.imread(path, 0)
            blur = cv2.GaussianBlur(cur_img,(5,5),0)
            _, img = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

            cnt = [0, 0]
            for i in range(5, 15):  # 배경 영역인 좌측 상단의 색 판단
                if img[i,i] == 0:
                    cnt[0] +=1
                elif img[i, i] == 255:
                    cnt[1] +=1
            if cnt[0] < cnt[1]: # 해당 영역에 흰색이 더 많은 경우 초록색이 아니다.
                re_data.append((img, name))
               
    return re_data

In [4]:
data = data_exclusion(paths)

In [5]:
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize((32, 224)),
                                transforms.ToTensor()])

In [6]:
X = np.zeros((len(data), 32, 224))
y = np.zeros((len(data), 7))

for i, (img, name) in enumerate(data):
    X[i] = transform(img)
    y[i, 0] = classes[name[0]]
    y[i, 1] = classes[name[1]]
    y[i, 3] = classes[name[4]]
    y[i, 4] = classes[name[5]]
    y[i, 5] = classes[name[6]]
    y[i, 6] = classes[name[7]]

    hangul = normalize('NFC', name[2: 4])
    y[i, 2] = classes[hangul]

train, validation, test set을 만들기 위해 이미지와 라벨을 나눈다.

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

(516, 32, 224) (516, 7)
(173, 32, 224) (173, 7)
(173, 32, 224) (173, 7)


이미지와 라벨을 받아 데이터셋을 만들어준다.

In [8]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X.reshape(X.shape[0], 1, X.shape[1], X.shape[2])
        self.y = torch.LongTensor(y)
        print(self.X.shape)
        print(self.y.shape)

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [9]:
train_set = CustomDataset(X_train, y_train)
val_set = CustomDataset(X_val, y_val)
test_set = CustomDataset(X_test, y_test)

(516, 1, 32, 224)
torch.Size([516, 7])
(173, 1, 32, 224)
torch.Size([173, 7])
(173, 1, 32, 224)
torch.Size([173, 7])


In [10]:
BATCHSIZE = 16

train_loader = DataLoader(train_set, batch_size=BATCHSIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCHSIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=BATCHSIZE, shuffle=False)

# CNN + RNN 모델 생성
이미지를 CNN에 통과시키고 GRU에 넣는다.


In [11]:
class Net(nn.Module):
    def __init__(self, hidden_size, output_size, batch_size, num_layers):
        super(Net, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )

        self.rnn = nn.GRU(2048, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        n = x.shape[0]
        x = self.conv(x)
        x = x.reshape(n, 7, 2048)

        x, hidden = self.rnn(x, hidden)
        x = self.fc(x)
        return x, hidden

In [56]:
def train(train_loader, val_loader, model, batch_size, n_epochs, lr):
    optimizer = optim.AdamW(params=model.parameters(), lr=lr, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=3, verbose=True)
    loss_fn = nn.CrossEntropyLoss()

    best = {"loss": sys.float_info.max}
    patience = 0

    for e in range(n_epochs):
        model.train()
        train_loss = 0
        for x, y in train_loader:
            x = x.type(torch.FloatTensor).to(device)
            y = y.to(device)
            hidden = torch.zeros(NUMLAYERS, x.shape[0], HIDDENSIZE)
            hidden = hidden.to(device)
            optimizer.zero_grad()
            output, hidden = model(x, hidden)
            loss = 0
            for i in range(7):
                predict = output[:, i]
                target = y[:, i]
                loss += loss_fn(predict, target)
            loss /= 7
            loss.backward()
            
            optimizer.step()

            train_loss += loss.item()
        train_loss /= len(train_loader)

        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for x, y in val_loader:
                x = x.type(torch.FloatTensor).to(device)
                y = y.to(device)
                hidden = torch.zeros(NUMLAYERS, x.shape[0], HIDDENSIZE).to(device)
                output, hidden = model(x, hidden)
                loss = 0
                for i in range(7):
                    predict = output[:, i]
                    target = y[:, i]
                    loss += loss_fn(predict, target)
                valid_loss += loss.item()
            valid_loss /= len(val_loader) * 7

        if valid_loss < best["loss"]:
            best["loss"] = valid_loss
            best["epoch"] = e + 1
            best["state"] = model.state_dict()
            patience = 0

        if patience > 5:
            print("Best loss: %.4f"%(best["loss"]))
            break

        if e % 10 == 9:
            print("[%2d] Train loss : %.4f    Valid loss: %.4f      Best loss: %.4f"%(e+1, train_loss, valid_loss, best["loss"]))
        scheduler.step(metrics=valid_loss)      
        patience +=1

    return best

NUMLAYERS = 3
HIDDENSIZE = 50
NUM_CLASS = len(classes)
model = Net(HIDDENSIZE, NUM_CLASS, BATCHSIZE, NUMLAYERS)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

EPOCH = 150
BEST = train(train_loader, val_loader, model, BATCHSIZE, EPOCH, 0.001)

[10] Train loss : 1.4012    Valid loss: 1.3964      Best loss: 1.3964
[20] Train loss : 0.5103    Valid loss: 0.5914      Best loss: 0.5914
Epoch    29: reducing learning rate of group 0 to 1.0000e-04.
[30] Train loss : 0.3805    Valid loss: 0.5334      Best loss: 0.5334
[40] Train loss : 0.3367    Valid loss: 0.5048      Best loss: 0.5048
[50] Train loss : 0.3203    Valid loss: 0.4996      Best loss: 0.4983
Epoch    57: reducing learning rate of group 0 to 1.0000e-05.
[60] Train loss : 0.3072    Valid loss: 0.4952      Best loss: 0.4915
Epoch    62: reducing learning rate of group 0 to 1.0000e-06.
Best loss: 0.4915


최적의 모델 저장

In [57]:
with open("model.pt", "wb") as f:
    torch.save(
        {
            "state": BEST["state"],
            "best_epoch": BEST["epoch"],
        },
        f,
    )

모델을 불러와서 테스트

In [58]:
model = Net(HIDDENSIZE, NUM_CLASS, BATCHSIZE, NUMLAYERS)
model.to(device)

with open("model.pt", "rb") as f:
    SAVED_MODEL = torch.load(f)

model.load_state_dict(SAVED_MODEL["state"])

model.eval()
test_loss = 0
cnt = 0
test_correct = np.zeros(7)

loss_fn = nn.CrossEntropyLoss()

with torch.no_grad():
    for x, y in test_loader:
        x = x.type(torch.FloatTensor).to(device)
        y = y.to(device)
        hidden = torch.zeros(NUMLAYERS, x.shape[0], HIDDENSIZE).to(device)
        output, hidden = model(x, hidden)

        loss = 0
        for i in range(7):
            predict = output[:, i]
            target = y[:, i]
            loss += loss_fn(predict, target)    #(16, 44), (16)
        
            
        for i, prediction in enumerate(output.max(axis=2)[1]):
            cnt += 1
            for j in range(7):
                if prediction[j].eq(y[i,j]).item():
                    test_correct[j] +=1
                    
        test_loss += loss.item()
    test_loss /= cnt
    test_acc = test_correct.sum() / (cnt * 7)
    
print("total loss: %.4f, total acc: %.4f"%(test_loss, test_acc) )
print("\n각 자리의 정확도")
print(test_correct / cnt)

total loss: 0.2598, total acc: 0.8489

각 자리의 정확도
[0.95953757 0.93641618 0.26589595 0.95953757 0.94797688 0.94219653
 0.93063584]
