# 繁體中文字辨識

### 事前準備
* 請先至https://github.com/AI-FREE-Team/Traditional-Chinese-Handwriting-Dataset 下載資料
* 四個壓縮檔解壓縮至raw資料夾

* 也可使用AI FREE TEAM所提供的Data_Deployment_local.ipynb來部署資料，但這樣dataloader就要配合修改。

In [21]:
import random
import time
import os
import copy
from PIL import Image
from sys import stdout
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchsummary import summary
from torchvision import transforms
import torch.nn.functional as F

<hr>
STEP1: <br>
給定每個中文字一個ID，存放在class_map字典中。

In [2]:
# Directory that holds the extracted image files.
raw_img_dir = 'data/raw'
images = os.listdir(raw_img_dir)
# The label of an image is the part of its file name before the first '_'.
classes = {x.split('_')[0] for x in images}
# Number the labels in sorted order. Iterating the set directly would follow
# string-hash order, which changes between interpreter runs, so every session
# would assign different IDs and previously saved weights could not be decoded.
class_map = {label: idx for idx, label in enumerate(sorted(classes))}

<hr>
STEP2: <br>
客製Dataset，回傳PIL讀取圖片的矩陣跟class_map內對應的ID

In [3]:
class MakeDataset(Dataset):
    """Dataset yielding ``(image, class_id)`` pairs read from an image directory.

    The label of an image is the part of its file name before the first
    underscore; it is translated to an integer ID through ``label_dict``.

    Args:
        img_list: File names (relative to ``img_dir``) that belong to this split.
        img_dir: Directory containing the image files.
        n_class: Number of classes; stored as ``n_classes`` and not otherwise
            used inside this class.
        label_dict: Mapping from label string to integer class ID.
        trans: Optional callable applied to the loaded PIL image.

    Note:
        The defaults for ``n_class`` and ``label_dict`` are evaluated once,
        when the class is defined, from the module-level ``classes`` and
        ``class_map``.
    """

    def __init__(self, img_list, img_dir, n_class=len(classes), label_dict=class_map, trans=None):
        self.images = img_list
        self.images_dir = img_dir
        self.n_classes = n_class
        # Label string of every file, index-aligned with ``self.images``.
        self.label = [x.split('_')[0] for x in img_list]
        self.labels_map = label_dict
        self.transform = trans

    def __len__(self):
        """Return the number of images in this split."""
        return len(self.images)

    def __getitem__(self, item):
        """Load image number ``item`` and return ``(image, class_id)``.

        Raises:
            KeyError: If the file's label is missing from the label mapping.
        """
        img_name = self.images[item]
        label = self.labels_map[self.label[item]]
        # Open inside a context manager so the file handle is released at
        # once instead of lingering until garbage collection, and force RGB so
        # every sample has the three channels the transforms/models expect
        # (grayscale, palette or RGBA files would otherwise break the batch).
        with Image.open(os.path.join(self.images_dir, img_name)) as handle:
            img = handle.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

<hr>
STEP3: <br>
建立DL模型，也可改用預訓練模型（pre-trained model），<br>
這邊只跑了HelloCNN，<br>
另外兩個模型是想要比較Batch Normalization在Activation Function前後的差異。<br>


In [4]:
class HelloCNN(nn.Module):
    """Plain five-convolution CNN without normalisation layers.

    The classifier expects 3200 input features, i.e. the 32 x 10 x 10 feature
    map that ``features`` produces for a 3 x 50 x 50 input; other input sizes
    will not match the linear layer.

    Args:
        num_classes: Width of the output layer. Defaults to 4803, the value
            that used to be hard-coded, so ``HelloCNN()`` is unchanged.
    """

    def __init__(self, num_classes=4803):
        super().__init__()
        # Spatial size for a 50x50 input: 50 -> 48 -> 46 -> pool 23 -> 21 -> 19
        # -> 21 (1x1 conv with padding=1 grows the map) -> pool 10.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(128, 128, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(256, 32, kernel_size=1, padding=1),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3200, num_classes),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return F.log_softmax(out, dim=1)
    

class HelloCNNwithBNbeforeRelu(nn.Module):
    """Same layout as the plain CNN, with BatchNorm placed BEFORE each LeakyReLU.

    The classifier expects 3200 input features (a 32 x 10 x 10 map for a
    3 x 50 x 50 input).

    Args:
        num_classes: Width of the output layer. Defaults to 4803, the value
            that used to be hard-coded.
    """

    def __init__(self, num_classes=4803):
        super().__init__()
        # Every stage is Conv -> BatchNorm -> LeakyReLU.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=0),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=0),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(128, 128, kernel_size=3, padding=0),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=0),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(256, 32, kernel_size=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3200, num_classes),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return F.log_softmax(out, dim=1)


# NOTE: this class header was previously indented by four spaces, which made
# the cell fail with an IndentationError; it must sit at module level so that
# the name is importable alongside the other two models.
class HelloCNNwithBNafterRelu(nn.Module):
    """Same layout as the plain CNN, with BatchNorm placed AFTER each LeakyReLU.

    The classifier expects 3200 input features (a 32 x 10 x 10 map for a
    3 x 50 x 50 input).

    Args:
        num_classes: Width of the output layer. Defaults to 4803, the value
            that used to be hard-coded.
    """

    def __init__(self, num_classes=4803):
        super().__init__()
        # Every stage is Conv -> LeakyReLU -> BatchNorm.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 128, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(128, 128, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 256, kernel_size=3, padding=0),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 32, kernel_size=1, padding=1),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(3200, num_classes),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return F.log_softmax(out, dim=1)

<hr>
STEP4:<br>
建立訓練流程。<br><br>

In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, *,
                loaders=None, sizes=None, target_device=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' pass (gradients on, optimizer stepped per batch,
    scheduler stepped once at the end of the pass) followed by a 'valid' pass
    (gradients off). The weights are snapshotted whenever the validation
    accuracy strictly exceeds the best seen so far, and that snapshot is
    loaded back into ``model`` before returning. If the validation accuracy
    never rises above 0, the snapshot is the initial weights.

    Args:
        model: Network to train; must already live on the target device.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: Learning-rate scheduler; ``step()`` is called once per epoch.
        num_epochs: Number of train+valid epochs to run.
        loaders: Optional mapping with ``'train'`` and ``'valid'`` iterables
            of ``(inputs, labels)`` batches. Defaults to the module-level
            ``data_loaders``.
        sizes: Optional mapping ``'train'``/``'valid'`` -> sample count, used
            as the denominator of the reported loss/accuracy. Defaults to the
            module-level ``dataset_sizes`` when ``loaders`` is omitted too,
            otherwise to ``len(loaders[phase].dataset)``.
        target_device: Device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    phases = ('train', 'valid')
    # Resolve the sizes first: which fallback applies depends on whether the
    # caller supplied its own loaders.
    if sizes is None:
        if loaders is None:
            sizes = dataset_sizes
        else:
            sizes = {phase: len(loaders[phase].dataset) for phase in phases}
    if loaders is None:
        loaders = data_loaders
    if target_device is None:
        target_device = device

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        epoch_start = time.time()

        for phase in phases:
            is_train = phase == 'train'
            # train(True) enables training mode, train(False) is eval().
            model.train(is_train)

            sample_size = 0
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)
                sample_size += len(labels)

                # Only build the autograd graph while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # The criterion returns a batch mean, so weight it by the batch
                # size to get a sum that can be averaged over the whole split.
                batch_loss = loss.item()
                running_loss += batch_loss * inputs.size(0)
                running_corrects += int((preds == labels).sum().item())

                # '\r' rewrites the same console line to act as a progress bar.
                print(
                    '\r[{:5d}/{} ({:.1f}%)]\tLoss: {:.4f}\t Acc: {:.5f}\t'.format(
                        sample_size,
                        sizes[phase],
                        100.0 * sample_size / sizes[phase],
                        batch_loss,
                        running_corrects / sample_size),
                    end='',
                    flush=True,
                )

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]
            print()
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a private copy of the best weights seen on validation.
            if not is_train and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        epoch_seconds = time.time() - epoch_start
        print()
        print('Epoch Time Costs: {:.0f}m {:.0f}s'.format(
            epoch_seconds // 60, epoch_seconds % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best validation snapshot before handing the model back.
    model.load_state_dict(best_model_wts)
    return model

<hr>
STEP5:<br>
切分訓練集與驗證集，這邊還沒切測試集，不過應該要切三組才正常。<br>

In [None]:
# Randomise the file order in place, then cut the list once: the first 80 % of
# the shuffled names become the training split, the remainder the validation split.
random.shuffle(images)
n_train = int(len(images) * 0.8)
train_image, valid_image = images[:n_train], images[n_train:]

In [7]:
# File-name lists keyed by split name.
image_sets = dict(train=train_image, valid=valid_image)

<hr>
這組資料集所有圖片都是50\*50，<br>
如果要用遷移學習，可依照各預訓練模型（pre-trained model）的input shape做修改。<br>
例如VGG改成(224, 224)

In [8]:
# Target spatial size handed to the Resize / CenterCrop transforms.
image_size = (50, 50)

In [9]:
# Per-channel mean/std used to standardise 3-channel image tensors.
# NOTE(review): these values match the widely used ImageNet statistics rather
# than statistics computed from this dataset — confirm that this is intended.
normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])

<hr>
STEP6:<br>
建立transforms，丟入MakeDataset做資料轉換。<br>
最少須包含ToTensor()。

In [10]:
# Transform pipelines keyed by split name.
data_transforms = {
    # Training: resize slightly larger, crop back to the target size, apply a
    # small random rotation, then convert to a normalised tensor.
    # NOTE(review): CenterCrop is deterministic, so Resize + CenterCrop only
    # zooms in a little; a random crop may have been intended — confirm.
    'train': transforms.Compose([
        transforms.Resize((52, 52)),
        transforms.CenterCrop(image_size),
        transforms.RandomRotation(degrees=5),
        transforms.ToTensor(),
        normalizer,
    ]),
    # Evaluation splits use the same deterministic recipe; each split still
    # gets its own Compose instance.
    **{
        split: transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            normalizer,
        ])
        for split in ('valid', 'test')
    },
}

In [11]:
# One dataset per split, each paired with the transform pipeline of the same name.
image_dataset = {
    split: MakeDataset(
        img_list=image_sets[split],
        img_dir=raw_img_dir,
        trans=data_transforms[split],
    )
    for split in ('train', 'valid')
}

<hr>
STEP7:<br>
建立data loader。<br>
依照GPU的RAM以及網路參數量調整batch_size。

In [12]:
# Batch loaders per split; only the training split is reshuffled.
data_loaders = {
    split: DataLoader(
        image_dataset[split],
        batch_size=32,
        shuffle=(split == 'train'),
    )
    for split in ('train', 'valid')
}

In [13]:
# Number of samples in each split.
dataset_sizes = {
    split: len(image_dataset[split])
    for split in ('train', 'valid')
}

<hr>
STEP8:<br>
抓取現在GPU狀況。<br>
用.to(device)會比.cuda()好一些，<br>
這樣在沒有GPU的環境下也不需要再修改程式碼。

In [14]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# One fresh instance of each architecture, in the order they are compared.
model_list = [
    architecture()
    for architecture in (
        HelloCNN,
        HelloCNNwithBNbeforeRelu,
        HelloCNNwithBNafterRelu,
    )
]

In [15]:
# Instantiate the plain CNN and move it to the selected device.
model_ft = HelloCNN().to(device)

In [16]:
# Print a layer-by-layer summary (output shapes, parameter counts) for one 3 x 50 x 50 input.
summary(model_ft, (3, 50, 50))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 48, 48]           1,792
         LeakyReLU-2           [-1, 64, 48, 48]               0
            Conv2d-3          [-1, 128, 46, 46]          73,856
         LeakyReLU-4          [-1, 128, 46, 46]               0
         MaxPool2d-5          [-1, 128, 23, 23]               0
            Conv2d-6          [-1, 128, 21, 21]         147,584
         LeakyReLU-7          [-1, 128, 21, 21]               0
            Conv2d-8          [-1, 256, 19, 19]         295,168
         LeakyReLU-9          [-1, 256, 19, 19]               0
           Conv2d-10           [-1, 32, 21, 21]           8,224
        LeakyReLU-11           [-1, 32, 21, 21]               0
        MaxPool2d-12           [-1, 32, 10, 10]               0
           Linear-13                 [-1, 4803]      15,374,403
Total params: 15,901,027
Trainable para

In [17]:
# NOTE(review): the models' forward() already ends in log_softmax, and
# CrossEntropyLoss applies log_softmax again. That is idempotent, so the loss
# value is unaffected, but nn.NLLLoss would avoid the redundant work — confirm
# before changing.
criterion = nn.CrossEntropyLoss()

In [18]:
# Adam over all parameters of the model, with a learning rate of 1e-4.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.0001)

In [19]:
# Multiply the learning rate by 0.1 after every 3 scheduler steps
# (train_model steps the scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)

In [20]:
# Train for 5 epochs; train_model returns the model with its best validation weights loaded.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=5)

Epoch 0/4
----------
train Loss: 5.1077 Acc: 0.2216
valid Loss: 2.2442 Acc: 0.5262

Epoch Time Costs: 24m 0s

Epoch 1/4
----------
train Loss: 1.1596 Acc: 0.7293
valid Loss: 1.2912 Acc: 0.7112

Epoch Time Costs: 22m 42s

Epoch 2/4
----------
train Loss: 0.4999 Acc: 0.8711
valid Loss: 1.1649 Acc: 0.7507

Epoch Time Costs: 23m 2s

Epoch 3/4
----------
train Loss: 0.1532 Acc: 0.9606
valid Loss: 1.0390 Acc: 0.8051

Epoch Time Costs: 23m 3s

Epoch 4/4
----------
train Loss: 0.1099 Acc: 0.9717
valid Loss: 1.0159 Acc: 0.8135

Epoch Time Costs: 23m 9s

Training complete in 115m 56s
Best val Acc: 0.813513


用簡單的架構做測試會有很大的Over-fitting，<br>
可以加入一些解決over-fitting的作法，<br>
例如BN或Dropout，<br>
或是增加數據增強做法，<br>
例如Cutout, Cutmix, Mixup等方法，https://zhuanlan.zhihu.com/p/104992391 <br>

In [23]:
# Persist the trained network twice, both files prefixed with the model's class
# name: once as the whole pickled module, once as the bare state_dict.
torch.save(model_ft, type(model_ft).__name__ + 'model.pkl')
torch.save(model_ft.state_dict(), type(model_ft).__name__ + 'model_params.pkl')

  "type " + obj.__name__ + ". It won't be checked "
