In [1]:
import copy,os
import torch
import utils
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import matplotlib.pyplot as plt
import models.crnn as crnn

from PIL import Image
from skimage import io
from keys import getAlphabet
from sklearn.model_selection import train_test_split
from torchvision import transforms, models
from torch.utils.data import Dataset,DataLoader
from torch.autograd import Variable

In [None]:
class ImageDataset(Dataset):
    def __init__(self,root,train=True,type='English'):
        self.train = train
        #图像增强，训练集使用totensor、归一化
        self.train_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5,], [0.5,])
        ])
        #图像增强，验证集使用totensor、归一化
        self.test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5,], [0.5,])
        ])
        if type == 'English':
            self.img = np.load(os.path.join(root,'English-data.npy'))#读入图片
            self.label = pd.read_csv(os.path.join(root,'English-label.csv')) #读入label
            self.label = self.label['0'].values
        else:
            self.img = np.load(os.path.join(root,'Russian-data.npy'))#读入图片
            self.label = pd.read_csv(os.path.join(root,'Russian-label.csv')) #读入label
            self.label = self.label['0'].values
        
        self.train_img, self.test_img, self.train_label, self.test_label = train_test_split(self.img,self.label,test_size=0.25)
        
        self.train_img = np.reshape(self.train_img,(-1,32,128))
        self.test_img = np.reshape(self.test_img,(-1,32,128))
            
    def __getitem__(self,index):
        if self.train:#如果是“训练”模式
            target = self.train_label[index]
            img = self.train_img[index]
            # 图片变换
            img = Image.fromarray(img)
            img = self.train_transform(img)
            return img,target
        else:
            target = self.test_label[index]
            img = self.test_img[index]
            img = Image.fromarray(img)
            img = self.test_transform(img)
            return img,target
            
    def __len__(self):
        if self.train:
            return len(self.train_img)
        else:
            return len(self.test_img)

In [2]:
type = 'Russian'
weight_path = r'./'+type+'-weight.pth'
pretrained = False

BATCH_SIZE = 64
alphabet = getAlphabet(type)
nclass = len(alphabet) + 1
nh = 256
ngpu = 1
loss_avg = utils.averager()
converter = utils.strLabelConverter(alphabet)
criterion = nn.CTCLoss()
criterion = criterion.cuda()

train_dataset = ImageDataset(root=r'./',train=True,type=type)
train_loader = DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,shuffle=True)
val_dataset = ImageDataset(root=r'./',train=False,type=type)
val_loader = DataLoader(dataset=val_dataset,batch_size=BATCH_SIZE,shuffle=True)

In [3]:
model = crnn.CRNN(32, 1, nclass, nh, ngpu)
model.cuda()
if pretrained:
    model.load_state_dict(torch.load(weight_path))

optimizer = optim.SGD(model.parameters(),lr = 0.1,momentum=0.9,weight_decay=0.00004)
#optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.5, 0.999))

losses = [] 
acces = []
eval_losses = []
eval_acces = []
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1,gamma=0.5)#每一个epoch，学习率减小到原来的50%
for echo in range(10):
    train_loss = 0#定义训练损失
    train_acc = 0#定义训练准确度
    model.train()#将网络转化为训练模式
    
    if np.mod(echo,5) == 4:
        scheduler.step()
    for i,(X,label) in enumerate(train_loader):
        X = Variable(X).cuda()#包装tensor用于自动求梯度
        text, length = converter.encode(label)
        preds = model(X)
        #out = F.log_softmax(preds)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * preds.size(1)))
        cost = criterion(preds, text, preds_size, length)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
    losses.append(loss_avg.val())
    print("echo:"+' ' +str(echo))
    print("train-loss:" + ' ' + str(loss_avg.val()))
    trloss, = plt.plot(losses)
    loss_avg.reset()
    
    model.eval()
    n_correct = 0
    for i,(X,label) in enumerate(val_loader):
        img = X
        X = Variable(X).cuda()#包装tensor用于自动求梯度
        text, length = converter.encode(label)
        preds = model(X)
        #out = F.log_softmax(preds)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * preds.size(1)))
        cost = criterion(preds, text, preds_size, length)
        loss_avg.add(cost)
        
        _, preds = preds.max(2)
        #preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        #print(sim_preds)
        for pred, target in zip(sim_preds, label):
            if pred.strip() == target.strip():
                n_correct += 1

    accuracy = n_correct / float(len(val_loader) * BATCH_SIZE)
    eval_losses.append(loss_avg.val())
    eval_acces.append(accuracy)
    print("test-loss:" + ' ' + str(loss_avg.val()))
    print("accuracy:"+' '+str(accuracy))
    loss_avg.reset()
    
    teloss, = plt.plot(eval_losses)
    plt.legend(handles=[trloss,teloss],labels=['train-loss','test-loss'],loc='upper right')
    plt.show()
    plt.plot(eval_acces)
    plt.show()

In [61]:
#torch.save(model.state_dict(),weight_path)

In [75]:
class testDataset(Dataset):
    def __init__(self,root):
        #图像增强，验证集使用totensor、归一化
        self.test_transform = transforms.Compose([
            transforms.Resize(size=[32,128]),
            transforms.ToTensor(),
            transforms.Normalize([0.5,], [0.5,])
        ])
        
        self.filelist = []
        file = os.listdir(root)
        for filename in file:
            self.filelist.append(root+'/'+filename)
            
    def __getitem__(self,index):
        img = io.imread(self.filelist[index],as_gray=True)
        img = Image.fromarray(img)
        img = self.test_transform(img)
        print(self.filelist[index])
        return img,' '
            
    def __len__(self):
        return len(self.filelist)

In [76]:
type = 'Russian'
weight_path = r'./'+type+'-weight.pth'

alphabet = getAlphabet(type)
nclass = len(alphabet) + 1
nh = 256
ngpu = 1

converter = utils.strLabelConverter(alphabet)
criterion = nn.CTCLoss()
criterion = criterion.cuda()

test_dataset = testDataset(root=r'./test')
test_loader = DataLoader(dataset=test_dataset,batch_size=1,shuffle=False)

test_model = crnn.CRNN(32, 1, nclass, nh, ngpu)
test_model.cuda()
test_model.load_state_dict(torch.load(weight_path))

<All keys matched successfully>

In [77]:
result = []
model.eval()#将网络转化为训练模式
for i,(X,label) in enumerate(test_loader):
    X = Variable(X).cuda()#包装tensor用于自动求梯度
    preds = model(X)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * preds.size(1)))
    _, preds = preds.max(2)
    #preds = preds.squeeze(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
    result.append(sim_preds)

./test/test2.jpg
./test/test3.jpg
./test/test1.jpg


In [78]:
result

['модули иртибот', 'иртиботмодули ', 'модули иртибот']