In [8]:
import sys
import mxnet as mx
from mxnet import gluon, autograd ,nd
import argparse
from trainer import  setting_ctx, Train, char_beam_search
from data_loader import LipsDataset
from models.network import LipNet
from mxnet.gluon.data.vision import transforms
from tqdm import tqdm, trange
from utils.common import *

## Set the argument

In [2]:
args = dict()
args['batch_size'] = 64
args['epochs'] = 100
args['image_path'] = '/home/ubuntu/works/2018/lips_model/data/datasets/'
args['align_path'] = '/home/ubuntu/works/2018/lips_model/data/align/'
args['dr_rate'] = 0.5
args['use_gpu'] = True
args['num_workers'] = 2

ctx = setting_ctx(args['use_gpu'])

## Prepare the Data

In [3]:
input_transform  = transforms.Compose([transforms.ToTensor()
                                    , transforms.Normalize((0.7136,0.4906,0.3283),(0.1138,0.1078,0.0917))
                                 ])
training_dataset = LipsDataset(args['image_path'], args['align_path'], transform=input_transform)
train_dataloader = mx.gluon.data.DataLoader(training_dataset, batch_size=args['batch_size'], shuffle=True,num_workers=args['num_workers'] )

## Create the Model

In [4]:
net = LipNet(args['dr_rate'])
net.initialize(ctx=ctx)

loss_fn = gluon.loss.CTCLoss()
trainer = gluon.Trainer(net.collect_params(),optimizer='adam',optimizer_params={'learning_rate':1e-4,'beta1':0.9,'beta2':0.999})

## Fit the Model

In [6]:
best_loss = sys.maxsize
for e in trange(args['epochs']):
    i = 0;
    for input_data, label in tqdm(train_dataloader):
        input_data = nd.transpose(input_data,(0,2,1,3,4))
        input_data = input_data.copyto(ctx)
        label = label.copyto(ctx)
        
        with autograd.record():
            with autograd.train_mode():
                out = net(input_data)
            
                loss_val = loss_fn(out,label)
                loss_val.backward()
        trainer.step(input_data.shape[0])
        
        if i % 20 == 0:
            print("epoch:{e} iter:{i} loss:{l}".format(e=e,i=i,l=loss_val.mean().asscalar()))
            current_loss = loss_val.mean().asscalar()
            if best_loss > current_loss:
                net_file_name = "best_model_epoches_"+str(e)+"iter_"+str(i)+"loss_"+str(round(current_loss,2))
                net.save_parameters('./checkpoint/'+net_file_name)
                best_loss = current_loss
        i = i+1

  0%|          | 0/498 [00:00<?, ?it/s]


## Evaluate the Model

In [9]:
## load best network weights
net.load_parameters('./checkpoint/best_model_epoches_73_iter_650loss_1.02')
pred = net(input_data)
pred_convert = char_beam_search(pred)

label_convert = char_conv(label.asnumpy())
for t,p in zip(label_convert,pred_convert):
    print("target:{t}  pred:{p}".format(t=t,p=p))

target:set red in l two soon  pred:set red in s two soon
target:place green at x six soon  pred:place green at x six soon
target:lay green by x nine soon  pred:lay green by x nine soon
target:lay white at d seven again  pred:lay white at d seven again
target:place green with k seven again  pred:place green with k seven again
target:place blue with i seven soon  pred:place blue with i seven soon
target:lay blue by j five please  pred:lay blue by j five please
target:place green at d nine soon  pred:place green at c nine soon
target:set white in h three now  pred:set white in h three now
target:set green in l seven now  pred:set green in l seven now
target:lay blue with c six soon  pred:lay blue with t six soon
target:place blue at i one soon  pred:place blue at i one soon
target:lay red at v six now  pred:lay red in v six now
target:place red by j one again  pred:place red by j one again
target:bin white with n five again  pred:bin white with n five again
target:set blue at z six please