In [1]:
# Notebook setup: imports, GPU selection and full-width page styling.
import json  # used by the data-loading cells; imported explicitly instead of leaking in via a star import

import torch
torch.cuda.set_device(0)  # make GPU 0 the current CUDA device

# Project modules.
# NOTE(review): the star imports hide where the names used below (BiLSTM_CRF, train,
# evaluate, build_dataset) come from -- consider importing them explicitly.
from model import *
from train import *
from build_dataset import *

# `IPython.core.display` is a deprecated import path for these helpers;
# `IPython.display` is the public location.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>")) # make screen full width

## build dataset

In [3]:
# Build the 'sy_1234' dataset with character-level mode off and min_freq=[2].
# Later cells read 'dataset/sy_1234_*.json' (presumably written by this call --
# confirm in build_dataset.py).
build_dataset(
    'sy_1234',
    charlevel=False,
    min_freq=[2],
)

0123456789 sy_1234_train length = 134413
size of dict: [(0, 23668), (1, 10)]
size of reduced dict: [(0, 13907), (1, 10)]
0123456789 sy_1234_validate length = 15088
0 sy_1234_test length = 2253


## train syllable model (BI)

In [14]:
# load data and build dict

datasetname = 'sy_1'

# Read the four JSON files of this dataset in a fixed order:
# train, validate, test, dict.
loaded = []
for suffix in ('_train.json', '_validate.json', '_test.json', '_dict.json'):
    with open('dataset/' + datasetname + suffix) as file:
        loaded.append(json.load(file))
train_data, validate_data, test_data, _to_ix = loaded

print('size of data:', len(train_data), len(validate_data), len(test_data))

# The dict file holds exactly two entries, unpacked here as the x1 index and the tag index.
x1_to_ix, tag_to_ix = _to_ix

size of data: 134413 15088 2253


In [15]:
# Instantiate BiLSTM_CRF sized from the two loaded index maps, then move it to CUDA.
model = BiLSTM_CRF(
    len(x1_to_ix),
    label_size=len(tag_to_ix),
    x1emb_dim=512,
    hidden_dim=1024,
)
model = model.cuda()

In [16]:
# quickly check the model
# Smoke test: 20 epochs on the first 1000 training examples (the same slice is
# passed as the validation set), followed by an evaluation on the test split.
# NOTE(review): 160 is presumably the batch size (evaluate takes bs=160) and
# 0.001 the learning rate -- confirm against train().
# NOTE(review): this appears to update `model` itself, so the full training
# cell does not start from fresh weights unless the model cell is re-run -- confirm.
_ = train(
    model, _to_ix,
    20, 160, 0.001,
    train_data[:1000], train_data[:1000],
    best_score=0,
)
result, pred_text, answer_text = evaluate(model, _to_ix, test_data, bs=160)

epoch: 0, progress: 0123456 time: 0:00:02 score: 77.78
epoch: 1, progress: 0123456 time: 0:00:02 score: 86.35
epoch: 2, progress: 0123456 time: 0:00:02 score: 90.47
epoch: 3, progress: 0123456 time: 0:00:02 score: 92.03
epoch: 4, progress: 0123456 time: 0:00:02 score: 91.36
epoch: 5, progress: 0123456 time: 0:00:02 score: 90.75
epoch: 6, progress: 0123456 time: 0:00:01 score: 91.21
epoch: 7, progress: 0123456 time: 0:00:02 score: 89.99
epoch: 8, progress: 0123456 time: 0:00:02 score: 91.54
epoch: 9, progress: 0123456 time: 0:00:02 score: 97.52
epoch: 10, progress: 0123456 time: 0:00:02 score: 97.86
epoch: 11, progress: 0123456 time: 0:00:02 score: 94.72
epoch: 12, progress: 0123456 time: 0:00:02 score: 98.34
epoch: 13, progress: 0123456 time: 0:00:02 score: 98.73
epoch: 14, progress: 0123456 time: 0:00:02 score: 98.78
epoch: 15, progress: 0123456 time: 0:00:02 score: 98.88
epoch: 16, progress: 0123456 time: 0:00:02 score: 99.08
epoch: 17, progress: 0123456 time: 0:00:02 score: 96.84
ep

In [7]:
# train
# Two-stage schedule: 5 epochs at lr=0.001, then 5 more at lr=0.0005 starting
# from the weights reloaded from 'model/<name_to_save>.pth'.
name_to_save = 'sy_1'

# train 5 epoch at lr 0.001
best_score = train(model, _to_ix, 5, 160, 0.001, train_data, validate_data, 0, name_to_save)

# load best model
model.load_state_dict(torch.load('model/' + name_to_save + '.pth'))
model.train()

# train 5 epoch at lr=0.0005
# Carry the stage-1 best score forward instead of resetting it to 0, so a
# weaker stage-2 epoch cannot replace the stage-1 checkpoint and the returned
# best_score keeps describing the file on disk.
# NOTE(review): assumes train() writes the checkpoint only when the validation
# score beats `best_score` -- confirm in train.py.
best_score = train(model, _to_ix, 5, 160, 0.0005, train_data, validate_data, best_score, name_to_save)

epoch: 0, progress: 0123456789 time: 0:04:41 score: 95.27
epoch: 1, progress: 0123456789 time: 0:04:48 score: 91.12
epoch: 2, progress: 0123456789 time: 0:04:46 score: 96.12
epoch: 3, progress: 0123456789 time: 0:04:50 score: 96.79
epoch: 4, progress: 0123456789 time: 0:04:50 score: 96.85
epoch: 0, progress: 0123456789 time: 0:04:53 score: 97.41
epoch: 1, progress: 0123456789 time: 0:04:53 score: 97.60
epoch: 2, progress: 0123456789 time: 0:04:58 score: 97.46
epoch: 3, progress: 0123456789 time: 0:04:51 score: 97.30
epoch: 4, progress: 0123456789 time: 0:04:58 score: 97.77


### manually save and load model

In [6]:
# Base name of the checkpoint used by the load/save cells below ('model/<name_to_save>.pth').
name_to_save = 'sy_1'

In [7]:
# load
# Restore the weights stored at 'model/<name_to_save>.pth', then switch the
# model to training mode.
checkpoint_path = 'model/' + name_to_save + '.pth'
saved_state = torch.load(checkpoint_path)
model.load_state_dict(saved_state)
model.train()

In [35]:
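# save -- disabled: uncomment the next line to write the current model weights to 'model/<name_to_save>.pth'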
# torch.save(model.state_dict(), 'model/' + name_to_save + '.pth')

## train syllable model (Scheme B)

In [4]:
# load data and build dict

datasetname = 'sy_1234'

# Read the four JSON files of this dataset in a fixed order:
# train, validate, test, dict.
loaded = []
for suffix in ('_train.json', '_validate.json', '_test.json', '_dict.json'):
    with open('dataset/' + datasetname + suffix) as file:
        loaded.append(json.load(file))
train_data, validate_data, test_data, _to_ix = loaded

print('size of data:', len(train_data), len(validate_data), len(test_data))

# The dict file holds exactly two entries, unpacked here as the x1 index and the tag index.
x1_to_ix, tag_to_ix = _to_ix

size of data: 134413 15088 2253


In [12]:
# Instantiate BiLSTM_CRF sized from the two loaded index maps, then move it to CUDA.
model = BiLSTM_CRF(
    len(x1_to_ix),
    label_size=len(tag_to_ix),
    x1emb_dim=512,
    hidden_dim=1024,
)
model = model.cuda()

In [13]:
# quickly check the model
# Smoke test: 20 epochs on the first 1000 training examples (the same slice is
# passed as the validation set), followed by an evaluation on the test split.
# NOTE(review): 160 is presumably the batch size (evaluate takes bs=160) and
# 0.001 the learning rate -- confirm against train().
# NOTE(review): this appears to update `model` itself, so the full training
# cell does not start from fresh weights unless the model cell is re-run -- confirm.
_ = train(
    model, _to_ix,
    20, 160, 0.001,
    train_data[:1000], train_data[:1000],
    best_score=0,
)
result, pred_text, answer_text = evaluate(model, _to_ix, test_data, bs=160)

epoch: 0, progress: 0123456 time: 0:00:02 score: 60.92
epoch: 1, progress: 0123456 time: 0:00:02 score: 80.58
epoch: 2, progress: 0123456 time: 0:00:02 score: 88.42
epoch: 3, progress: 0123456 time: 0:00:02 score: 79.96
epoch: 4, progress: 0123456 time: 0:00:02 score: 90.54
epoch: 5, progress: 0123456 time: 0:00:02 score: 93.28
epoch: 6, progress: 0123456 time: 0:00:02 score: 95.05
epoch: 7, progress: 0123456 time: 0:00:02 score: 96.78
epoch: 8, progress: 0123456 time: 0:00:02 score: 97.20
epoch: 9, progress: 0123456 time: 0:00:02 score: 97.39
epoch: 10, progress: 0123456 time: 0:00:02 score: 98.25
epoch: 11, progress: 0123456 time: 0:00:02 score: 98.40
epoch: 12, progress: 0123456 time: 0:00:02 score: 98.73
epoch: 13, progress: 0123456 time: 0:00:02 score: 98.85
epoch: 14, progress: 0123456 time: 0:00:02 score: 99.17
epoch: 15, progress: 0123456 time: 0:00:02 score: 98.60
epoch: 16, progress: 0123456 time: 0:00:02 score: 98.97
epoch: 17, progress: 0123456 time: 0:00:02 score: 99.33
ep

In [7]:
# train
# Two-stage schedule: 5 epochs at lr=0.001, then 5 more at lr=0.0005 starting
# from the weights reloaded from 'model/<name_to_save>.pth'.
name_to_save = 'sy_1234'

# train 5 epoch at lr 0.001
best_score = train(model, _to_ix, 5, 160, 0.001, train_data, validate_data, 0, name_to_save)

# load best model
model.load_state_dict(torch.load('model/' + name_to_save + '.pth'))
model.train()

# train 5 epoch at lr=0.0005
# Carry the stage-1 best score forward instead of resetting it to 0, so a
# weaker stage-2 epoch cannot replace the stage-1 checkpoint and the returned
# best_score keeps describing the file on disk.
# NOTE(review): assumes train() writes the checkpoint only when the validation
# score beats `best_score` -- confirm in train.py.
best_score = train(model, _to_ix, 5, 160, 0.0005, train_data, validate_data, best_score, name_to_save)

epoch: 0, progress: 0123456789 time: 0:04:41 score: 95.27
epoch: 1, progress: 0123456789 time: 0:04:48 score: 91.12
epoch: 2, progress: 0123456789 time: 0:04:46 score: 96.12
epoch: 3, progress: 0123456789 time: 0:04:50 score: 96.79
epoch: 4, progress: 0123456789 time: 0:04:50 score: 96.85
epoch: 0, progress: 0123456789 time: 0:04:53 score: 97.41
epoch: 1, progress: 0123456789 time: 0:04:53 score: 97.60
epoch: 2, progress: 0123456789 time: 0:04:58 score: 97.46
epoch: 3, progress: 0123456789 time: 0:04:51 score: 97.30
epoch: 4, progress: 0123456789 time: 0:04:58 score: 97.77


### manually save and load model

In [6]:
# Base name of the checkpoint used by the load/save cells below ('model/<name_to_save>.pth').
name_to_save = 'sy_1234'

In [7]:
# load
# Restore the weights stored at 'model/<name_to_save>.pth', then switch the
# model to training mode.
checkpoint_path = 'model/' + name_to_save + '.pth'
saved_state = torch.load(checkpoint_path)
model.load_state_dict(saved_state)
model.train()

In [35]:
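# save -- disabled: uncomment the next line to write the current model weights to 'model/<name_to_save>.pth'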
# torch.save(model.state_dict(), 'model/' + name_to_save + '.pth')