In [0]:

# Clone the master branch of the project and move its top-level contents into the
# working directory (presumably so `utils` and train.py are importable/runnable from here — confirm).
!git clone -b master https://github.com/WellsCui/speech-to-text.git
!mv -f ./speech-to-text/* ./




In [1]:
# Run the training script with config.json; the captured output below ends with a ^C interrupt.
!python train.py -c config.json

use device: cpu
loading train data ...
remaining train data length: 0
pushing new round train data: 0
pushed new train data
pushed new train data
pushed new train data
target_padded_chars_2 shape: torch.Size([171, 2])
epoch 0, iter 1, avg. loss 753.04, avg. ppl 22313678724742.12 cum. examples 2, speed 0.75 words/sec, time elapsed 65.24 sec
target_padded_chars_2 shape: torch.Size([171, 2])
epoch 1, iter 2, avg. loss 561.88, avg. ppl 9121568811.91 cum. examples 4, speed 0.79 words/sec, time elapsed 127.00 sec
target_padded_chars_2 shape: torch.Size([171, 2])
epoch 2, iter 3, avg. loss 500.10, avg. ppl 732686851.78 cum. examples 6, speed 0.79 words/sec, time elapsed 188.83 sec
target_padded_chars_2 shape: torch.Size([171, 2])
^C
Process Process-2:
Traceback (most recent call last):
  File "/anaconda3/envs/tf2/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/anaconda3/envs/tf2/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._tar

In [0]:


def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences.

    The model is put into eval mode for the duration of the call. Its previous
    train/eval mode is restored on the way out, including when evaluation is
    aborted by an exception or a keyboard interrupt, so an interrupted
    validation cannot leave the model stuck in eval mode.

    @param model (NMT): NMT Model; calling it on a batch must return a tensor whose
        negated sum is used as the batch loss
    @param dev_data (list of (src_sent, tgt_sent)): list of tuples containing source and target sentence
    @param batch_size (int): number of examples requested from `batch_iter` per batch
    @returns ppl (perplexity on dev sentences): exp(total loss / number of target words to predict)
    @raises ZeroDivisionError: if dev_data yields no target words to predict
    """
    was_training = model.training
    model.eval()

    cum_loss = 0.
    cum_tgt_words = 0.

    try:
        # no_grad() disables gradient tracking; evaluation never calls backward()
        with torch.no_grad():
            for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
                loss = -model(src_sents, tgt_sents).sum()
                cum_loss += loss.item()

                # omitting leading `<s>`: only the remaining tokens are predicted
                cum_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        ppl = np.exp(cum_loss / cum_tgt_words)
    finally:
        # train(mode) restores exactly the mode found on entry (True or False)
        model.train(was_training)

    return ppl


def compute_corpus_level_bleu_score(references: List[List[str]], hypotheses: List[Hypothesis]) -> float:
    """ Score decoded hypotheses against gold references with corpus-level BLEU.

    When the first reference starts with `<s>`, the first and last token of
    every reference are dropped before scoring.

    @param references (List[List[str]]): a list of gold-standard reference target sentences
    @param hypotheses (List[Hypothesis]): a list of hypotheses, one for each reference
    @returns bleu_score: corpus-level BLEU score as returned by `corpus_bleu`
    """
    has_boundary_tokens = references[0][0] == '<s>'
    if has_boundary_tokens:
        references = [tokens[1:-1] for tokens in references]

    # corpus_bleu takes a list of reference *sets*; each sentence has exactly one reference here
    reference_sets = [[tokens] for tokens in references]
    candidate_sentences = [hyp.value for hyp in hypotheses]

    return corpus_bleu(reference_sets, candidate_sentences)


def beam_search(model: NMT, test_data_src: List[List[float]], beam_size: int, max_decoding_time_step: int) -> List[List[Hypothesis]]:
    """ Run beam search to construct hypotheses for a list of src-language sentences.

    The model is put into eval mode while decoding. Its previous train/eval mode is
    restored on the way out, including when decoding is aborted by an exception
    or a keyboard interrupt.

    @param model (NMT): NMT Model; must provide a `beam_search` method
    @param test_data_src (List[List[float]]): List of sentences (words) in source language, from test set.
    @param beam_size (int): beam_size (# of hypotheses to hold for a translation at every step)
    @param max_decoding_time_step (int): maximum sentence length that Beam search can produce
    @returns hypotheses (List[List[Hypothesis]]): List of Hypothesis translations for every source sentence.
    """
    was_training = model.training
    model.eval()

    hypotheses = []
    try:
        # decoding never calls backward(), so gradient tracking is switched off
        with torch.no_grad():
            for src_sent in tqdm(test_data_src, desc='Decoding', file=sys.stdout):
                example_hyps = model.beam_search(src_sent,
                                                 beam_size=beam_size,
                                                 max_decoding_time_step=max_decoding_time_step)
                hypotheses.append(example_hyps)
    finally:
        # train(mode) restores exactly the mode found on entry (True or False)
        model.train(was_training)

    return hypotheses



In [0]:
import IPython.display as ipd

# Rates handed to load_voices_files by later cells (units presumably Hz — confirm).
sample_rate, resample_rate = 22000, 8000
# Reassigned to 16 by the training cell further down.
train_records = 80


In [0]:
import os
# Adam over every parameter of `model`, which must already exist in the kernel.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Checkpoint restore is currently disabled; left here for reference.
# load model
# if os.path.isfile(model_save_path):
#   params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
#   model.load_state_dict(params['state_dict'])
#   model = model.to(device)

#   print('restore parameters of the optimizers', file=sys.stderr)
#   optimizer.load_state_dict(torch.load(model_save_path + '.optim'))



In [0]:
from multiprocessing import Process, Queue
from utils import load_voices, load_voices_files, split_source_with_pad, read_corpus_from_LJSpeech, batch_iter, get_voice_files_and_corpus, batch_iter_to_queue, batch_iter_to_queue2, load_train_data

# Training cell: pulls batches produced by two helper processes, runs one optimiser
# step per batch, logs running statistics and periodically validates on dev_data.
# NOTE(review): `time`, `math`, `np`, `sys`, `torch`, `model`, `device` and `model_save_path`
# are used below but are not imported/defined in this cell — they must already exist in the kernel.

# Dev set as (voice, transcript) pairs; the second argument 5 is presumably a record count — confirm.
dev_files, dev_corpus = get_voice_files_and_corpus('dataset/dev', 5)
dev_data = list(zip(load_voices_files(dev_files, sample_rate, resample_rate), dev_corpus))


# Hyper-parameters of the loop.
clip_grad = 5.0       # max gradient norm passed to clip_grad_norm_
valid_niter = 1000    # validate every this many iterations
log_every = 10        # print/reset the report_* statistics every this many iterations
max_epoch = 4000      # forwarded to batch_iter_to_queue2

num_trial = 0
train_records = 16    # overrides the value 80 set in an earlier cell
epoch_size = 16
train_batch_size = 16

# Running counters: report_* are reset every `log_every` iterations, cum_* every `valid_niter` iterations.
train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
cum_examples = report_examples = epoch = valid_num = 0
hist_valid_scores = []
train_time = begin_time = time.time()
print('begin Maximum Likelihood training')
# Bounded queues linking the helper processes to this loop: data_queue feeds the batcher,
# batch_queue is read here, loss_queue is written here and handed to the batcher.
data_queue = Queue(1)
batch_queue = Queue(1)
loss_queue = Queue(2)

# Loader process: fills data_queue from dataset/train.
# NOTE(review): this process is started but never joined or terminated in this cell.
train_data_to_queue_process = Process(target=load_train_data, args=('dataset/train', train_records, epoch_size, data_queue, 10))
train_data_to_queue_process.start()

# Batcher process: reads data_queue and publishes (epoch, voices, tgt_sents) tuples on batch_queue.
batch_iter_to_queue_process = Process(target=batch_iter_to_queue2, args=(data_queue, batch_queue, loss_queue, max_epoch, train_batch_size, True))
batch_iter_to_queue_process.start()
# Block until the first batch is available.
epoch, voices, tgt_sents = batch_queue.get(True)
current_epoch = -1  # not read again in this cell
train_losses = []
# The loop ends when the batcher publishes None for voices or tgt_sents.
while voices is not None and tgt_sents is not None:

    train_iter += 1
    optimizer.zero_grad()
    
    # voices = load_voices_files(voice_files, sample_rate, resample_rate)
    # voices = voice_files
    batch_size = len(voices)

    # The negated model output is used as the per-example loss; the step is taken on the batch mean.
    example_losses = -model(voices, tgt_sents) # (batch_size,)
    batch_loss = example_losses.sum()
    loss = batch_loss / batch_size

    loss.backward()

    # clip gradient
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad)

    optimizer.step()

    # Statistics accumulate the summed (not averaged) batch loss.
    batch_losses_val = batch_loss.item()
    report_loss += batch_losses_val
    cum_loss += batch_losses_val

    tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
    report_tgt_words += tgt_words_num_to_predict
    cum_tgt_words += tgt_words_num_to_predict
    report_examples += batch_size
    cum_examples += batch_size
    # Publish the running average loss; put() blocks while loss_queue (capacity 2) is full.
    # Presumably consumed by batch_iter_to_queue2 — confirm.
    loss_queue.put(report_loss / report_examples)
    # One record per iteration; a later cell turns train_losses into a DataFrame.
    train_losses.append({'epoch': epoch,
                         'iter': train_iter,
                         'loss': report_loss / report_examples,
                         'ppl': math.exp(report_loss / report_tgt_words),
                         'cum': cum_examples,
                         'speed': report_tgt_words / (time.time() - train_time)})

    if train_iter % log_every == 0:
        print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
              'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                  report_loss / report_examples,
                                                                                  math.exp(report_loss / report_tgt_words),
                                                                                  cum_examples,
                                                                                  report_tgt_words / (time.time() - train_time),
                                                                                  time.time() - begin_time), file=sys.stderr)

        # Start a fresh reporting window.
        train_time = time.time()
        report_loss = report_tgt_words = report_examples = 0.

    # perform validation
    if train_iter % valid_niter == 0:
        print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter,
                                                                                  cum_loss / cum_examples,
                                                                                  np.exp(cum_loss / cum_tgt_words),
                                                                                  cum_examples), file=sys.stderr)

        cum_loss = cum_examples = cum_tgt_words = 0.
        valid_num += 1

        print('begin validation ...', file=sys.stderr)

        # compute dev. ppl (no BLEU is computed here); higher valid_metric is better, hence the negation
        dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)   # dev batch size can be a bit larger
        valid_metric = -dev_ppl

        print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl), file=sys.stderr)

        is_better = len(hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
        hist_valid_scores.append(valid_metric)

        if is_better:
            patience = 0
            # print('save currently the best model to [%s]' % model_save_path, file=sys.stderr)
            # model.save(model_save_path)

            # # also save the optimizers' state
            # torch.save(optimizer.state_dict(), model_save_path + '.optim')
        elif patience < 100:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == 100:
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == 3:
                    print('early stop!', file=sys.stderr)
                    # NOTE(review): exit(0) inside a notebook cell may shut down the kernel rather than
                    # just leave the loop, and the helper processes are not stopped first — confirm intent.
                    exit(0)

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * 0.5
                print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                # load model
                # NOTE(review): saving the best model is commented out in the `is_better` branch above,
                # so these files must have been written elsewhere for the reload to succeed — confirm.
                params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                model = model.to(device)

                print('restore parameters of the optimizers', file=sys.stderr)
                optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0
        
    # Fetch the next batch; blocks until the batcher publishes one.
    epoch, voices, tgt_sents = batch_queue.get()
# Wait for the batcher process to finish once the loop has ended.
batch_iter_to_queue_process.join()
      


        



begin Maximum Likelihood training
loading train data ...
remaining train data length: 11
pushing new round train data: 0
geting train data ...


In [0]:
import pandas as pd
import matplotlib.pyplot as plt

# Checkpoint the model and, next to it, the optimiser state.
model.save(model_save_path)
torch.save(optimizer.state_dict(), model_save_path + '.optim')

# Dump the per-iteration training log to CSV, then plot loss against iteration.
report_df = pd.DataFrame(train_losses)
report_df.to_csv('train-report-64-16-10-2.csv')
ax = plt.gca()
report_df.plot.line(x='iter', y='loss', ax=ax)
plt.show()


In [0]:
# Overlay the loss curves of previously saved training reports on a single axis.
reports_files = [1]
ax = plt.gca()
for report in reports_files:
    report_df = pd.read_csv('train-report-64-16-10-' + str(report) + ".csv")
    report_df.plot.line(x='iter', y='loss', ax=ax)
plt.show()

In [0]:
# print("load test source sentences from [{}]".format(args['TEST_SOURCE_FILE']), file=sys.stderr)
from utils import get_voice_files_and_corpus_by_indexes

# Decode records 32..47 of dataset/train with beam search, report corpus BLEU on
# stderr and write the best hypothesis of each record to output2.txt, one per line.
voices_files = []
test_data_tgt = []
for voice_file, sent in get_voice_files_and_corpus_by_indexes('dataset/train', list(range(32, 48))):
    voices_files.append(voice_file)
    test_data_tgt.append(sent)

# Use the shared rate constants (as the dev set does) instead of repeating the
# literals, so the decoded audio is always prepared the same way as dev_data.
test_data_src = load_voices_files(voices_files, sample_rate, resample_rate)

hypotheses = beam_search(model, test_data_src,
                         beam_size=5,
                         max_decoding_time_step=200)

# The first hypothesis of each example is taken as its best one.
top_hypotheses = [hyps[0] for hyps in hypotheses]
bleu_score = compute_corpus_level_bleu_score(test_data_tgt, top_hypotheses)
print('Corpus BLEU: {}'.format(bleu_score * 100), file=sys.stderr)

# Iterate the hypotheses directly: the source audio is not needed for writing, and
# this keeps the output independent of whether test_data_src can be iterated twice.
with open('output2.txt', 'w') as f:
    for top_hyp in top_hypotheses:
        hyp_sent = ' '.join(top_hyp.value)
        f.write(hyp_sent + '\n')

