# Features

## Use `dataloader` to get an utterance

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np

# CTC model for ASR

## Obtain phoneme output units

In [2]:
# First find the unique phones in train.json, and then create a vocabulary
# file (read below as vocab_39.txt) with one symbol per line. Including the
# leading "_" entry there should be 40 lines in total.
#
# `vocab` maps each symbol to its zero-based line number, and `phonemes`
# lists the symbols in file order with the first entry removed.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # `idx` rather than `id`: a top-level loop variable called `id` would
    # shadow the builtin for every later cell in the kernel.
    for idx, text in enumerate(f):
        symbol = text.strip()
        vocab[symbol] = idx
        # Store the stripped symbol so the entries match the keys of `vocab`
        # (the raw line still carries its trailing newline).
        phonemes.append(symbol)
# Drop the first line (index 0, "_") so that only the phone symbols remain.
phonemes = phonemes[1:]

In [3]:
# Display the symbol-to-index mapping read from vocab_39.txt.
vocab

{'_': 0,
 'uh': 1,
 'n': 2,
 's': 3,
 'th': 4,
 'y': 5,
 'ih': 6,
 'ch': 7,
 'aa': 8,
 'l': 9,
 'uw': 10,
 'ah': 11,
 'b': 12,
 'ow': 13,
 'dh': 14,
 'd': 15,
 'aw': 16,
 't': 17,
 'r': 18,
 'w': 19,
 'm': 20,
 'v': 21,
 'ay': 22,
 'f': 23,
 'p': 24,
 'sh': 25,
 'eh': 26,
 'oy': 27,
 'sil': 28,
 'hh': 29,
 'dx': 30,
 'jh': 31,
 'er': 32,
 'iy': 33,
 'g': 34,
 'ae': 35,
 'ey': 36,
 'z': 37,
 'k': 38,
 'ng': 39}

## Model & training configurations

In [3]:
from collections import namedtuple

# Use the first CUDA device when torch reports one, otherwise stay on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("currently using cuda" if device == "cuda:0" else "currently using cpu only")

# Data locations and hyper-parameters shared by the model and training cells.
args = dict(
    seed=123,
    train_json='train_fbank.json',
    val_json='dev_fbank.json',
    test_json='test_fbank.json',
    batch_size=4,
    num_layers=1,
    fbank_dims=23,
    model_dims=128,
    concat=1,
    lr=0.5,
    vocab=vocab,
    report_interval=50,
    num_epochs=20,
    device=device,
)

# Convert to a namedtuple so that settings are read as attributes (args.lr, ...).
args = namedtuple('x', args)(**args)

currently using cuda


# Experiment looking at model dim

In [4]:
import models

# Instantiate the BiLSTM from the shared configuration; the second argument is
# the filterbank dimension multiplied by the `concat` factor, and the last is
# the number of entries in the vocabulary.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
# Total element count over every parameter tensor of the model.
num_params = sum([weights.numel() for weights in model.parameters()])
print(f'Total number of model parameters is {num_params}')

Total number of model parameters is 166952


In [5]:
from datetime import datetime
from trainer import train

# Time the whole run: moving `model` to the configured device plus training.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

# Report the elapsed wall-clock time in whole minutes (floored).
duration = (end - start).total_seconds()
print(f'Training finished in {duration // 60} minutes.')
print(f'Model saved to {model_path}')

EPOCH 1:
torch.Size([484, 4, 23])
torch.Size([484, 4, 256])
torch.Size([484, 4, 40])


torch.Size([295, 4, 23])
torch.Size([295, 4, 256])
torch.Size([295, 4, 40])


torch.Size([428, 4, 23])
torch.Size([428, 4, 256])
torch.Size([428, 4, 40])


torch.Size([374, 4, 23])
torch.Size([374, 4, 256])
torch.Size([374, 4, 40])


torch.Size([290, 4, 23])
torch.Size([290, 4, 256])
torch.Size([290, 4, 40])


torch.Size([427, 4, 23])
torch.Size([427, 4, 256])
torch.Size([427, 4, 40])


torch.Size([522, 4, 23])
torch.Size([522, 4, 256])
torch.Size([522, 4, 40])


torch.Size([511, 4, 23])
torch.Size([511, 4, 256])
torch.Size([511, 4, 40])


torch.Size([424, 4, 23])
torch.Size([424, 4, 256])
torch.Size([424, 4, 40])


torch.Size([395, 4, 23])
torch.Size([395, 4, 256])
torch.Size([395, 4, 40])


torch.Size([507, 4, 23])
torch.Size([507, 4, 256])
torch.Size([507, 4, 40])




KeyboardInterrupt: 

# RUN ALL ABOVE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

## Training

#### Study of Gradient Clipping

In [None]:
# Rebuild the experiment configuration for this section. This cell relies on
# `vocab`, `device` and `namedtuple` already being defined by earlier cells.
args = dict(
    seed=123,
    train_json='train_fbank.json',
    val_json='dev_fbank.json',
    test_json='test_fbank.json',
    batch_size=4,
    num_layers=1,
    fbank_dims=23,
    model_dims=128,
    concat=1,
    lr=0.5,
    vocab=vocab,
    report_interval=50,
    num_epochs=20,
    device=device,
)

# Convert to a namedtuple so that settings are read as attributes (args.lr, ...).
args = namedtuple('x', args)(**args)

In [4]:
import models
from datetime import datetime
from trainer_Adam import train

# Same model construction as the baseline cells, but `train` now comes from
# `trainer_Adam` (this rebinds the name `train` for any cell run afterwards).
# NOTE(review): the recorded run of this cell reported `loss: nan` from the
# first report onwards; args.lr (0.5) may be too large for this trainer —
# confirm before relying on these results.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum([weights.numel() for weights in model.parameters()])
print(f'Total number of model parameters is {num_params}')

# Time the whole run: moving the model to the device plus training.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

# Report the elapsed wall-clock time in whole minutes (floored).
duration = (end - start).total_seconds()
print(f'Training finished in {duration // 60} minutes.')
print(f'Model saved to {model_path}')

Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: nan
  batch 100 loss: nan
  batch 150 loss: nan


KeyboardInterrupt: 

### Baseline Here

In [4]:
import models

# Baseline BiLSTM, configured entirely from `args`.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
# Count the scalar entries across all parameter tensors.
num_params = sum(map(lambda weights: weights.numel(), model.parameters()))
print('Total number of model parameters is %s' % (num_params,))

Total number of model parameters is 166952


In [5]:
from datetime import datetime
from trainer import train

# Train the baseline `model` built in the previous cell and time the run.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

# Elapsed wall-clock time, reported as a floored number of minutes.
duration = (end - start).total_seconds()
print(f'Training finished in {duration // 60} minutes.')
print(f'Model saved to {model_path}')

EPOCH 1:
  batch 50 loss: 5.086774172782898
  batch 100 loss: 3.283014554977417
  batch 150 loss: 3.090360550880432
  batch 200 loss: 2.793375668525696
  batch 250 loss: 2.614130387306213
  batch 300 loss: 2.423339204788208
  batch 350 loss: 2.2986184406280517
  batch 400 loss: 2.2792709922790526
  batch 450 loss: 2.1971713137626647
  batch 500 loss: 2.0925161623954773
  batch 550 loss: 2.0330736899375914
  batch 600 loss: 1.9948135256767272
  batch 650 loss: 1.9030866765975951
  batch 700 loss: 1.894078896045685
  batch 750 loss: 1.8234505438804627
  batch 800 loss: 1.8049822735786438
  batch 850 loss: 1.7591145157814025
  batch 900 loss: 1.7552357149124145
LOSS train 1.75524 valid 1.71082, valid PER 62.31%
EPOCH 2:
  batch 50 loss: 1.7059818196296692
  batch 100 loss: 1.632240788936615
  batch 150 loss: 1.6225002598762512
  batch 200 loss: 1.6177998876571655
  batch 250 loss: 1.6065989542007446
  batch 300 loss: 1.5903397655487062
  batch 350 loss: 1.4828792119026184
  batch 400 loss

  batch 400 loss: 0.7491212117671967
  batch 450 loss: 0.7511372649669648
  batch 500 loss: 0.7659581100940704
  batch 550 loss: 0.6965995353460311
  batch 600 loss: 0.7450645101070404
  batch 650 loss: 0.7636515700817108
  batch 700 loss: 0.7618362134695054
  batch 750 loss: 0.7375964295864105
  batch 800 loss: 0.7501379823684693
  batch 850 loss: 0.7929791331291198
  batch 900 loss: 0.7944529736042023
LOSS train 0.79445 valid 0.95214, valid PER 28.64%
EPOCH 13:
  batch 50 loss: 0.6863662838935852
  batch 100 loss: 0.7173892110586166
  batch 150 loss: 0.6900480782985687
  batch 200 loss: 0.7261819589138031
  batch 250 loss: 0.6869390255212784
  batch 300 loss: 0.6909507423639297
  batch 350 loss: 0.7214938408136368
  batch 400 loss: 0.7088409268856048
  batch 450 loss: 0.7219684791564941
  batch 500 loss: 0.7039810448884964
  batch 550 loss: 0.7466556972265244
  batch 600 loss: 0.7109966671466827
  batch 650 loss: 0.7457374995946884
  batch 700 loss: 0.7470722872018815
  batch 750 los

### DropOut Study here

#### Dropout Study for baseline model is here

In [4]:
import model_regularisation_dropout
from datetime import datetime
from trainer import train
import torch
from decoder import decode

print("Start dropout tuning")

dropout_rates = [0.1, 0.3, 0.5]

# For each candidate rate: train a fresh model, reload the checkpoint whose
# path `train` returns, and score it on the test set.
for dropout_rate in dropout_rates:
    model_with_dropout = model_regularisation_dropout.BiLSTM(
        args.num_layers,
        args.fbank_dims * args.concat,
        args.model_dims,
        len(args.vocab),
        dropout_rate,
    )
    num_params = sum([weights.numel() for weights in model_with_dropout.parameters()])
    print(f'Total number of model parameters is {num_params}')

    # Time the run: device transfer plus training.
    start = datetime.now()
    model_with_dropout.to(args.device)
    model_path = train(model_with_dropout, args)
    end = datetime.now()
    duration = (end - start).total_seconds()
    print(f'Training finished in {duration // 60} minutes.')
    print(f'Model saved to {model_path}')

    # Restore the saved weights and switch to evaluation mode before decoding.
    print(f'Loading model from {model_path}')
    model_with_dropout.load_state_dict(torch.load(model_path, map_location=device))
    model_with_dropout.eval()
    results = decode(model_with_dropout, args, args.test_json)
    print(
        "For dropout rate {} the best model has "
        "SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(
            dropout_rate, *results
        )
    )

print("End dropout tuning")

Start dropout tuning
Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: 4.890107951164246
  batch 100 loss: 3.123832583427429
  batch 150 loss: 2.9612038803100584
  batch 200 loss: 2.837465834617615
  batch 250 loss: 2.681118760108948
  batch 300 loss: 2.47402494430542
  batch 350 loss: 2.3582753562927246
  batch 400 loss: 2.306531598567963
  batch 450 loss: 2.236013696193695
  batch 500 loss: 2.1494071006774904
  batch 550 loss: 2.0773372983932497
  batch 600 loss: 2.029969322681427
  batch 650 loss: 1.9386557745933533
  batch 700 loss: 1.9379531598091126
  batch 750 loss: 1.8681982254981995
  batch 800 loss: 1.850289089679718
  batch 850 loss: 1.8001644897460938
  batch 900 loss: 1.7743332695960998
LOSS train 1.77433 valid 1.70116, valid PER 64.41%
EPOCH 2:
  batch 50 loss: 1.7266140460968018
  batch 100 loss: 1.683559033870697
  batch 150 loss: 1.649568510055542
  batch 200 loss: 1.6512782955169678
  batch 250 loss: 1.6526165342330932
  batch 300 loss: 1.6292884159

  batch 350 loss: 0.8611113953590394
  batch 400 loss: 0.8878334987163544
  batch 450 loss: 0.8760842978954315
  batch 500 loss: 0.9018292856216431
  batch 550 loss: 0.8373200178146363
  batch 600 loss: 0.8450618469715119
  batch 650 loss: 0.8954958248138428
  batch 700 loss: 0.8742932832241058
  batch 750 loss: 0.8496621632575989
  batch 800 loss: 0.8632363307476044
  batch 850 loss: 0.9055159294605255
  batch 900 loss: 0.8848416638374329
LOSS train 0.88484 valid 0.94071, valid PER 29.80%
EPOCH 13:
  batch 50 loss: 0.8132411193847656
  batch 100 loss: 0.8469280171394348
  batch 150 loss: 0.8175043714046478
  batch 200 loss: 0.8333542454242706
  batch 250 loss: 0.8340844535827636
  batch 300 loss: 0.8193296492099762
  batch 350 loss: 0.8398231458663941
  batch 400 loss: 0.8458323919773102
  batch 450 loss: 0.8519457268714905
  batch 500 loss: 0.821226360797882
  batch 550 loss: 0.8468969118595123
  batch 600 loss: 0.8427791285514832
  batch 650 loss: 0.8556023001670837
  batch 700 loss

  batch 300 loss: 1.4449453711509705
  batch 350 loss: 1.4997631525993347
  batch 400 loss: 1.463903205394745
  batch 450 loss: 1.444760868549347
  batch 500 loss: 1.4349454879760741
  batch 550 loss: 1.4261809849739076
  batch 600 loss: 1.403098659515381
  batch 650 loss: 1.3790776658058166
  batch 700 loss: 1.4032788324356078
  batch 750 loss: 1.4609530544281006
  batch 800 loss: 1.3834053802490234
  batch 850 loss: 1.4100740432739258
  batch 900 loss: 1.3578569269180298
LOSS train 1.35786 valid 1.34689, valid PER 46.03%
EPOCH 4:
  batch 50 loss: 1.3459925580024719
  batch 100 loss: 1.3574698305130004
  batch 150 loss: 1.3052812790870667
  batch 200 loss: 1.3688257503509522
  batch 250 loss: 1.3544222950935363
  batch 300 loss: 1.367674810886383
  batch 350 loss: 1.2633470797538757
  batch 400 loss: 1.3262374711036682
  batch 450 loss: 1.3091137957572938
  batch 500 loss: 1.3010404324531555
  batch 550 loss: 1.3079396271705628
  batch 600 loss: 1.3253489208221436
  batch 650 loss: 1.

  batch 650 loss: 0.9395581877231598
  batch 700 loss: 0.9653921222686768
  batch 750 loss: 0.9124148428440094
  batch 800 loss: 0.9002695143222809
  batch 850 loss: 0.9361945915222168
  batch 900 loss: 0.9338679730892181
LOSS train 0.93387 valid 0.97972, valid PER 31.15%
EPOCH 15:
  batch 50 loss: 0.9046013760566711
  batch 100 loss: 0.8899940848350525
  batch 150 loss: 0.9034054362773896
  batch 200 loss: 0.9487513303756714
  batch 250 loss: 0.9294521808624268
  batch 300 loss: 0.89101478099823
  batch 350 loss: 0.8995310854911804
  batch 400 loss: 0.896698739528656
  batch 450 loss: 0.9088983130455017
  batch 500 loss: 0.8744363379478455
  batch 550 loss: 0.9166146326065063
  batch 600 loss: 0.9526425337791443
  batch 650 loss: 0.9376431119441986
  batch 700 loss: 0.9414861524105071
  batch 750 loss: 0.9244101524353028
  batch 800 loss: 0.9065834474563599
  batch 850 loss: 0.9027424621582031
  batch 900 loss: 0.9141540694236755
LOSS train 0.91415 valid 0.97418, valid PER 30.84%
EPOC

  batch 650 loss: 1.3292195630073547
  batch 700 loss: 1.3736177134513854
  batch 750 loss: 1.3042390489578246
  batch 800 loss: 1.3427158617973327
  batch 850 loss: 1.3480320596694946
  batch 900 loss: 1.3546583557128906
LOSS train 1.35466 valid 1.26431, valid PER 43.73%
EPOCH 6:
  batch 50 loss: 1.330161874294281
  batch 100 loss: 1.2745098960399628
  batch 150 loss: 1.283777447938919
  batch 200 loss: 1.2954178428649903
  batch 250 loss: 1.320525608062744
  batch 300 loss: 1.2892670106887818
  batch 350 loss: 1.300743155479431
  batch 400 loss: 1.2775436902046204
  batch 450 loss: 1.3058509886264802
  batch 500 loss: 1.2866346991062165
  batch 550 loss: 1.3192792558670043
  batch 600 loss: 1.2683533573150634
  batch 650 loss: 1.2947034525871277
  batch 700 loss: 1.2748800683021546
  batch 750 loss: 1.2526218175888062
  batch 800 loss: 1.2498886799812317
  batch 850 loss: 1.2526446998119354
  batch 900 loss: 1.2733447825908661
LOSS train 1.27334 valid 1.19146, valid PER 40.92%
EPOCH 

  batch 50 loss: 0.9936682057380676
  batch 100 loss: 0.992582665681839
  batch 150 loss: 0.9895037305355072
  batch 200 loss: 0.9792919480800628
  batch 250 loss: 0.9966071152687073
  batch 300 loss: 0.9792253005504609
  batch 350 loss: 0.9785455143451691
  batch 400 loss: 1.0347241938114167
  batch 450 loss: 1.0235244929790497
  batch 500 loss: 0.9943523812294006
  batch 550 loss: 1.0067520797252656
  batch 600 loss: 1.0146877717971803
  batch 650 loss: 1.0043502390384673
  batch 700 loss: 0.9991615414619446
  batch 750 loss: 0.97656534075737
  batch 800 loss: 0.9910345017910004
  batch 850 loss: 0.9893876528739929
  batch 900 loss: 0.9616194927692413
LOSS train 0.96162 valid 0.98419, valid PER 32.26%
EPOCH 18:
  batch 50 loss: 0.9784866058826447
  batch 100 loss: 0.9820462501049042
  batch 150 loss: 0.99570272564888
  batch 200 loss: 1.0098820960521697
  batch 250 loss: 0.9774351942539216
  batch 300 loss: 0.9623030960559845
  batch 350 loss: 1.0032907402515412
  batch 400 loss: 0.9

#### Dropout Study for 2 layer LSTM is here

##### run baseline first

In [6]:
import models

# Two-layer baseline: the layer count is fixed to 2 here instead of being
# taken from args.num_layers; every other size comes from `args`.
model = models.BiLSTM(
    2,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum([weights.numel() for weights in model.parameters()])
print(f'Total number of model parameters is {num_params}')

from datetime import datetime
from trainer import train

# Time the whole run: moving the model to the device plus training.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

# Elapsed wall-clock time, reported as a floored number of minutes.
duration = (end - start).total_seconds()
print(f'Training finished in {duration // 60} minutes.')
print(f'Model saved to {model_path}')

Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 5.061193742752075
  batch 100 loss: 3.3865881252288816
  batch 150 loss: 3.293755221366882
  batch 200 loss: 3.175789179801941
  batch 250 loss: 3.0672779417037965
  batch 300 loss: 2.830714225769043
  batch 350 loss: 2.682212929725647
  batch 400 loss: 2.5671300077438355
  batch 450 loss: 2.472991647720337
  batch 500 loss: 2.35486225605011
  batch 550 loss: 2.2587716722488405
  batch 600 loss: 2.1914604473114014
  batch 650 loss: 2.0849543738365175
  batch 700 loss: 2.074123406410217
  batch 750 loss: 1.9836278295516967
  batch 800 loss: 1.9454709339141845
  batch 850 loss: 1.8785255074501037
  batch 900 loss: 1.837413592338562
LOSS train 1.83741 valid 1.74688, valid PER 67.77%
EPOCH 2:
  batch 50 loss: 1.7467482495307922
  batch 100 loss: 1.6714426279067993
  batch 150 loss: 1.625415678024292
  batch 200 loss: 1.6248766922950744
  batch 250 loss: 1.6164043831825257
  batch 300 loss: 1.5586814308166503
  batch 350 l

  batch 350 loss: 0.6080732196569443
  batch 400 loss: 0.6391519087553025
  batch 450 loss: 0.6158985376358033
  batch 500 loss: 0.6238083720207215
  batch 550 loss: 0.5855730122327805
  batch 600 loss: 0.5930833226442337
  batch 650 loss: 0.6341790676116943
  batch 700 loss: 0.6083855080604553
  batch 750 loss: 0.6148589742183685
  batch 800 loss: 0.6039435869455337
  batch 850 loss: 0.6571168112754822
  batch 900 loss: 0.6443538123369217
LOSS train 0.64435 valid 0.79932, valid PER 24.45%
EPOCH 13:
  batch 50 loss: 0.5453387981653214
  batch 100 loss: 0.5575257384777069
  batch 150 loss: 0.5445376896858215
  batch 200 loss: 0.5853622877597808
  batch 250 loss: 0.5634567672014237
  batch 300 loss: 0.555397053360939
  batch 350 loss: 0.5717096066474915
  batch 400 loss: 0.5824231278896331
  batch 450 loss: 0.5789652889966965
  batch 500 loss: 0.5449232518672943
  batch 550 loss: 0.6015948045253754
  batch 600 loss: 0.5736577785015107
  batch 650 loss: 0.6035661166906356
  batch 700 loss

##### Experiment for dropout in the feed-forward layer

In [5]:
import model_regularisation_dropout
from datetime import datetime
from trainer import train
import torch
from decoder import decode

print("Start dropout tuning For 2 Layer LSTM")

dropout_rates = [0.1, 0.2, 0.3, 0.4, 0.5]

# For each candidate rate: train a fresh two-layer model, reload the
# checkpoint whose path `train` returns, and score it on the test set.
for dropout_rate in dropout_rates:
    model_with_dropout = model_regularisation_dropout.BiLSTM(
        2,
        args.fbank_dims * args.concat,
        args.model_dims,
        len(args.vocab),
        dropout_rate,
    )
    num_params = sum([weights.numel() for weights in model_with_dropout.parameters()])
    print(f'Total number of model parameters is {num_params}')

    # Time the run: device transfer plus training.
    start = datetime.now()
    model_with_dropout.to(args.device)
    model_path = train(model_with_dropout, args)
    end = datetime.now()
    duration = (end - start).total_seconds()
    print(f'Training finished in {duration // 60} minutes.')
    print(f'Model saved to {model_path}')

    # Restore the saved weights and switch to evaluation mode before decoding.
    print(f'Loading model from {model_path}')
    model_with_dropout.load_state_dict(torch.load(model_path, map_location=device))
    model_with_dropout.eval()
    results = decode(model_with_dropout, args, args.test_json)
    print(
        "For dropout rate {} the best model has "
        "SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(
            dropout_rate, *results
        )
    )

print("End dropout tuning For 2 Layer LSTM")

Start dropout tuning For 2 Layer LSTM
Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 5.060188307762146
  batch 100 loss: 3.3858893871307374
  batch 150 loss: 3.2872353744506837
  batch 200 loss: 3.1795988035202027
  batch 250 loss: 3.067919526100159
  batch 300 loss: 2.8352953433990478
  batch 350 loss: 2.6856843566894533
  batch 400 loss: 2.571924910545349
  batch 450 loss: 2.4816431427001953
  batch 500 loss: 2.3627015018463133
  batch 550 loss: 2.2688104462623597
  batch 600 loss: 2.207112829685211
  batch 650 loss: 2.1014141702651976
  batch 700 loss: 2.0844292569160463
  batch 750 loss: 1.9996450185775756
  batch 800 loss: 1.9671517157554625
  batch 850 loss: 1.8950777316093446
  batch 900 loss: 1.858528697490692
LOSS train 1.85853 valid 1.75822, valid PER 68.83%
EPOCH 2:
  batch 50 loss: 1.771159212589264
  batch 100 loss: 1.6993750786781312
  batch 150 loss: 1.6493911504745484
  batch 200 loss: 1.6494285464286804
  batch 250 loss: 1.6486912989616394
  batch

  batch 300 loss: 0.6378621757030487
  batch 350 loss: 0.6293581295013427
  batch 400 loss: 0.657848391532898
  batch 450 loss: 0.6672449773550033
  batch 500 loss: 0.6682946443557739
  batch 550 loss: 0.6269550216197968
  batch 600 loss: 0.6290220689773559
  batch 650 loss: 0.6826324135065078
  batch 700 loss: 0.6652283596992493
  batch 750 loss: 0.6575614804029465
  batch 800 loss: 0.6290613812208176
  batch 850 loss: 0.6931658500432968
  batch 900 loss: 0.6664961874485016
LOSS train 0.66650 valid 0.79776, valid PER 24.34%
EPOCH 13:
  batch 50 loss: 0.6145913648605347
  batch 100 loss: 0.6273281401395798
  batch 150 loss: 0.5997854423522949
  batch 200 loss: 0.6248656010627747
  batch 250 loss: 0.6095538872480393
  batch 300 loss: 0.5999653202295303
  batch 350 loss: 0.6087235271930694
  batch 400 loss: 0.6314075326919556
  batch 450 loss: 0.6423661768436432
  batch 500 loss: 0.5914792603254319
  batch 550 loss: 0.6529319715499878
  batch 600 loss: 0.6179822385311127
  batch 650 loss

  batch 250 loss: 1.2253919792175294
  batch 300 loss: 1.2173623478412627
  batch 350 loss: 1.2595860767364502
  batch 400 loss: 1.2377088379859924
  batch 450 loss: 1.2120272827148437
  batch 500 loss: 1.185941617488861
  batch 550 loss: 1.1956944096088409
  batch 600 loss: 1.176425689458847
  batch 650 loss: 1.1383632922172546
  batch 700 loss: 1.1559358644485473
  batch 750 loss: 1.204849693775177
  batch 800 loss: 1.1424061834812165
  batch 850 loss: 1.1770251297950745
  batch 900 loss: 1.1132957673072814
LOSS train 1.11330 valid 1.10549, valid PER 34.28%
EPOCH 4:
  batch 50 loss: 1.0894754946231842
  batch 100 loss: 1.1174729120731355
  batch 150 loss: 1.0627294540405274
  batch 200 loss: 1.093819923400879
  batch 250 loss: 1.1063812112808227
  batch 300 loss: 1.1057107102870942
  batch 350 loss: 1.0285395288467407
  batch 400 loss: 1.0673136389255524
  batch 450 loss: 1.0644252824783325
  batch 500 loss: 1.050697650909424
  batch 550 loss: 1.0643089628219604
  batch 600 loss: 1.0

  batch 600 loss: 0.6053373885154724
  batch 650 loss: 0.6350982815027237
  batch 700 loss: 0.6586503267288208
  batch 750 loss: 0.6282279473543168
  batch 800 loss: 0.6025258708000183
  batch 850 loss: 0.6350004637241363
  batch 900 loss: 0.6452437686920166
LOSS train 0.64524 valid 0.78825, valid PER 24.21%
EPOCH 15:
  batch 50 loss: 0.5861858868598938
  batch 100 loss: 0.5999251955747604
  batch 150 loss: 0.5993826723098755
  batch 200 loss: 0.6458119481801987
  batch 250 loss: 0.6181056714057922
  batch 300 loss: 0.6017287302017212
  batch 350 loss: 0.593247184753418
  batch 400 loss: 0.6009963518381118
  batch 450 loss: 0.589186400771141
  batch 500 loss: 0.5717826420068741
  batch 550 loss: 0.588403902053833
  batch 600 loss: 0.6028299361467362
  batch 650 loss: 0.6066710674762725
  batch 700 loss: 0.6249493777751922
  batch 750 loss: 0.6228002494573593
  batch 800 loss: 0.6010503679513931
  batch 850 loss: 0.5779400831460952
  batch 900 loss: 0.5966325944662094
LOSS train 0.59663

  batch 600 loss: 1.0412308597564697
  batch 650 loss: 1.0034085965156556
  batch 700 loss: 1.0266603875160216
  batch 750 loss: 0.9685362911224366
  batch 800 loss: 1.006379669904709
  batch 850 loss: 0.9903412997722626
  batch 900 loss: 0.9812425303459168
LOSS train 0.98124 valid 0.96009, valid PER 30.27%
EPOCH 6:
  batch 50 loss: 0.991793270111084
  batch 100 loss: 0.9478887474536896
  batch 150 loss: 0.9363275623321533
  batch 200 loss: 0.9389587044715881
  batch 250 loss: 0.9768990278244019
  batch 300 loss: 0.955677160024643
  batch 350 loss: 0.9432829308509827
  batch 400 loss: 0.9343936204910278
  batch 450 loss: 0.9662741053104401
  batch 500 loss: 0.9358430671691894
  batch 550 loss: 0.9594438862800598
  batch 600 loss: 0.9294424343109131
  batch 650 loss: 0.9259474420547485
  batch 700 loss: 0.9411460590362549
  batch 750 loss: 0.9257644736766815
  batch 800 loss: 0.9320670747756958
  batch 850 loss: 0.9153945326805115
  batch 900 loss: 0.9410320901870728
LOSS train 0.94103 

LOSS train 0.63307 valid 0.79375, valid PER 24.03%
EPOCH 17:
  batch 50 loss: 0.595822229385376
  batch 100 loss: 0.6046186757087707
  batch 150 loss: 0.5873568183183671
  batch 200 loss: 0.6070578736066818
  batch 250 loss: 0.6088601517677307
  batch 300 loss: 0.6076909255981445
  batch 350 loss: 0.5879766058921814
  batch 400 loss: 0.6228116065263748
  batch 450 loss: 0.6273621267080307
  batch 500 loss: 0.5882393771409988
  batch 550 loss: 0.5966208469867706
  batch 600 loss: 0.6305315935611725
  batch 650 loss: 0.6219890838861466
  batch 700 loss: 0.6006630831956863
  batch 750 loss: 0.5853275191783905
  batch 800 loss: 0.6006005495786667
  batch 850 loss: 0.619127676486969
  batch 900 loss: 0.6113909250497818
LOSS train 0.61139 valid 0.78189, valid PER 23.52%
EPOCH 18:
  batch 50 loss: 0.5692068809270858
  batch 100 loss: 0.5692355996370315
  batch 150 loss: 0.6171820616722107
  batch 200 loss: 0.5824350708723068
  batch 250 loss: 0.5909203869104386
  batch 300 loss: 0.57299011886

LOSS train 0.97421 valid 0.91337, valid PER 28.62%
EPOCH 8:
  batch 50 loss: 0.8876850199699402
  batch 100 loss: 0.8773271298408508
  batch 150 loss: 0.8931175661087036
  batch 200 loss: 0.8836658608913421
  batch 250 loss: 0.9137315022945404
  batch 300 loss: 0.8328019762039185
  batch 350 loss: 0.9143855500221253
  batch 400 loss: 0.869559268951416
  batch 450 loss: 0.8946341276168823
  batch 500 loss: 0.9204366517066955
  batch 550 loss: 0.8568529117107392
  batch 600 loss: 0.9086426544189453
  batch 650 loss: 0.9305352950096131
  batch 700 loss: 0.8786245501041412
  batch 750 loss: 0.8719937098026276
  batch 800 loss: 0.8766406321525574
  batch 850 loss: 0.8653351473808288
  batch 900 loss: 0.8790978229045868
LOSS train 0.87910 valid 0.88626, valid PER 27.91%
EPOCH 9:
  batch 50 loss: 0.8373034703731537
  batch 100 loss: 0.8607512807846069
  batch 150 loss: 0.8571032214164734
  batch 200 loss: 0.8345617818832397
  batch 250 loss: 0.8798775947093964
  batch 300 loss: 0.888036468029

  batch 300 loss: 0.5964150148630142
  batch 350 loss: 0.584349552989006
  batch 400 loss: 0.6217438161373139
  batch 450 loss: 0.6059185075759888
  batch 500 loss: 0.6116536611318588
  batch 550 loss: 0.586358442902565
  batch 600 loss: 0.6103448206186295
  batch 650 loss: 0.6484892344474793
  batch 700 loss: 0.6030696624517441
  batch 750 loss: 0.5944795697927475
  batch 800 loss: 0.6122959280014038
  batch 850 loss: 0.6109815490245819
  batch 900 loss: 0.6378160607814789
LOSS train 0.63782 valid 0.78938, valid PER 23.62%
EPOCH 20:
  batch 50 loss: 0.5672115510702134
  batch 100 loss: 0.5634790045022965
  batch 150 loss: 0.5358835357427597
  batch 200 loss: 0.6013105577230453
  batch 250 loss: 0.5662721729278565
  batch 300 loss: 0.5920798474550247
  batch 350 loss: 0.5576145344972611
  batch 400 loss: 0.5852137714624405
  batch 450 loss: 0.5750066870450974
  batch 500 loss: 0.564138895869255
  batch 550 loss: 0.6369296914339065
  batch 600 loss: 0.568873291015625
  batch 650 loss: 0

  batch 300 loss: 0.8087916779518127
  batch 350 loss: 0.8575797843933105
  batch 400 loss: 0.798571162223816
  batch 450 loss: 0.8150906187295913
  batch 500 loss: 0.8718073892593384
  batch 550 loss: 0.8829818296432496
  batch 600 loss: 0.8470646035671234
  batch 650 loss: 0.8501823830604553
  batch 700 loss: 0.8563916122913361
  batch 750 loss: 0.8399455380439759
  batch 800 loss: 0.8496668922901154
  batch 850 loss: 0.8581938958168029
  batch 900 loss: 0.8597870779037475
LOSS train 0.85979 valid 0.86082, valid PER 27.16%
EPOCH 11:
  batch 50 loss: 0.7826523685455322
  batch 100 loss: 0.7741999769210816
  batch 150 loss: 0.7790495038032532
  batch 200 loss: 0.8406532180309295
  batch 250 loss: 0.8234323173761368
  batch 300 loss: 0.7882323038578033
  batch 350 loss: 0.8014730489253998
  batch 400 loss: 0.8232809960842132
  batch 450 loss: 0.8469123458862304
  batch 500 loss: 0.7954194748401642
  batch 550 loss: 0.818327466249466
  batch 600 loss: 0.7994848990440369
  batch 650 loss:

#### Experiment for dropout between the LSTM layers

In [4]:
import model_regularisation_dropout_between_layer
from datetime import datetime
from trainer import train
import torch
from decoder import decode

# Sweep the dropout rate for a 2-layer BiLSTM whose dropout sits between the
# two LSTM layers. Each rate gets a freshly constructed model, which is
# trained, reloaded from the checkpoint path returned by `train`, and scored
# on the test set.
print("Start dropout tuning For 2 Layer LSTM, with Dropout between layer")

dropout_rates = [0.1, 0.2, 0.3, 0.4, 0.5]

for dropout_rate in dropout_rates:
    # A new model per rate, so no weights carry over between runs.
    model_with_dropout = model_regularisation_dropout_between_layer.BiLSTM(
        2,  # number of LSTM layers is fixed at 2 here, independent of args.num_layers
        args.fbank_dims * args.concat,
        args.model_dims,
        len(args.vocab),
        dropout_rate,
    )
    num_params = sum(weight.numel() for weight in model_with_dropout.parameters())
    print('Total number of model parameters is {}'.format(num_params))

    # The timed span covers the device transfer as well as training.
    start = datetime.now()
    model_with_dropout.to(args.device)
    model_path = train(model_with_dropout, args)
    end = datetime.now()
    duration = (end - start).total_seconds()
    elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
    print('Training finished in {} minutes.'.format(elapsed_minutes))
    print('Model saved to {}'.format(model_path))

    # Reload the saved weights before decoding.
    # NOTE(review): presumably `train` saves the best-validation checkpoint
    # (the message below says "best model") - confirm in trainer.py.
    print('Loading model from {}'.format(model_path))
    checkpoint = torch.load(model_path, map_location=device)
    model_with_dropout.load_state_dict(checkpoint)
    model_with_dropout.eval()

    # `decode` is expected to yield five values matching the labels below
    # (SUB, DEL, INS, COR, PER) - order assumed from the format string.
    results = decode(model_with_dropout, args, args.test_json)
    error_summary = "SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(*results)
    print("For dropout rate "+str(dropout_rate)+" the best model has "+error_summary)

print("End dropout tuning For 2 Layer LSTM")

Start dropout tuning For 2 Layer LSTM, with Dropout between layer
Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 5.159642701148987
  batch 100 loss: 3.3973202085494996
  batch 150 loss: 3.2978872632980347
  batch 200 loss: 3.19533148765564
  batch 250 loss: 3.100555667877197
  batch 300 loss: 2.9161056089401245
  batch 350 loss: 2.7046935987472533
  batch 400 loss: 2.5531535291671754
  batch 450 loss: 2.424420223236084
  batch 500 loss: 2.2945020961761475
  batch 550 loss: 2.2176200222969054
  batch 600 loss: 2.1382415318489074
  batch 650 loss: 2.0364141869544983
  batch 700 loss: 2.018544771671295
  batch 750 loss: 1.9257749223709106
  batch 800 loss: 1.8891727948188781
  batch 850 loss: 1.820354528427124
  batch 900 loss: 1.7865184712409974
LOSS train 1.78652 valid 1.70620, valid PER 65.12%
EPOCH 2:
  batch 50 loss: 1.7227200317382811
  batch 100 loss: 1.68451096534729
  batch 150 loss: 1.5704996657371522
  batch 200 loss: 1.581278338432312
  batch 250 loss: 1.

  batch 250 loss: 0.6078583329916001
  batch 300 loss: 0.6378869652748108
  batch 350 loss: 0.6335612207651138
  batch 400 loss: 0.6514451956748962
  batch 450 loss: 0.6207901287078857
  batch 500 loss: 0.6351327425241471
  batch 550 loss: 0.628686649799347
  batch 600 loss: 0.6425564384460449
  batch 650 loss: 0.6398937177658081
  batch 700 loss: 0.629478639960289
  batch 750 loss: 0.6594138967990876
  batch 800 loss: 0.6157864356040954
  batch 850 loss: 0.6348924052715301
  batch 900 loss: 0.6571180975437164
LOSS train 0.65712 valid 0.83148, valid PER 24.79%
EPOCH 13:
  batch 50 loss: 0.5728895884752273
  batch 100 loss: 0.5750460773706436
  batch 150 loss: 0.6032135576009751
  batch 200 loss: 0.5611831277608872
  batch 250 loss: 0.5755988365411758
  batch 300 loss: 0.6252626669406891
  batch 350 loss: 0.5710573083162308
  batch 400 loss: 0.5742033255100251
  batch 450 loss: 0.6195205962657928
  batch 500 loss: 0.5960983198881149
  batch 550 loss: 0.6438603734970093
  batch 600 loss:

  batch 150 loss: 1.2096086835861206
  batch 200 loss: 1.1884620404243469
  batch 250 loss: 1.1690257263183594
  batch 300 loss: 1.1723815131187438
  batch 350 loss: 1.205858461856842
  batch 400 loss: 1.1960083281993865
  batch 450 loss: 1.1695835435390471
  batch 500 loss: 1.1414825558662414
  batch 550 loss: 1.1609120881557464
  batch 600 loss: 1.134669336080551
  batch 650 loss: 1.1070166504383088
  batch 700 loss: 1.1371642458438873
  batch 750 loss: 1.1779406464099884
  batch 800 loss: 1.1055290389060974
  batch 850 loss: 1.1335516571998596
  batch 900 loss: 1.0688273131847381
LOSS train 1.06883 valid 1.11555, valid PER 34.22%
EPOCH 4:
  batch 50 loss: 1.0532942175865174
  batch 100 loss: 1.0683598721027374
  batch 150 loss: 1.034887113571167
  batch 200 loss: 1.0705954933166504
  batch 250 loss: 1.074849317073822
  batch 300 loss: 1.0754039251804353
  batch 350 loss: 1.005651081800461
  batch 400 loss: 1.047360190153122
  batch 450 loss: 1.0238527595996856
  batch 500 loss: 1.01

  batch 500 loss: 0.620779339671135
  batch 550 loss: 0.6399252843856812
  batch 600 loss: 0.5977893185615539
  batch 650 loss: 0.6163865929841995
  batch 700 loss: 0.6399544936418533
  batch 750 loss: 0.6093517124652863
  batch 800 loss: 0.5792429566383361
  batch 850 loss: 0.6445688301324844
  batch 900 loss: 0.6407246857881546
LOSS train 0.64072 valid 0.79840, valid PER 24.60%
EPOCH 15:
  batch 50 loss: 0.5700673931837081
  batch 100 loss: 0.5674907672405243
  batch 150 loss: 0.5778180158138275
  batch 200 loss: 0.5873262399435043
  batch 250 loss: 0.5937344270944596
  batch 300 loss: 0.5569453924894333
  batch 350 loss: 0.5643184167146683
  batch 400 loss: 0.5726757872104645
  batch 450 loss: 0.5894898724555969
  batch 500 loss: 0.5615575021505356
  batch 550 loss: 0.5855351138114929
  batch 600 loss: 0.5973487722873688
  batch 650 loss: 0.6143311256170273
  batch 700 loss: 0.6140314596891403
  batch 750 loss: 0.5958060890436172
  batch 800 loss: 0.5788542813062668
  batch 850 loss

  batch 450 loss: 0.941043244600296
  batch 500 loss: 0.9723732626438141
  batch 550 loss: 0.9100196421146393
  batch 600 loss: 1.00697514295578
  batch 650 loss: 0.9659905230998993
  batch 700 loss: 0.9973909258842468
  batch 750 loss: 0.9281650209426879
  batch 800 loss: 0.9481864285469055
  batch 850 loss: 0.9540855801105499
  batch 900 loss: 0.9482898414134979
LOSS train 0.94829 valid 0.95004, valid PER 29.79%
EPOCH 6:
  batch 50 loss: 0.9615928161144257
  batch 100 loss: 0.9025125396251679
  batch 150 loss: 0.8930125784873962
  batch 200 loss: 0.8987377691268921
  batch 250 loss: 0.9303556144237518
  batch 300 loss: 0.9119657778739929
  batch 350 loss: 0.9221961581707001
  batch 400 loss: 0.882160769701004
  batch 450 loss: 0.8995257270336151
  batch 500 loss: 0.9061815440654755
  batch 550 loss: 0.9325819683074951
  batch 600 loss: 0.9087784516811371
  batch 650 loss: 0.9018893229961396
  batch 700 loss: 0.897092980146408
  batch 750 loss: 0.8979576313495636
  batch 800 loss: 0.8

  batch 800 loss: 0.6136057090759277
  batch 850 loss: 0.594724811911583
  batch 900 loss: 0.5982856005430222
LOSS train 0.59829 valid 0.78509, valid PER 23.37%
EPOCH 17:
  batch 50 loss: 0.5641423815488815
  batch 100 loss: 0.5757684183120727
  batch 150 loss: 0.55726655960083
  batch 200 loss: 0.5570566582679749
  batch 250 loss: 0.5824283641576767
  batch 300 loss: 0.5799760454893113
  batch 350 loss: 0.5588961988687515
  batch 400 loss: 0.6131984454393387
  batch 450 loss: 0.5822772723436356
  batch 500 loss: 0.5751492547988891
  batch 550 loss: 0.5777616864442825
  batch 600 loss: 0.5983699554204941
  batch 650 loss: 0.5830183255672455
  batch 700 loss: 0.5897963726520539
  batch 750 loss: 0.5660801291465759
  batch 800 loss: 0.5594613248109818
  batch 850 loss: 0.6090428054332733
  batch 900 loss: 0.5775185847282409
LOSS train 0.57752 valid 0.78501, valid PER 23.56%
EPOCH 18:
  batch 50 loss: 0.5534746891260147
  batch 100 loss: 0.5442646652460098
  batch 150 loss: 0.571922258734

  batch 800 loss: 0.8555577909946441
  batch 850 loss: 0.859435658454895
  batch 900 loss: 0.9081391489505768
LOSS train 0.90814 valid 0.88513, valid PER 28.38%
EPOCH 8:
  batch 50 loss: 0.8405251121520996
  batch 100 loss: 0.8214319336414337
  batch 150 loss: 0.8381071650981903
  batch 200 loss: 0.8113514482975006
  batch 250 loss: 0.8337798285484314
  batch 300 loss: 0.7827784180641174
  batch 350 loss: 0.8463729596138001
  batch 400 loss: 0.8112364780902862
  batch 450 loss: 0.8192383325099946
  batch 500 loss: 0.8515306162834168
  batch 550 loss: 0.7975841104984284
  batch 600 loss: 0.8451152229309082
  batch 650 loss: 0.8435632967948914
  batch 700 loss: 0.8020948612689972
  batch 750 loss: 0.8217782306671143
  batch 800 loss: 0.8267990458011627
  batch 850 loss: 0.8088662123680115
  batch 900 loss: 0.8475421631336212
LOSS train 0.84754 valid 0.84467, valid PER 25.67%
EPOCH 9:
  batch 50 loss: 0.7642648577690124
  batch 100 loss: 0.7957785105705262
  batch 150 loss: 0.790621660947

  batch 150 loss: 0.5480594873428345
  batch 200 loss: 0.5484050571918487
  batch 250 loss: 0.5587198388576508
  batch 300 loss: 0.5612789684534073
  batch 350 loss: 0.5593012899160386
  batch 400 loss: 0.5686094760894775
  batch 450 loss: 0.5771916115283966
  batch 500 loss: 0.57904261469841
  batch 550 loss: 0.5479791694879532
  batch 600 loss: 0.5465494191646576
  batch 650 loss: 0.6090065342187881
  batch 700 loss: 0.538641282916069
  batch 750 loss: 0.5391879332065582
  batch 800 loss: 0.5789473533630372
  batch 850 loss: 0.5608839809894561
  batch 900 loss: 0.5693364453315735
LOSS train 0.56934 valid 0.77426, valid PER 23.14%
EPOCH 20:
  batch 50 loss: 0.5242852258682251
  batch 100 loss: 0.5167475575208664
  batch 150 loss: 0.5206020820140839
  batch 200 loss: 0.5425648641586304
  batch 250 loss: 0.5406535828113556
  batch 300 loss: 0.5395273864269257
  batch 350 loss: 0.5276573824882508
  batch 400 loss: 0.5531994462013244
  batch 450 loss: 0.5473116832971573
  batch 500 loss: 

  batch 150 loss: 0.7817466545104981
  batch 200 loss: 0.8004298949241638
  batch 250 loss: 0.8127939867973327
  batch 300 loss: 0.7525503706932067
  batch 350 loss: 0.7843699550628662
  batch 400 loss: 0.7542008453607559
  batch 450 loss: 0.7603062313795089
  batch 500 loss: 0.807077763080597
  batch 550 loss: 0.8168228375911712
  batch 600 loss: 0.792718967795372
  batch 650 loss: 0.7877954089641571
  batch 700 loss: 0.8058598983287811
  batch 750 loss: 0.7677250504493713
  batch 800 loss: 0.7968602502346038
  batch 850 loss: 0.7865158331394195
  batch 900 loss: 0.7970047056674957
LOSS train 0.79700 valid 0.83991, valid PER 26.59%
EPOCH 11:
  batch 50 loss: 0.7283470356464385
  batch 100 loss: 0.7113836658000946
  batch 150 loss: 0.7411256635189056
  batch 200 loss: 0.7751183843612671
  batch 250 loss: 0.7557965618371963
  batch 300 loss: 0.7558820939064026
  batch 350 loss: 0.7549328815937042
  batch 400 loss: 0.7963260293006897
  batch 450 loss: 0.7746511602401733
  batch 500 loss:

### Optimiser Study here

#### Try Adam Optimiser

###### Adam optimiser setup (the original setup started with a learning rate of 0.5)

In [4]:
# Run configuration for the Adam experiment with a learning rate of 0.01.
# The settings are collected in a plain dict first and then frozen.
run_settings = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 128,
    'concat': 1,
    'lr': 0.01,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Turn the dict into an immutable record with attribute access
# (args.lr, args.device, ...); field order follows the dict's key order.
RunSettings = namedtuple('x', run_settings)
args = RunSettings(**run_settings)

In [5]:
import models
from datetime import datetime
from trainer_Adam import train


# Build a BiLSTM from the current `args` and report its size.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum(weight.numel() for weight in model.parameters())
print('Total number of model parameters is {}'.format(num_params))

# Train with the `train` imported from trainer_Adam in this cell; the timed
# span includes moving the model to the target device.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

duration = (end - start).total_seconds()
elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
print('Training finished in {} minutes.'.format(elapsed_minutes))
print('Model saved to {}'.format(model_path))

Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: 4.356326017379761
  batch 100 loss: 3.1330445051193236
  batch 150 loss: 2.8419583797454835
  batch 200 loss: 2.561711673736572
  batch 250 loss: 2.3617538261413573
  batch 300 loss: 2.1399513053894044
  batch 350 loss: 1.9726394629478454
  batch 400 loss: 1.9568773770332337
  batch 450 loss: 1.8864873385429382
  batch 500 loss: 1.797443811893463
  batch 550 loss: 1.7341630864143371
  batch 600 loss: 1.7134889769554138
  batch 650 loss: 1.6561346220970155
  batch 700 loss: 1.7090842294692994
  batch 750 loss: 1.6420587301254272
  batch 800 loss: 1.648264808654785
  batch 850 loss: 1.5968303894996643
  batch 900 loss: 1.5695223665237428
LOSS train 1.56952 valid 1.57492, valid PER 50.75%
EPOCH 2:
  batch 50 loss: 1.5474369525909424
  batch 100 loss: 1.5270094859600067
  batch 150 loss: 1.5477495312690734
  batch 200 loss: 1.5761040234565735
  batch 250 loss: 1.545525939464569
  batch 300 loss: 1.5422319173812866
  batch

  batch 350 loss: 1.0934350502490997
  batch 400 loss: 1.1315586709976195
  batch 450 loss: 1.152372659444809
  batch 500 loss: 1.1632296407222749
  batch 550 loss: 1.0659365367889404
  batch 600 loss: 1.10746701836586
  batch 650 loss: 1.1494214141368866
  batch 700 loss: 1.1435328757762908
  batch 750 loss: 1.1246064937114715
  batch 800 loss: 1.0996622264385223
  batch 850 loss: 1.1402192294597626
  batch 900 loss: 1.153761556148529
LOSS train 1.15376 valid 1.22602, valid PER 37.63%
EPOCH 13:
  batch 50 loss: 1.067593069076538
  batch 100 loss: 1.1089253973960878
  batch 150 loss: 1.107014797925949
  batch 200 loss: 1.1286681735515594
  batch 250 loss: 1.1153715670108795
  batch 300 loss: 1.11741379737854
  batch 350 loss: 1.125499370098114
  batch 400 loss: 1.136279535293579
  batch 450 loss: 1.1461881160736085
  batch 500 loss: 1.101652899980545
  batch 550 loss: 1.116063462495804
  batch 600 loss: 1.0991935217380524
  batch 650 loss: 1.1078052914142609
  batch 700 loss: 1.1080184

In [6]:
# Run configuration for the Adam experiment with a learning rate of 0.001.
# The settings are collected in a plain dict first and then frozen.
run_settings = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 128,
    'concat': 1,
    'lr': 0.001,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Turn the dict into an immutable record with attribute access
# (args.lr, args.device, ...); field order follows the dict's key order.
RunSettings = namedtuple('x', run_settings)
args = RunSettings(**run_settings)

In [7]:
import models
from datetime import datetime
from trainer_Adam import train


# Build a BiLSTM from the current `args` and report its size.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum(weight.numel() for weight in model.parameters())
print('Total number of model parameters is {}'.format(num_params))

# Train with the `train` imported from trainer_Adam in this cell; the timed
# span includes moving the model to the target device.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

duration = (end - start).total_seconds()
elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
print('Training finished in {} minutes.'.format(elapsed_minutes))
print('Model saved to {}'.format(model_path))

Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: 8.703279008865357
  batch 100 loss: 3.3648267698287966
  batch 150 loss: 3.2602467775344848
  batch 200 loss: 3.208204164505005
  batch 250 loss: 3.131103720664978
  batch 300 loss: 3.054433708190918
  batch 350 loss: 2.9456876945495605
  batch 400 loss: 2.868926486968994
  batch 450 loss: 2.8202915954589844
  batch 500 loss: 2.7227843618392944
  batch 550 loss: 2.666769208908081
  batch 600 loss: 2.5605522918701173
  batch 650 loss: 2.4976044416427614
  batch 700 loss: 2.399198498725891
  batch 750 loss: 2.3446082973480227
  batch 800 loss: 2.283764147758484
  batch 850 loss: 2.24986615896225
  batch 900 loss: 2.164026358127594
LOSS train 2.16403 valid 2.14735, valid PER 77.52%
EPOCH 2:
  batch 50 loss: 2.1206107711791993
  batch 100 loss: 2.041600911617279
  batch 150 loss: 1.9594013357162476
  batch 200 loss: 2.004628264904022
  batch 250 loss: 1.9373662757873535
  batch 300 loss: 1.878817048072815
  batch 350 loss

  batch 350 loss: 0.865443787574768
  batch 400 loss: 0.8717297947406769
  batch 450 loss: 0.8707527434825897
  batch 500 loss: 0.8925782990455627
  batch 550 loss: 0.8321598029136658
  batch 600 loss: 0.8653612720966339
  batch 650 loss: 0.8958389794826508
  batch 700 loss: 0.8833250510692596
  batch 750 loss: 0.8462914633750915
  batch 800 loss: 0.8674984323978424
  batch 850 loss: 0.9173091053962708
  batch 900 loss: 0.8950191617012024
LOSS train 0.89502 valid 1.00286, valid PER 31.61%
EPOCH 13:
  batch 50 loss: 0.820296665430069
  batch 100 loss: 0.8368103361129761
  batch 150 loss: 0.815930073261261
  batch 200 loss: 0.8390223336219788
  batch 250 loss: 0.8462725257873536
  batch 300 loss: 0.8443914639949799
  batch 350 loss: 0.852672780752182
  batch 400 loss: 0.8487203133106231
  batch 450 loss: 0.854149786233902
  batch 500 loss: 0.815921481847763
  batch 550 loss: 0.8599018323421478
  batch 600 loss: 0.8529881203174591
  batch 650 loss: 0.8528653573989868
  batch 700 loss: 0.8

In [4]:
# Run configuration for the Adam experiment with a learning rate of 0.1.
# The settings are collected in a plain dict first and then frozen.
run_settings = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 128,
    'concat': 1,
    'lr': 0.1,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Turn the dict into an immutable record with attribute access
# (args.lr, args.device, ...); field order follows the dict's key order.
RunSettings = namedtuple('x', run_settings)
args = RunSettings(**run_settings)

In [5]:
import models
from datetime import datetime
from trainer_Adam import train


# Build a BiLSTM from the current `args` and report its size.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum(weight.numel() for weight in model.parameters())
print('Total number of model parameters is {}'.format(num_params))

# Train with the `train` imported from trainer_Adam in this cell; the timed
# span includes moving the model to the target device.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

duration = (end - start).total_seconds()
elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
print('Training finished in {} minutes.'.format(elapsed_minutes))
print('Model saved to {}'.format(model_path))

Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: 5.506333332061768
  batch 100 loss: 2.8623961925506594
  batch 150 loss: 2.678375926017761
  batch 200 loss: 2.5562650871276857
  batch 250 loss: 2.44685085773468
  batch 300 loss: 2.404448504447937
  batch 350 loss: 2.245123052597046
  batch 400 loss: 2.3010426998138427
  batch 450 loss: 2.220965223312378
  batch 500 loss: 2.164877219200134
  batch 550 loss: 2.168926432132721
  batch 600 loss: 2.174325647354126
  batch 650 loss: 2.1885672116279604
  batch 700 loss: 2.2873722553253173
  batch 750 loss: 2.1368629145622253
  batch 800 loss: 2.1436072731018068
  batch 850 loss: 2.136396086215973
  batch 900 loss: 2.0891373324394227
LOSS train 2.08914 valid 2.05262, valid PER 64.96%
EPOCH 2:
  batch 50 loss: 2.064674139022827
  batch 100 loss: 2.0597986578941345
  batch 150 loss: 2.021226227283478
  batch 200 loss: 2.1158048629760744
  batch 250 loss: 2.1334808588027956
  batch 300 loss: 2.100814940929413
  batch 350 loss

  batch 400 loss: 2.976087384223938
  batch 450 loss: 3.0841492938995363
  batch 500 loss: 2.972876386642456
  batch 550 loss: 2.933476600646973
  batch 600 loss: 2.9375278615951537
  batch 650 loss: 2.840574736595154
  batch 700 loss: 2.7909598541259766
  batch 750 loss: 2.7548926210403444
  batch 800 loss: 2.794164819717407
  batch 850 loss: 2.819136047363281
  batch 900 loss: 2.7712770652770997
LOSS train 2.77128 valid 2.77622, valid PER 74.84%
EPOCH 13:
  batch 50 loss: 2.777683501243591
  batch 100 loss: 2.8953692007064817
  batch 150 loss: 2.793831715583801
  batch 200 loss: 2.808894739151001
  batch 250 loss: 2.817759747505188
  batch 300 loss: 2.8018371534347533
  batch 350 loss: 2.821599745750427
  batch 400 loss: 2.8821763610839843
  batch 450 loss: 2.9553138399124146
  batch 500 loss: 2.839025573730469
  batch 550 loss: 2.818402719497681
  batch 600 loss: 2.8555260419845583
  batch 650 loss: 2.8596240091323852
  batch 700 loss: 2.8557433605194094
  batch 750 loss: 2.88897100

KeyboardInterrupt: 

###### Add gradient clipping

In [8]:
# Run configuration for the gradient-clipping experiment (learning rate 0.1).
# NOTE(review): no clipping option appears in these settings - presumably the
# clipping lives inside trainer_Adam; confirm there.
run_settings = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 128,
    'concat': 1,
    'lr': 0.1,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Turn the dict into an immutable record with attribute access
# (args.lr, args.device, ...); field order follows the dict's key order.
RunSettings = namedtuple('x', run_settings)
args = RunSettings(**run_settings)

In [9]:
import models
from datetime import datetime
from trainer_Adam import train


# Build a BiLSTM from the current `args` and report its size.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum(weight.numel() for weight in model.parameters())
print('Total number of model parameters is {}'.format(num_params))

# Train with the `train` imported from trainer_Adam in this cell; the timed
# span includes moving the model to the target device.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

duration = (end - start).total_seconds()
elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
print('Training finished in {} minutes.'.format(elapsed_minutes))
print('Model saved to {}'.format(model_path))

Total number of model parameters is 166952
EPOCH 1:
  batch 50 loss: 6.2683671283721925
  batch 100 loss: 3.01337543964386
  batch 150 loss: 2.6641845703125
  batch 200 loss: 2.478918595314026
  batch 250 loss: 2.410011293888092
  batch 300 loss: 2.3164991569519042
  batch 350 loss: 2.312526626586914
  batch 400 loss: 2.271858158111572
  batch 450 loss: 2.2899438977241515
  batch 500 loss: 2.263788242340088
  batch 550 loss: 2.228834981918335
  batch 600 loss: 2.2524332904815676
  batch 650 loss: 2.202117931842804
  batch 700 loss: 2.2307498025894166
  batch 750 loss: 2.1722647523880005
  batch 800 loss: 2.2001182770729066
  batch 850 loss: 2.2330781030654907
  batch 900 loss: 2.207210223674774
LOSS train 2.20721 valid 2.22778, valid PER 74.67%
EPOCH 2:
  batch 50 loss: 2.2363215637207032
  batch 100 loss: 2.179481258392334
  batch 150 loss: 2.157339792251587
  batch 200 loss: 2.2023068952560423
  batch 250 loss: 2.220174775123596
  batch 300 loss: 2.179911856651306
  batch 350 loss: 2

  batch 450 loss: 2.36400251865387
  batch 500 loss: 2.332400851249695
  batch 550 loss: 2.363796339035034
  batch 600 loss: 2.4117057657241823
  batch 650 loss: 2.469039225578308
  batch 700 loss: 2.374113063812256
  batch 750 loss: 2.300539391040802
  batch 800 loss: 2.3059232902526854
  batch 850 loss: 2.3085934638977053
  batch 900 loss: 2.3022136998176577
LOSS train 2.30221 valid 2.28326, valid PER 72.92%
EPOCH 13:
  batch 50 loss: 2.2597289228439332
  batch 100 loss: 2.2930776119232177
  batch 150 loss: 2.2755022168159487
  batch 200 loss: 2.265809569358826
  batch 250 loss: 2.296639850139618
  batch 300 loss: 2.255822117328644
  batch 350 loss: 2.270432252883911
  batch 400 loss: 2.278254849910736
  batch 450 loss: 2.32291428565979
  batch 500 loss: 2.2877138900756835
  batch 550 loss: 2.2807856035232543
  batch 600 loss: 2.222296025753021
  batch 650 loss: 2.2069998383522034
  batch 700 loss: 2.2459900689125063
  batch 750 loss: 2.2056263732910155
  batch 800 loss: 2.2112379336

#### Try SGD and Learning Rate Scheduler

In [4]:
import models
# Build the BiLSTM for the SGD + scheduler run from the current `args`
# and report how many parameters it has.
model = models.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
)
num_params = sum(weight.numel() for weight in model.parameters())
print('Total number of model parameters is {}'.format(num_params))

Total number of model parameters is 166952


In [5]:
from datetime import datetime
from trainer_SGD_Scheduler import train
# Train with the `train` imported from trainer_SGD_Scheduler in this cell;
# the timed span includes moving the model to the target device.
start = datetime.now()
model.to(args.device)
model_path = train(model, args)
end = datetime.now()

duration = (end - start).total_seconds()
elapsed_minutes = divmod(duration, 60)[0]  # whole minutes only
print('Training finished in {} minutes.'.format(elapsed_minutes))
print('Model saved to {}'.format(model_path))

EPOCH 1:
  batch 50 loss: 5.0952321672439576
  batch 100 loss: 3.3160207796096803
  batch 150 loss: 3.1283680248260497
  batch 200 loss: 2.86003559589386
  batch 250 loss: 2.67761887550354
  batch 300 loss: 2.490350155830383
  batch 350 loss: 2.382415699958801
  batch 400 loss: 2.332326679229736
  batch 450 loss: 2.256763663291931
  batch 500 loss: 2.150912969112396
  batch 550 loss: 2.098514678478241
  batch 600 loss: 2.03779465675354
  batch 650 loss: 1.970644669532776
  batch 700 loss: 1.9605630493164063
  batch 750 loss: 1.892756962776184
  batch 800 loss: 1.8726566624641419
  batch 850 loss: 1.8246976494789124
  batch 900 loss: 1.8179186940193177
LOSS train 1.81792 valid 1.74112, valid PER 67.86%
EPOCH 2:
  batch 50 loss: 1.754153950214386
  batch 100 loss: 1.69423823595047
  batch 150 loss: 1.6750897979736328
  batch 200 loss: 1.6854981899261474
  batch 250 loss: 1.6781074762344361
  batch 300 loss: 1.6559325432777405
  batch 350 loss: 1.5633166575431823
  batch 400 loss: 1.57351

  batch 450 loss: 0.7993086898326873
  batch 500 loss: 0.8112813603878021
  batch 550 loss: 0.7378953850269317
  batch 600 loss: 0.7759832632541657
  batch 650 loss: 0.8329361653327942
  batch 700 loss: 0.8032406920194626
  batch 750 loss: 0.783981266617775
  batch 800 loss: 0.7714516514539719
  batch 850 loss: 0.8259145557880402
  batch 900 loss: 0.8219052803516388
LOSS train 0.82191 valid 0.94374, valid PER 29.17%
EPOCH 13:
  batch 50 loss: 0.7252003991603851
  batch 100 loss: 0.764584436416626
  batch 150 loss: 0.7489726227521897
  batch 200 loss: 0.7628170549869537
  batch 250 loss: 0.7518410086631775
  batch 300 loss: 0.76351567029953
  batch 350 loss: 0.761681181192398
  batch 400 loss: 0.7724109297990799
  batch 450 loss: 0.7606583511829377
  batch 500 loss: 0.738277200460434
  batch 550 loss: 0.801950433254242
  batch 600 loss: 0.7445122092962265
  batch 650 loss: 0.780785500407219
  batch 700 loss: 0.7853784263134003
  batch 750 loss: 0.7379002737998962
  batch 800 loss: 0.767

# Study of model structure

## Grid search to fine-tune 3 models

## Fine-tune the dropout rate and the optimiser

### Due to limited time, the grid is Dropout Rate = [0.1, 0.2, 0.3, 0.4, 0.5] and Optimiser = [Adam with default settings, SGD with LR scheduler]. TODO: explain why SGD with a scheduler, rather than SGD with a constant learning rate, is in the list.

## 1. Two Layer LSTM (dropout in the layer between)

In [4]:
# Axes of the grid search: every optimiser is paired with every dropout rate.
dropout_rates = [
    0.1,
    0.2,
    0.3,
    0.4,
    0.5,
]
# The optimiser names are compared as strings in the grid-search loop
# ("Adam" selects the Adam trainer, anything else the SGD-scheduler trainer).
Optimiser = [
    "Adam",
    "SGD_Scheduler",
]

In [None]:
import model_regularisation_dropout_between_layer
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

print("Start dropout tuning For 2 Layer LSTM, with Dropout between layer")

# Hyper-parameters shared by every run in this sweep. 'lr' is only a
# placeholder here: it is filled in per optimiser below, and it is kept at
# this position so the namedtuple built from the dict has the same field
# order as before.
sweep_config = {'seed': 123,
                'train_json': 'train_fbank.json',
                'val_json': 'dev_fbank.json',
                'test_json': 'test_fbank.json',
                'batch_size': 4,
                'num_layers': 2,
                'fbank_dims': 23,
                'model_dims': 128,
                'concat': 1,
                'lr': None,
                'vocab': vocab,
                'report_interval': 50,
                'num_epochs': 20,
                'device': device,
               }

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # The two optimiser set-ups differ only in the initial learning rate and
    # in which trainer runs the training; any key other than "Adam" falls
    # through to the SGD-with-scheduler trainer.
    run_config = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', run_config)(**run_config)
    selected_trainer = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # Take the depth from the config instead of repeating the literal 2,
        # so the model and `args.num_layers` cannot drift apart.
        model_with_dropout = model_regularisation_dropout_between_layer.BiLSTM(
            args.num_layers, args.fbank_dims * args.concat, args.model_dims,
            len(args.vocab), dropout_rate)
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = selected_trainer(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # divmod(duration, 60)[0] is the number of whole minutes (as a float).
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For 2 Layer LSTM")
        
    
    

Start dropout tuning For 2 Layer LSTM, with Dropout between layer
Currently using Adam optimiser
Currently using dropout rate of 0.1
Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 6.89443835735321
  batch 100 loss: 3.216618900299072
  batch 150 loss: 2.989476499557495
  batch 200 loss: 2.6915573501586914
  batch 250 loss: 2.415686373710632
  batch 300 loss: 2.1722241330146788
  batch 350 loss: 1.98657954454422
  batch 400 loss: 1.9281827473640443
  batch 450 loss: 1.8057073187828063
  batch 500 loss: 1.6893247222900392
  batch 550 loss: 1.6369571709632873
  batch 600 loss: 1.5868984532356263
  batch 650 loss: 1.5016728234291077
  batch 700 loss: 1.5052967405319213
  batch 750 loss: 1.4288172364234923
  batch 800 loss: 1.4431952023506165
  batch 850 loss: 1.40240469455719
  batch 900 loss: 1.357992215156555
LOSS train 1.35799 valid 1.33853, valid PER 42.81%
EPOCH 2:
  batch 50 loss: 1.318509771823883
  batch 100 loss: 1.3002801442146301
  batch 150 loss: 1.22784077

  batch 150 loss: 0.4593409463763237
  batch 200 loss: 0.4617572242021561
  batch 250 loss: 0.4813243979215622
  batch 300 loss: 0.4950635641813278
  batch 350 loss: 0.4529670214653015
  batch 400 loss: 0.5138579666614532
  batch 450 loss: 0.4745971179008484
  batch 500 loss: 0.4934589648246765
  batch 550 loss: 0.4799868881702423
  batch 600 loss: 0.49178849935531616
  batch 650 loss: 0.48386700749397277
  batch 700 loss: 0.49086237609386446
  batch 750 loss: 0.5024517822265625
  batch 800 loss: 0.480209481716156
  batch 850 loss: 0.5043855285644532
  batch 900 loss: 0.5117737752199173
LOSS train 0.51177 valid 0.81207, valid PER 24.14%
EPOCH 13:
  batch 50 loss: 0.42272994577884676
  batch 100 loss: 0.4382612407207489
  batch 150 loss: 0.42835679709911345
  batch 200 loss: 0.4145966079831123
  batch 250 loss: 0.43551869571208957
  batch 300 loss: 0.47151772916316986
  batch 350 loss: 0.43700083911418913
  batch 400 loss: 0.4424992525577545
  batch 450 loss: 0.46599395453929904
  batch

  batch 150 loss: 1.0529650139808655
  batch 200 loss: 1.037805564403534
  batch 250 loss: 1.0213603842258454
  batch 300 loss: 1.0156231784820557
  batch 350 loss: 1.0588284718990326
  batch 400 loss: 1.018219221830368
  batch 450 loss: 1.0154740905761719
  batch 500 loss: 1.0006765067577361
  batch 550 loss: 1.0288334167003632
  batch 600 loss: 0.972760648727417
  batch 650 loss: 0.9719089198112488
  batch 700 loss: 0.9967114627361298
  batch 750 loss: 1.0274904346466065
  batch 800 loss: 0.9684587502479554
  batch 850 loss: 1.0079766821861267
  batch 900 loss: 0.9359860217571259
LOSS train 0.93599 valid 1.03182, valid PER 31.71%
EPOCH 4:
  batch 50 loss: 0.9178520679473877
  batch 100 loss: 0.9227063620090484
  batch 150 loss: 0.8974242174625396
  batch 200 loss: 0.9424401867389679
  batch 250 loss: 0.9628462743759155
  batch 300 loss: 0.9550552952289582
  batch 350 loss: 0.8973564386367798
  batch 400 loss: 0.9269311964511872
  batch 450 loss: 0.9215065371990204
  batch 500 loss: 0

  batch 500 loss: 0.5360938501358032
  batch 550 loss: 0.541271276473999
  batch 600 loss: 0.5028195881843567
  batch 650 loss: 0.5422133433818818
  batch 700 loss: 0.5595635145902633
  batch 750 loss: 0.5181500577926635
  batch 800 loss: 0.5150188344717026
  batch 850 loss: 0.5579300951957703
  batch 900 loss: 0.5442490893602371
LOSS train 0.54425 valid 0.80401, valid PER 23.87%
EPOCH 15:
  batch 50 loss: 0.48622271001338957
  batch 100 loss: 0.48267479360103605
  batch 150 loss: 0.4764319306612015
  batch 200 loss: 0.5096748358011246
  batch 250 loss: 0.5065958005189896
  batch 300 loss: 0.47804209947586057
  batch 350 loss: 0.5042109757661819
  batch 400 loss: 0.4882518947124481
  batch 450 loss: 0.49401808142662046
  batch 500 loss: 0.4819461172819138
  batch 550 loss: 0.5043445414304734
  batch 600 loss: 0.5173967981338501
  batch 650 loss: 0.527086751461029
  batch 700 loss: 0.5278946548700333
  batch 750 loss: 0.5244166368246078
  batch 800 loss: 0.5007970821857453
  batch 850 l

  batch 550 loss: 0.804352980852127
  batch 600 loss: 0.8804282653331756
  batch 650 loss: 0.8333887898921967
  batch 700 loss: 0.8725360369682312
  batch 750 loss: 0.8117343008518219
  batch 800 loss: 0.8621259164810181
  batch 850 loss: 0.8487148261070252
  batch 900 loss: 0.8521582746505737
LOSS train 0.85216 valid 0.90621, valid PER 28.52%
EPOCH 6:
  batch 50 loss: 0.8334192991256714
  batch 100 loss: 0.7778025412559509
  batch 150 loss: 0.7596457952260971
  batch 200 loss: 0.7855927866697311
  batch 250 loss: 0.8376350200176239
  batch 300 loss: 0.7991802304983139
  batch 350 loss: 0.7919099676609039
  batch 400 loss: 0.7861126446723938
  batch 450 loss: 0.8060383319854736
  batch 500 loss: 0.7887142336368561
  batch 550 loss: 0.8177850568294525
  batch 600 loss: 0.7933033263683319
  batch 650 loss: 0.8057477974891663
  batch 700 loss: 0.8087020230293274
  batch 750 loss: 0.7772293084859848
  batch 800 loss: 0.796122694015503
  batch 850 loss: 0.7728651475906372
  batch 900 loss: 

  batch 900 loss: 0.5167544496059417
LOSS train 0.51675 valid 0.81792, valid PER 23.57%
EPOCH 17:
  batch 50 loss: 0.48689638435840604
  batch 100 loss: 0.4792957079410553
  batch 150 loss: 0.4680852577090263
  batch 200 loss: 0.46640166819095613
  batch 250 loss: 0.47899615406990054
  batch 300 loss: 0.4888946706056595
  batch 350 loss: 0.4682308745384216
  batch 400 loss: 0.5013286709785462
  batch 450 loss: 0.49879133999347686
  batch 500 loss: 0.4799861377477646
  batch 550 loss: 0.4664834040403366
  batch 600 loss: 0.5188808411359787
  batch 650 loss: 0.4800785529613495
  batch 700 loss: 0.4763843303918838
  batch 750 loss: 0.4799040347337723
  batch 800 loss: 0.46122271597385406
  batch 850 loss: 0.5001346248388291
  batch 900 loss: 0.5005177932977677
LOSS train 0.50052 valid 0.80223, valid PER 23.13%
EPOCH 18:
  batch 50 loss: 0.44202853202819825
  batch 100 loss: 0.4474792331457138
  batch 150 loss: 0.47312208473682404
  batch 200 loss: 0.45899058401584625
  batch 250 loss: 0.4

  batch 50 loss: 0.7268823367357254
  batch 100 loss: 0.7349890881776809
  batch 150 loss: 0.7295937353372574
  batch 200 loss: 0.7143506240844727
  batch 250 loss: 0.7382188314199447
  batch 300 loss: 0.6894061255455017
  batch 350 loss: 0.7651323974132538
  batch 400 loss: 0.694420627951622
  batch 450 loss: 0.7323562061786651
  batch 500 loss: 0.7702307188510895
  batch 550 loss: 0.7111901944875717
  batch 600 loss: 0.7469295918941498
  batch 650 loss: 0.7620429718494415
  batch 700 loss: 0.721162074804306
  batch 750 loss: 0.7163517618179321
  batch 800 loss: 0.7450493955612183
  batch 850 loss: 0.7161841875314713
  batch 900 loss: 0.7603015494346619
LOSS train 0.76030 valid 0.86214, valid PER 26.19%
EPOCH 9:
  batch 50 loss: 0.6771011447906494
  batch 100 loss: 0.688838055729866
  batch 150 loss: 0.7023711949586868
  batch 200 loss: 0.6718297672271728
  batch 250 loss: 0.7150232744216919
  batch 300 loss: 0.7054979377985
  batch 350 loss: 0.7228101468086243
  batch 400 loss: 0.689

  batch 400 loss: 0.4739803194999695
  batch 450 loss: 0.49771519780158996
  batch 500 loss: 0.48587507784366607
  batch 550 loss: 0.47453959822654723
  batch 600 loss: 0.48166562139987945
  batch 650 loss: 0.5088045483827591
  batch 700 loss: 0.47520000994205475
  batch 750 loss: 0.4718728107213974
  batch 800 loss: 0.5101520848274231
  batch 850 loss: 0.4907538664340973
  batch 900 loss: 0.5112993305921555
LOSS train 0.51130 valid 0.80489, valid PER 23.29%
EPOCH 20:
  batch 50 loss: 0.4431626093387604
  batch 100 loss: 0.44302534967660906
  batch 150 loss: 0.4380750918388367
  batch 200 loss: 0.464879949092865
  batch 250 loss: 0.4507046562433243
  batch 300 loss: 0.4627227354049683
  batch 350 loss: 0.4472335213422775
  batch 400 loss: 0.4643052500486374
  batch 450 loss: 0.46353774845600126
  batch 500 loss: 0.45222378611564634
  batch 550 loss: 0.49999658346176146
  batch 600 loss: 0.4585414236783981
  batch 650 loss: 0.4629732546210289
  batch 700 loss: 0.47838465094566346
  batc

  batch 500 loss: 0.7085885316133499
  batch 550 loss: 0.7165661180019378
  batch 600 loss: 0.6815368151664734
  batch 650 loss: 0.6775945323705673
  batch 700 loss: 0.7237945479154587
  batch 750 loss: 0.6785051238536834
  batch 800 loss: 0.6899566745758057
  batch 850 loss: 0.7004611551761627
  batch 900 loss: 0.7223501765727997
LOSS train 0.72235 valid 0.80185, valid PER 24.64%
EPOCH 11:
  batch 50 loss: 0.6374377524852752
  batch 100 loss: 0.6104527217149734
  batch 150 loss: 0.6431829398870468
  batch 200 loss: 0.678671418428421
  batch 250 loss: 0.6718672221899032
  batch 300 loss: 0.6489477509260178
  batch 350 loss: 0.6531985241174698
  batch 400 loss: 0.6830776917934418
  batch 450 loss: 0.6618759876489639
  batch 500 loss: 0.6367941266298294
  batch 550 loss: 0.678585866689682
  batch 600 loss: 0.6407218086719513
  batch 650 loss: 0.717633386850357
  batch 700 loss: 0.6466231709718704
  batch 750 loss: 0.6514138102531433
  batch 800 loss: 0.6718869292736054
  batch 850 loss: 

  batch 550 loss: 2.213016440868378
  batch 600 loss: 2.1415928030014038
  batch 650 loss: 2.0392748427391054
  batch 700 loss: 2.0253085017204286
  batch 750 loss: 1.9373744654655456
  batch 800 loss: 1.8973784995079042
  batch 850 loss: 1.8400518822669982
  batch 900 loss: 1.7983426666259765
LOSS train 1.79834 valid 1.72494, valid PER 65.65%
EPOCH 2:
  batch 50 loss: 1.7247286224365235
  batch 100 loss: 1.6412481760978699
  batch 150 loss: 1.6123999047279358
  batch 200 loss: 1.6127618265151977
  batch 250 loss: 1.614330551624298
  batch 300 loss: 1.5500157141685487
  batch 350 loss: 1.4685749363899232
  batch 400 loss: 1.476932008266449
  batch 450 loss: 1.413519811630249
  batch 500 loss: 1.4332879114151
  batch 550 loss: 1.4254151749610902
  batch 600 loss: 1.3675181484222412
  batch 650 loss: 1.3657868349552154
  batch 700 loss: 1.326122670173645
  batch 750 loss: 1.3120778620243072
  batch 800 loss: 1.2654885983467101
  batch 850 loss: 1.262564994096756
  batch 900 loss: 1.27872

  batch 800 loss: 0.5669085109233856
  batch 850 loss: 0.61966890335083
  batch 900 loss: 0.5924657940864563
LOSS train 0.59247 valid 0.77436, valid PER 23.83%
EPOCH 13:
  batch 50 loss: 0.5348767662048339
  batch 100 loss: 0.5325357347726822
  batch 150 loss: 0.5225240755081176
  batch 200 loss: 0.5657066476345062
  batch 250 loss: 0.5436020436882972
  batch 300 loss: 0.5245435458421707
  batch 350 loss: 0.5288926148414612
  batch 400 loss: 0.5531266617774964
  batch 450 loss: 0.5600366497039795
  batch 500 loss: 0.5261759752035141
  batch 550 loss: 0.5637225943803787
  batch 600 loss: 0.5350692582130432
  batch 650 loss: 0.5683085906505585
  batch 700 loss: 0.5616145044565201
  batch 750 loss: 0.5202095597982407
  batch 800 loss: 0.5455037838220597
  batch 850 loss: 0.5681731200218201
  batch 900 loss: 0.546767663359642
Epoch 00013: reducing learning rate of group 0 to 1.2500e-01.
LOSS train 0.54677 valid 0.77905, valid PER 23.36%
EPOCH 14:
  batch 50 loss: 0.4943955099582672
  batch

  batch 250 loss: 1.166030250787735
  batch 300 loss: 1.1658530414104462
  batch 350 loss: 1.2210389614105224
  batch 400 loss: 1.194639993906021
  batch 450 loss: 1.1562398302555084
  batch 500 loss: 1.1427105927467347
  batch 550 loss: 1.1494333088397979
  batch 600 loss: 1.1155912017822265
  batch 650 loss: 1.1130953347682953
  batch 700 loss: 1.1264884865283966
  batch 750 loss: 1.1743736314773559
  batch 800 loss: 1.09774751663208
  batch 850 loss: 1.1432530403137207
  batch 900 loss: 1.0657747721672057
LOSS train 1.06577 valid 1.11161, valid PER 33.98%
EPOCH 4:
  batch 50 loss: 1.0477769899368286
  batch 100 loss: 1.0583287930488587
  batch 150 loss: 1.021544268131256
  batch 200 loss: 1.0519409167766571
  batch 250 loss: 1.088882246017456
  batch 300 loss: 1.0704599249362945
  batch 350 loss: 1.002511478662491
  batch 400 loss: 1.0293425083160401
  batch 450 loss: 1.035027756690979
  batch 500 loss: 1.0083077609539033
  batch 550 loss: 1.0392204070091247
  batch 600 loss: 1.0509

  batch 400 loss: 0.5053264877200127
  batch 450 loss: 0.509397833943367
  batch 500 loss: 0.5055256658792495
  batch 550 loss: 0.5307370346784591
  batch 600 loss: 0.50257248878479
  batch 650 loss: 0.5139719933271408
  batch 700 loss: 0.5273624897003174
  batch 750 loss: 0.5130464774370194
  batch 800 loss: 0.4831137791275978
  batch 850 loss: 0.5177164828777313
  batch 900 loss: 0.5143761825561524
LOSS train 0.51438 valid 0.75496, valid PER 22.76%
EPOCH 15:
  batch 50 loss: 0.4825908374786377
  batch 100 loss: 0.48954946756362916
  batch 150 loss: 0.48404073297977446
  batch 200 loss: 0.5056552296876907
  batch 250 loss: 0.5206176829338074
  batch 300 loss: 0.47752333104610445
  batch 350 loss: 0.4776881778240204
  batch 400 loss: 0.488422127366066
  batch 450 loss: 0.48879369914531706
  batch 500 loss: 0.46431910157203676
  batch 550 loss: 0.48940527498722075
  batch 600 loss: 0.4947173303365707
  batch 650 loss: 0.5217531430721283
  batch 700 loss: 0.5231659853458405
  batch 750 l

  batch 100 loss: 0.9725405848026276
  batch 150 loss: 1.0042237663269042
  batch 200 loss: 0.9368509650230408
  batch 250 loss: 0.9564841270446778
  batch 300 loss: 0.9600777947902679
  batch 350 loss: 0.9627470517158508
  batch 400 loss: 0.972723833322525
  batch 450 loss: 0.9594013309478759
  batch 500 loss: 0.9720083141326904
  batch 550 loss: 0.9187964522838592
  batch 600 loss: 0.9919346499443055
  batch 650 loss: 0.9509259164333344
  batch 700 loss: 0.999151428937912
  batch 750 loss: 0.9200812220573426
  batch 800 loss: 0.949820761680603
  batch 850 loss: 0.948107818365097
  batch 900 loss: 0.9615027630329132
LOSS train 0.96150 valid 1.01172, valid PER 30.96%
EPOCH 6:
  batch 50 loss: 0.9429016828536987
  batch 100 loss: 0.8799566686153412
  batch 150 loss: 0.8712810063362122
  batch 200 loss: 0.8873822379112244
  batch 250 loss: 0.9287836956977844
  batch 300 loss: 0.9295876729488373
  batch 350 loss: 0.912156126499176
  batch 400 loss: 0.8822897005081177
  batch 450 loss: 0.9

  batch 350 loss: 0.5244802129268646
  batch 400 loss: 0.5424978291988373
  batch 450 loss: 0.5436205965280533
  batch 500 loss: 0.49082095086574556
  batch 550 loss: 0.5207916647195816
  batch 600 loss: 0.5121731650829315
  batch 650 loss: 0.5276100486516953
  batch 700 loss: 0.509233728647232
  batch 750 loss: 0.5267081660032272
  batch 800 loss: 0.5499650007486343
  batch 850 loss: 0.5255328035354614
  batch 900 loss: 0.5301142400503158
LOSS train 0.53011 valid 0.76290, valid PER 22.80%
EPOCH 17:
  batch 50 loss: 0.4959378817677498
  batch 100 loss: 0.5161001408100128
  batch 150 loss: 0.4895546129345894
  batch 200 loss: 0.5125252854824066
  batch 250 loss: 0.5217769157886505
  batch 300 loss: 0.5145756435394288
  batch 350 loss: 0.47896467089653016
  batch 400 loss: 0.5370862585306168
  batch 450 loss: 0.5105769348144531
  batch 500 loss: 0.5075761717557907
  batch 550 loss: 0.5053611314296722
  batch 600 loss: 0.5324865245819091
  batch 650 loss: 0.496419762969017
  batch 700 los

In [4]:
# Reduced grid: only the SGD-with-scheduler runs at dropout 0.3, 0.4 and 0.5.
Optimiser = ["SGD_Scheduler"]
dropout_rates = [tenths / 10 for tenths in range(3, 6)]

In [5]:
import model_regularisation_dropout_between_layer
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

print("Start dropout tuning For 2 Layer LSTM, with Dropout between layer")

# Hyper-parameters common to all runs of this sweep. The 'lr' entry is a
# placeholder that is overwritten per optimiser below; it keeps its position
# so the namedtuple field order is unchanged.
sweep_config = {'seed': 123,
                'train_json': 'train_fbank.json',
                'val_json': 'dev_fbank.json',
                'test_json': 'test_fbank.json',
                'batch_size': 4,
                'num_layers': 2,
                'fbank_dims': 23,
                'model_dims': 128,
                'concat': 1,
                'lr': None,
                'vocab': vocab,
                'report_interval': 50,
                'num_epochs': 20,
                'device': device,
               }

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # Only the initial learning rate and the trainer depend on the optimiser:
    # "Adam" uses 0.001 with the Adam trainer, every other key uses 0.5 with
    # the SGD-with-scheduler trainer.
    run_config = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', run_config)(**run_config)
    selected_trainer = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # Depth comes from the config rather than a repeated literal 2.
        model_with_dropout = model_regularisation_dropout_between_layer.BiLSTM(
            args.num_layers, args.fbank_dims * args.concat, args.model_dims,
            len(args.vocab), dropout_rate)
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = selected_trainer(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Report whole minutes only (first element of divmod, a float).
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For 2 Layer LSTM")
        
    
    

Start dropout tuning For 2 Layer LSTM, with Dropout between layer
Currently using SGD_Scheduler optimiser
Currently using dropout rate of 0.3
Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 5.016525239944458
  batch 100 loss: 3.4115882444381715
  batch 150 loss: 3.3051007080078123
  batch 200 loss: 3.1956518268585206
  batch 250 loss: 3.104453806877136
  batch 300 loss: 2.915277571678162
  batch 350 loss: 2.727032699584961
  batch 400 loss: 2.5863119077682497
  batch 450 loss: 2.4851508140563965
  batch 500 loss: 2.350493211746216
  batch 550 loss: 2.279656867980957
  batch 600 loss: 2.207398736476898
  batch 650 loss: 2.10341272354126
  batch 700 loss: 2.088429207801819
  batch 750 loss: 2.0015905618667604
  batch 800 loss: 1.9573982453346253
  batch 850 loss: 1.889895441532135
  batch 900 loss: 1.8509030890464784
LOSS train 1.85090 valid 1.74306, valid PER 66.74%
EPOCH 2:
  batch 50 loss: 1.7869226622581482
  batch 100 loss: 1.7218112778663635
  batch 150 loss: 1

  batch 50 loss: 0.5838898283243179
  batch 100 loss: 0.584637714624405
  batch 150 loss: 0.6082290500402451
  batch 200 loss: 0.6123249953985215
  batch 250 loss: 0.6082725042104721
  batch 300 loss: 0.6399419790506363
  batch 350 loss: 0.6068748897314071
  batch 400 loss: 0.6516255646944046
  batch 450 loss: 0.6073781037330628
  batch 500 loss: 0.6215384787321091
  batch 550 loss: 0.6249072128534316
  batch 600 loss: 0.625762984752655
  batch 650 loss: 0.6148609906435013
  batch 700 loss: 0.6244295924901963
  batch 750 loss: 0.6189127373695373
  batch 800 loss: 0.5870182865858078
  batch 850 loss: 0.6137241071462631
  batch 900 loss: 0.6532756304740905
LOSS train 0.65328 valid 0.77217, valid PER 23.45%
EPOCH 13:
  batch 50 loss: 0.5688279384374618
  batch 100 loss: 0.5893058794736862
  batch 150 loss: 0.6108884930610656
  batch 200 loss: 0.5475782239437104
  batch 250 loss: 0.5809630876779557
  batch 300 loss: 0.6187974029779434
  batch 350 loss: 0.5731125921010971
  batch 400 loss: 

  batch 700 loss: 1.37415354013443
  batch 750 loss: 1.3729534506797791
  batch 800 loss: 1.2833132910728455
  batch 850 loss: 1.2938239800930023
  batch 900 loss: 1.3231087374687194
LOSS train 1.32311 valid 1.25626, valid PER 39.45%
EPOCH 3:
  batch 50 loss: 1.2676919984817505
  batch 100 loss: 1.239076018333435
  batch 150 loss: 1.2357685256004334
  batch 200 loss: 1.210247926712036
  batch 250 loss: 1.2088853085041047
  batch 300 loss: 1.201525353193283
  batch 350 loss: 1.2349643754959105
  batch 400 loss: 1.212140496969223
  batch 450 loss: 1.1798308026790618
  batch 500 loss: 1.1663497471809388
  batch 550 loss: 1.167036510705948
  batch 600 loss: 1.1415396630764008
  batch 650 loss: 1.1236836791038514
  batch 700 loss: 1.1518934857845307
  batch 750 loss: 1.1890851330757142
  batch 800 loss: 1.1374614071846008
  batch 850 loss: 1.162969640493393
  batch 900 loss: 1.0896569728851317
LOSS train 1.08966 valid 1.11306, valid PER 33.95%
EPOCH 4:
  batch 50 loss: 1.089821195602417
  b

  batch 100 loss: 0.6504522484540939
  batch 150 loss: 0.6578271722793579
  batch 200 loss: 0.654436354637146
  batch 250 loss: 0.649811784029007
  batch 300 loss: 0.6844654375314713
  batch 350 loss: 0.6342931139469147
  batch 400 loss: 0.6446116667985916
  batch 450 loss: 0.6525247675180436
  batch 500 loss: 0.6699568223953247
  batch 550 loss: 0.6745292925834656
  batch 600 loss: 0.6360117310285568
  batch 650 loss: 0.6651954269409179
  batch 700 loss: 0.7039356762170792
  batch 750 loss: 0.6555687648057937
  batch 800 loss: 0.6175907528400422
  batch 850 loss: 0.681321924328804
  batch 900 loss: 0.674006478190422
Epoch 00014: reducing learning rate of group 0 to 2.5000e-01.
LOSS train 0.67401 valid 0.80690, valid PER 24.74%
EPOCH 15:
  batch 50 loss: 0.6155156350135803
  batch 100 loss: 0.5976758313179016
  batch 150 loss: 0.5831924325227738
  batch 200 loss: 0.5984531444311142
  batch 250 loss: 0.5909278273582459
  batch 300 loss: 0.5673621094226837
  batch 350 loss: 0.56819584012

LOSS train 1.07671 valid 1.02711, valid PER 31.45%
EPOCH 5:
  batch 50 loss: 1.0100383234024048
  batch 100 loss: 0.9997524058818817
  batch 150 loss: 1.055371605157852
  batch 200 loss: 0.974449725151062
  batch 250 loss: 0.9849259543418885
  batch 300 loss: 0.991023987531662
  batch 350 loss: 0.9944078087806701
  batch 400 loss: 1.0231863915920258
  batch 450 loss: 0.9803883695602417
  batch 500 loss: 1.0212749493122102
  batch 550 loss: 0.9542740225791931
  batch 600 loss: 1.0282235288619994
  batch 650 loss: 0.9903670644760132
  batch 700 loss: 1.028361624479294
  batch 750 loss: 0.9583185136318206
  batch 800 loss: 1.001654224395752
  batch 850 loss: 0.9917469620704651
  batch 900 loss: 0.9774023735523224
LOSS train 0.97740 valid 0.96864, valid PER 29.81%
EPOCH 6:
  batch 50 loss: 0.9934137964248657
  batch 100 loss: 0.924176127910614
  batch 150 loss: 0.9232482171058655
  batch 200 loss: 0.9314591348171234
  batch 250 loss: 0.9838048124313354
  batch 300 loss: 0.9578409242630005


  batch 250 loss: 0.6019264513254166
  batch 300 loss: 0.599294223189354
  batch 350 loss: 0.6041325533390045
  batch 400 loss: 0.6034011167287826
  batch 450 loss: 0.620015754699707
  batch 500 loss: 0.583728432059288
  batch 550 loss: 0.5881794983148575
  batch 600 loss: 0.596561968922615
  batch 650 loss: 0.6142473244667053
  batch 700 loss: 0.5762570124864578
  batch 750 loss: 0.6027993375062942
  batch 800 loss: 0.6140435397624969
  batch 850 loss: 0.5988633340597153
  batch 900 loss: 0.5957667988538742
LOSS train 0.59577 valid 0.75736, valid PER 23.11%
EPOCH 17:
  batch 50 loss: 0.5888668358325958
  batch 100 loss: 0.5785990309715271
  batch 150 loss: 0.5605135995149613
  batch 200 loss: 0.5799290180206299
  batch 250 loss: 0.5875668567419052
  batch 300 loss: 0.5965553420782089
  batch 350 loss: 0.5604393470287323
  batch 400 loss: 0.6068138247728347
  batch 450 loss: 0.5955395770072937
  batch 500 loss: 0.5643366122245789
  batch 550 loss: 0.5682651746273041
  batch 600 loss: 0

In [4]:
# No-dropout baseline for both optimisers.
Optimiser = "Adam SGD_Scheduler".split()
# Integer 0 (not 0.0), so the sweep log prints "dropout rate of 0".
dropout_rates = [0]

In [5]:
import model_regularisation_dropout_between_layer
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

print("Start dropout tuning For 2 Layer LSTM, with Dropout between layer")

# Settings shared by every run in this cell. 'lr' is a placeholder replaced
# per optimiser below; leaving it in place preserves the field order of the
# namedtuple built from this dict.
sweep_config = {'seed': 123,
                'train_json': 'train_fbank.json',
                'val_json': 'dev_fbank.json',
                'test_json': 'test_fbank.json',
                'batch_size': 4,
                'num_layers': 2,
                'fbank_dims': 23,
                'model_dims': 128,
                'concat': 1,
                'lr': None,
                'vocab': vocab,
                'report_interval': 50,
                'num_epochs': 20,
                'device': device,
               }

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # The optimiser choice changes just two things: the initial learning rate
    # (0.001 for "Adam", 0.5 otherwise) and which trainer is called.
    run_config = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', run_config)(**run_config)
    selected_trainer = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # Use the configured depth so the model always matches args.num_layers.
        model_with_dropout = model_regularisation_dropout_between_layer.BiLSTM(
            args.num_layers, args.fbank_dims * args.concat, args.model_dims,
            len(args.vocab), dropout_rate)
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = selected_trainer(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Whole minutes elapsed (divmod quotient, printed as a float).
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For 2 Layer LSTM")

Start dropout tuning For 2 Layer LSTM, with Dropout between layer
Currently using Adam optimiser
Currently using dropout rate of 0
Total number of model parameters is 562216
EPOCH 1:
  batch 50 loss: 6.777221741676331
  batch 100 loss: 3.231734290122986
  batch 150 loss: 3.0117234420776366
  batch 200 loss: 2.739203691482544
  batch 250 loss: 2.49772696018219
  batch 300 loss: 2.280819926261902
  batch 350 loss: 2.0985004711151123
  batch 400 loss: 2.033837769031525
  batch 450 loss: 1.9083922052383422
  batch 500 loss: 1.7859423303604125
  batch 550 loss: 1.704871723651886
  batch 600 loss: 1.6372292494773866
  batch 650 loss: 1.5587277841567992
  batch 700 loss: 1.5565199494361877
  batch 750 loss: 1.4802778553962708
  batch 800 loss: 1.4722290134429932
  batch 850 loss: 1.4219999361038207
  batch 900 loss: 1.3768940234184266
LOSS train 1.37689 valid 1.35433, valid PER 44.69%
EPOCH 2:
  batch 50 loss: 1.3078549885749817
  batch 100 loss: 1.263436793088913
  batch 150 loss: 1.22401164

  batch 150 loss: 0.4270486396551132
  batch 200 loss: 0.48605110466480256
  batch 250 loss: 0.4761529916524887
  batch 300 loss: 0.4680067265033722
  batch 350 loss: 0.4652142894268036
  batch 400 loss: 0.49012204229831696
  batch 450 loss: 0.4800450485944748
  batch 500 loss: 0.49930355429649353
  batch 550 loss: 0.4645304173231125
  batch 600 loss: 0.493482386469841
  batch 650 loss: 0.5114696103334427
  batch 700 loss: 0.5026888877153397
  batch 750 loss: 0.4701381701231003
  batch 800 loss: 0.49801873207092284
  batch 850 loss: 0.5409854078292846
  batch 900 loss: 0.5434351062774658
LOSS train 0.54344 valid 0.79501, valid PER 23.84%
EPOCH 13:
  batch 50 loss: 0.4189873450994492
  batch 100 loss: 0.43480975687503814
  batch 150 loss: 0.43463573724031446
  batch 200 loss: 0.4485123524069786
  batch 250 loss: 0.4482845941185951
  batch 300 loss: 0.4452875167131424
  batch 350 loss: 0.4303961306810379
  batch 400 loss: 0.45087008535861967
  batch 450 loss: 0.4630530846118927
  batch 5

  batch 100 loss: 1.1901494026184083
  batch 150 loss: 1.1722270369529724
  batch 200 loss: 1.1586070513725282
  batch 250 loss: 1.1552634203433991
  batch 300 loss: 1.1466247510910035
  batch 350 loss: 1.1795566546916962
  batch 400 loss: 1.1753727996349335
  batch 450 loss: 1.1328780114650727
  batch 500 loss: 1.1123747742176056
  batch 550 loss: 1.1307678949832916
  batch 600 loss: 1.0943857967853545
  batch 650 loss: 1.0853698945045471
  batch 700 loss: 1.1039607322216034
  batch 750 loss: 1.1546841776371002
  batch 800 loss: 1.0784635210037232
  batch 850 loss: 1.1137256383895875
  batch 900 loss: 1.0409421741962432
LOSS train 1.04094 valid 1.11770, valid PER 33.76%
EPOCH 4:
  batch 50 loss: 1.0222221565246583
  batch 100 loss: 1.0432953679561614
  batch 150 loss: 0.9977259993553161
  batch 200 loss: 1.0301785922050477
  batch 250 loss: 1.0364859843254088
  batch 300 loss: 1.041960676908493
  batch 350 loss: 0.9810562360286713
  batch 400 loss: 1.0125023698806763
  batch 450 loss:

  batch 350 loss: 0.4603298330307007
  batch 400 loss: 0.46741651862859723
  batch 450 loss: 0.46988209664821623
  batch 500 loss: 0.48167844653129577
  batch 550 loss: 0.5064626199007034
  batch 600 loss: 0.4711774265766144
  batch 650 loss: 0.48411192536354064
  batch 700 loss: 0.49940930247306825
  batch 750 loss: 0.48982963621616366
  batch 800 loss: 0.4542622309923172
  batch 850 loss: 0.49721058785915373
  batch 900 loss: 0.49705726027488706
LOSS train 0.49706 valid 0.77112, valid PER 23.06%
EPOCH 15:
  batch 50 loss: 0.42934529155492784
  batch 100 loss: 0.42993021368980405
  batch 150 loss: 0.4284532779455185
  batch 200 loss: 0.4588908052444458
  batch 250 loss: 0.47258240640163424
  batch 300 loss: 0.43589336425065994
  batch 350 loss: 0.4470321333408356
  batch 400 loss: 0.4491481798887253
  batch 450 loss: 0.44672223687171936
  batch 500 loss: 0.4237887018918991
  batch 550 loss: 0.4472121220827103
  batch 600 loss: 0.4635932970046997
  batch 650 loss: 0.4799240952730179
  

## 2. Wider LSTM

In [6]:
# Grid-search axes for the wider single-layer LSTM sweep.
Optimiser = "Adam SGD_Scheduler".split()
# Integer 0 (no dropout) followed by 0.1, 0.2, ..., 0.5.
dropout_rates = [0] + [tenths / 10 for tenths in range(1, 6)]

In [None]:
# Dropout sweep for the wider (model_dims=512) single-layer BiLSTM, repeated
# for every optimiser listed in `Optimiser`.
# Relies on `vocab`, `device`, `dropout_rates` and `Optimiser` being defined by
# earlier cells.
from collections import namedtuple  # imported here so the cell does not depend on an earlier cell's import
from datetime import datetime

import torch  # not used directly in this cell; kept in case later cells rely on the name

import model_regularisation_dropout
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
from decoder import decode  # not used directly in this cell; kept in case later cells rely on the name

print("Start tuning For wider 1 Layer LSTM")

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # The Adam and SGD configurations are identical except for the learning
    # rate, so a single dict is built and only `lr` depends on the optimiser.
    config = {'seed': 123,
              'train_json': 'train_fbank.json',
              'val_json': 'dev_fbank.json',
              'test_json': 'test_fbank.json',
              'batch_size': 4,
              'num_layers': 1,
              'fbank_dims': 23,
              'model_dims': 512,
              'concat': 1,
              'lr': 0.001 if use_adam else 0.5,
              'vocab': vocab,
              'report_interval': 50,
              'num_epochs': 20,
              'device': device,
             }
    # Expose the settings as attributes (args.lr, args.device, ...).
    args = namedtuple('x', config)(**config)

    # Pick the training routine once per optimiser instead of once per model.
    train_fn = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # A fresh model is built for every dropout rate.
        model_with_dropout = model_regularisation_dropout.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))

        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()

        duration = (end - start).total_seconds()
        # divmod(...)[0] is the whole number of elapsed minutes, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For Wider 1 Layer LSTM")
        

Start tuning For wider 1 Layer LSTM
Currently using Adam optimiser
Currently using dropout rate of 0
Total number of model parameters is 2240552
EPOCH 1:
  batch 50 loss: 5.726706247329712
  batch 100 loss: 3.138441967964172
  batch 150 loss: 2.8376491594314577
  batch 200 loss: 2.5527011251449583
  batch 250 loss: 2.2950522255897523
  batch 300 loss: 2.0545081233978273
  batch 350 loss: 1.898708918094635
  batch 400 loss: 1.8701470947265626
  batch 450 loss: 1.7898587894439697
  batch 500 loss: 1.6957488441467286
  batch 550 loss: 1.6496214079856872
  batch 600 loss: 1.6056160998344422
  batch 650 loss: 1.5235937809944153
  batch 700 loss: 1.5432402300834656
  batch 750 loss: 1.4986415648460387
  batch 800 loss: 1.4788236808776856
  batch 850 loss: 1.4585586500167846
  batch 900 loss: 1.402013213634491
LOSS train 1.40201 valid 1.42909, valid PER 46.45%
EPOCH 2:
  batch 50 loss: 1.371263129711151
  batch 100 loss: 1.3106854343414307
  batch 150 loss: 1.2843054676055907
  batch 200 loss

  batch 200 loss: 0.43397910475730894
  batch 250 loss: 0.45739098608493806
  batch 300 loss: 0.44846841394901277
  batch 350 loss: 0.4416872185468674
  batch 400 loss: 0.46438679337501526
  batch 450 loss: 0.46913277626037597
  batch 500 loss: 0.4553913301229477
  batch 550 loss: 0.4454963910579681
  batch 600 loss: 0.47365712881088257
  batch 650 loss: 0.5065071672201157
  batch 700 loss: 0.48891753554344175
  batch 750 loss: 0.4605794471502304
  batch 800 loss: 0.46001900494098663
  batch 850 loss: 0.5104774069786072
  batch 900 loss: 0.5006308209896088
LOSS train 0.50063 valid 0.90428, valid PER 26.41%
EPOCH 13:
  batch 50 loss: 0.3751909387111664
  batch 100 loss: 0.393096704185009
  batch 150 loss: 0.3798890885710716
  batch 200 loss: 0.3980745232105255
  batch 250 loss: 0.387310808300972
  batch 300 loss: 0.39823910921812056
  batch 350 loss: 0.38475379019975664
  batch 400 loss: 0.41359849393367765
  batch 450 loss: 0.41483157098293305
  batch 500 loss: 0.39060602635145186
  ba

  batch 200 loss: 1.1009695327281952
  batch 250 loss: 1.0978144598007202
  batch 300 loss: 1.0664593660831452
  batch 350 loss: 1.126047250032425
  batch 400 loss: 1.0835916411876678
  batch 450 loss: 1.0731342446804046
  batch 500 loss: 1.0597980511188507
  batch 550 loss: 1.0891008639335633
  batch 600 loss: 1.1131614124774933
  batch 650 loss: 1.0344914877414704
  batch 700 loss: 1.0623319482803344
  batch 750 loss: 1.0972160494327545
  batch 800 loss: 1.0577577531337738
  batch 850 loss: 1.0749890291690827
  batch 900 loss: 1.0056093621253968
LOSS train 1.00561 valid 1.06972, valid PER 33.14%
EPOCH 4:
  batch 50 loss: 0.9784512102603913
  batch 100 loss: 0.9996351313591003
  batch 150 loss: 0.9614565968513489
  batch 200 loss: 1.0038666474819182
  batch 250 loss: 1.012203893661499
  batch 300 loss: 1.0148096644878388
  batch 350 loss: 0.9487750542163849
  batch 400 loss: 0.9766521227359771
  batch 450 loss: 0.960549201965332
  batch 500 loss: 0.9568304455280304
  batch 550 loss: 0

  batch 550 loss: 0.4401443070173264
  batch 600 loss: 0.41891752421855927
  batch 650 loss: 0.457554806470871
  batch 700 loss: 0.4616650241613388
  batch 750 loss: 0.44000388860702516
  batch 800 loss: 0.43410336434841157
  batch 850 loss: 0.45881778419017794
  batch 900 loss: 0.4639658087491989
LOSS train 0.46397 valid 0.93985, valid PER 26.06%
EPOCH 15:
  batch 50 loss: 0.3623331347107887
  batch 100 loss: 0.370635105073452
  batch 150 loss: 0.37327993154525757
  batch 200 loss: 0.3916913205385208
  batch 250 loss: 0.4001465755701065
  batch 300 loss: 0.36505696713924407
  batch 350 loss: 0.4057618850469589
  batch 400 loss: 0.3949092161655426
  batch 450 loss: 0.3913865712285042
  batch 500 loss: 0.3921893438696861
  batch 550 loss: 0.3975871765613556
  batch 600 loss: 0.40130920588970187
  batch 650 loss: 0.4351838484406471
  batch 700 loss: 0.4321661052107811
  batch 750 loss: 0.4383930063247681
  batch 800 loss: 0.4126331177353859
  batch 850 loss: 0.39263334482908246
  batch 9

  batch 600 loss: 0.9399257600307465
  batch 650 loss: 0.9166931736469269
  batch 700 loss: 0.9380678427219391
  batch 750 loss: 0.8745142185688018
  batch 800 loss: 0.8929941749572754
  batch 850 loss: 0.889417519569397
  batch 900 loss: 0.9058685731887818
LOSS train 0.90587 valid 0.98061, valid PER 31.34%
EPOCH 6:
  batch 50 loss: 0.8558193945884705
  batch 100 loss: 0.808549028635025
  batch 150 loss: 0.7946283864974976
  batch 200 loss: 0.8170462894439697
  batch 250 loss: 0.8496098268032074
  batch 300 loss: 0.8247335481643677
  batch 350 loss: 0.8310136151313782
  batch 400 loss: 0.81220743060112
  batch 450 loss: 0.8423798310756684
  batch 500 loss: 0.8253158605098725
  batch 550 loss: 0.8691041207313538
  batch 600 loss: 0.8117052364349365
  batch 650 loss: 0.8599151539802551
  batch 700 loss: 0.8403218531608582
  batch 750 loss: 0.8203808391094207
  batch 800 loss: 0.8105385470390319
  batch 850 loss: 0.8106956112384797
  batch 900 loss: 0.8315553915500641
LOSS train 0.83156 v

  batch 900 loss: 0.4502281594276428
LOSS train 0.45023 valid 0.96834, valid PER 26.26%
EPOCH 17:
  batch 50 loss: 0.35019983410835265
  batch 100 loss: 0.3477883306145668
  batch 150 loss: 0.345917287170887
  batch 200 loss: 0.36272934824228287
  batch 250 loss: 0.3991453218460083
  batch 300 loss: 0.37132582426071165
  batch 350 loss: 0.36983677059412
  batch 400 loss: 0.4002676439285278
  batch 450 loss: 0.3858865350484848
  batch 500 loss: 0.3746282762289047
  batch 550 loss: 0.36956184208393095
  batch 600 loss: 0.4082554429769516
  batch 650 loss: 0.39082739055156707
  batch 700 loss: 0.37902988731861115
  batch 750 loss: 0.3884440550208092
  batch 800 loss: 0.380169552564621
  batch 850 loss: 0.4077716556191444
  batch 900 loss: 0.39282498627901075
LOSS train 0.39282 valid 0.99293, valid PER 26.26%
EPOCH 18:
  batch 50 loss: 0.31030650705099105
  batch 100 loss: 0.33463876098394396
  batch 150 loss: 0.3307109159231186
  batch 200 loss: 0.3446583357453346
  batch 250 loss: 0.3596

  batch 50 loss: 0.6987799018621444
  batch 100 loss: 0.7084969627857208
  batch 150 loss: 0.7397369956970214
  batch 200 loss: 0.70780513048172
  batch 250 loss: 0.7076957523822784
  batch 300 loss: 0.7064252388477326
  batch 350 loss: 0.7485427570343017
  batch 400 loss: 0.7164519268274308
  batch 450 loss: 0.7429992949962616
  batch 500 loss: 0.7610198450088501
  batch 550 loss: 0.7278652691841125
  batch 600 loss: 0.7686130666732788
  batch 650 loss: 0.768517084121704
  batch 700 loss: 0.7251559239625931
  batch 750 loss: 0.7248374319076538
  batch 800 loss: 0.7374068689346314
  batch 850 loss: 0.7361820858716964
  batch 900 loss: 0.7340720927715302
LOSS train 0.73407 valid 0.88831, valid PER 27.79%
EPOCH 9:
  batch 50 loss: 0.6423114579916
  batch 100 loss: 0.646444879770279
  batch 150 loss: 0.6832466477155685
  batch 200 loss: 0.6380162060260772
  batch 250 loss: 0.6831538832187652
  batch 300 loss: 0.6860347223281861
  batch 350 loss: 0.6958754801750183
  batch 400 loss: 0.6891

  batch 300 loss: 0.3538187175989151
  batch 350 loss: 0.34531709969043733
  batch 400 loss: 0.34788135409355164
  batch 450 loss: 0.36827865898609163
  batch 500 loss: 0.366238369345665
  batch 550 loss: 0.35347097992897036
  batch 600 loss: 0.3500819593667984
  batch 650 loss: 0.3854894223809242
  batch 700 loss: 0.3600664559006691
  batch 750 loss: 0.34053280889987947
  batch 800 loss: 0.3782547268271446
  batch 850 loss: 0.37736049354076384
  batch 900 loss: 0.352489712536335
LOSS train 0.35249 valid 1.02452, valid PER 25.75%
EPOCH 20:
  batch 50 loss: 0.30359938114881513
  batch 100 loss: 0.2812565314769745
  batch 150 loss: 0.30425172626972197
  batch 200 loss: 0.30569772869348527
  batch 250 loss: 0.3246734693646431
  batch 300 loss: 0.3380791702866554
  batch 350 loss: 0.3027036327123642
  batch 400 loss: 0.3389632397890091
  batch 450 loss: 0.33962378561496737
  batch 500 loss: 0.3197706064581871
  batch 550 loss: 0.35421653509140016
  batch 600 loss: 0.3191822126507759
  batc

  batch 400 loss: 0.612696248292923
  batch 450 loss: 0.6242217975854873
  batch 500 loss: 0.6805233430862426
  batch 550 loss: 0.6815624338388443
  batch 600 loss: 0.6583178859949111
  batch 650 loss: 0.6289494854211807
  batch 700 loss: 0.6599508380889892
  batch 750 loss: 0.6289027231931686
  batch 800 loss: 0.6516576647758484
  batch 850 loss: 0.6508586102724075
  batch 900 loss: 0.6850400853157044
LOSS train 0.68504 valid 0.86538, valid PER 26.86%
EPOCH 11:
  batch 50 loss: 0.555969905257225
  batch 100 loss: 0.5475222170352936
  batch 150 loss: 0.564431546330452
  batch 200 loss: 0.590312522649765
  batch 250 loss: 0.618030053973198
  batch 300 loss: 0.5707537633180618
  batch 350 loss: 0.5890253174304962
  batch 400 loss: 0.621907993555069
  batch 450 loss: 0.6177930468320847
  batch 500 loss: 0.574165222644806
  batch 550 loss: 0.5926416766643524
  batch 600 loss: 0.6086423599720001
  batch 650 loss: 0.6560548102855682
  batch 700 loss: 0.5821153444051742
  batch 750 loss: 0.59

  batch 450 loss: 1.9406069827079773
  batch 500 loss: 1.8331819343566895
  batch 550 loss: 1.7778464007377623
  batch 600 loss: 1.816505057811737
  batch 650 loss: 1.6604608368873597
  batch 700 loss: 1.6627582383155823
  batch 750 loss: 1.632619755268097
  batch 800 loss: 1.6283797478675843
  batch 850 loss: 1.5945994782447814
  batch 900 loss: 1.5660619926452637
LOSS train 1.56606 valid 1.51303, valid PER 57.01%
EPOCH 2:
  batch 50 loss: 1.5128508257865905
  batch 100 loss: 1.490769157409668
  batch 150 loss: 1.4441561532020568
  batch 200 loss: 1.474134659767151
  batch 250 loss: 1.4800825500488282
  batch 300 loss: 1.4431217575073243
  batch 350 loss: 1.363006706237793
  batch 400 loss: 1.4252169585227967
  batch 450 loss: 1.3595135879516602
  batch 500 loss: 1.372188549041748
  batch 550 loss: 1.3990392017364501
  batch 600 loss: 1.3513495683670045
  batch 650 loss: 1.3680161499977113
  batch 700 loss: 1.3381013298034667
  batch 750 loss: 1.3187632942199707
  batch 800 loss: 1.28

  batch 800 loss: 0.6619769138097763
  batch 850 loss: 0.6815523439645768
  batch 900 loss: 0.6759602957963944
LOSS train 0.67596 valid 0.90234, valid PER 27.19%
EPOCH 13:
  batch 50 loss: 0.586477101445198
  batch 100 loss: 0.5866281563043594
  batch 150 loss: 0.5955002319812774
  batch 200 loss: 0.598739298582077
  batch 250 loss: 0.6045553684234619
  batch 300 loss: 0.5947940480709076
  batch 350 loss: 0.5889270478487014
  batch 400 loss: 0.59048230946064
  batch 450 loss: 0.59545989215374
  batch 500 loss: 0.5897282665967941
  batch 550 loss: 0.6249653321504592
  batch 600 loss: 0.6051031911373138
  batch 650 loss: 0.6102239519357682
  batch 700 loss: 0.6322624999284744
  batch 750 loss: 0.5929705196619034
  batch 800 loss: 0.6296602457761764
  batch 850 loss: 0.6282240587472916
  batch 900 loss: 0.6449568945169449
LOSS train 0.64496 valid 0.90885, valid PER 26.85%
EPOCH 14:
  batch 50 loss: 0.5406775206327439
  batch 100 loss: 0.5461617010831833
  batch 150 loss: 0.544828400611877

###### Continue Adam tuning (dropout rate 0.5 only)

In [4]:
# Only the Adam trainer is exercised in this continuation run.
Optimiser = [
    "Adam",
]
# Single dropout rate to train in this run.
dropout_rates = [0.5]

In [5]:
# Dropout sweep for the wider (model_dims=512) single-layer BiLSTM, repeated
# for every optimiser listed in `Optimiser`.
# Relies on `vocab`, `device`, `dropout_rates` and `Optimiser` being defined by
# earlier cells.
from collections import namedtuple  # imported here so the cell does not depend on an earlier cell's import
from datetime import datetime

import torch  # not used directly in this cell; kept in case later cells rely on the name

import model_regularisation_dropout
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
from decoder import decode  # not used directly in this cell; kept in case later cells rely on the name

print("Start tuning For wider 1 Layer LSTM")

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # The Adam and SGD configurations are identical except for the learning
    # rate, so a single dict is built and only `lr` depends on the optimiser.
    config = {'seed': 123,
              'train_json': 'train_fbank.json',
              'val_json': 'dev_fbank.json',
              'test_json': 'test_fbank.json',
              'batch_size': 4,
              'num_layers': 1,
              'fbank_dims': 23,
              'model_dims': 512,
              'concat': 1,
              'lr': 0.001 if use_adam else 0.5,
              'vocab': vocab,
              'report_interval': 50,
              'num_epochs': 20,
              'device': device,
             }
    # Expose the settings as attributes (args.lr, args.device, ...).
    args = namedtuple('x', config)(**config)

    # Pick the training routine once per optimiser instead of once per model.
    train_fn = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # A fresh model is built for every dropout rate.
        model_with_dropout = model_regularisation_dropout.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))

        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()

        duration = (end - start).total_seconds()
        # divmod(...)[0] is the whole number of elapsed minutes, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For Wider 1 Layer LSTM")
        

Start tuning For wider 1 Layer LSTM
Currently using Adam optimiser
Currently using dropout rate of 0.5
Total number of model parameters is 2240552
EPOCH 1:
  batch 50 loss: 5.755533928871155
  batch 100 loss: 3.1744091081619263
  batch 150 loss: 2.7647918128967284
  batch 200 loss: 2.4814211082458497
  batch 250 loss: 2.2743773555755613
  batch 300 loss: 2.10567253112793
  batch 350 loss: 1.9760479307174683
  batch 400 loss: 1.9497089934349061
  batch 450 loss: 1.842070724964142
  batch 500 loss: 1.772059552669525
  batch 550 loss: 1.7214161491394042
  batch 600 loss: 1.6797670388221742
  batch 650 loss: 1.6148813414573668
  batch 700 loss: 1.6050615429878234
  batch 750 loss: 1.5479127597808837
  batch 800 loss: 1.5451667237281799
  batch 850 loss: 1.5153027558326722
  batch 900 loss: 1.48150199174881
LOSS train 1.48150 valid 1.43681, valid PER 47.21%
EPOCH 2:
  batch 50 loss: 1.4306887841224671
  batch 100 loss: 1.3995813298225404
  batch 150 loss: 1.3545600533485413
  batch 200 loss

  batch 200 loss: 0.592981721162796
  batch 250 loss: 0.5903254455327988
  batch 300 loss: 0.5812612748146058
  batch 350 loss: 0.5784489917755127
  batch 400 loss: 0.624244077205658
  batch 450 loss: 0.6205977416038513
  batch 500 loss: 0.634345464706421
  batch 550 loss: 0.5750998115539551
  batch 600 loss: 0.6313319253921509
  batch 650 loss: 0.6349232512712478
  batch 700 loss: 0.6094418442249299
  batch 750 loss: 0.6235235285758972
  batch 800 loss: 0.6105699408054351
  batch 850 loss: 0.6446928799152374
  batch 900 loss: 0.6401231700181961
LOSS train 0.64012 valid 0.88616, valid PER 26.05%
EPOCH 13:
  batch 50 loss: 0.5229670441150666
  batch 100 loss: 0.5584414142370224
  batch 150 loss: 0.5396608966588974
  batch 200 loss: 0.5541112887859344
  batch 250 loss: 0.5553472250699997
  batch 300 loss: 0.5611824905872345
  batch 350 loss: 0.5646594762802124
  batch 400 loss: 0.5855917936563492
  batch 450 loss: 0.5765502154827118
  batch 500 loss: 0.5355299377441406
  batch 550 loss: 

In [4]:
# Only the SGD-with-scheduler trainer is exercised in this run (any name other
# than "Adam" selects it in the tuning cell below).
Optimiser = [
    "SGD_Scheduler",
]
# Dropout rates to sweep; one model is trained per rate.
dropout_rates = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

In [5]:
# Dropout sweep for the wider (model_dims=512) single-layer BiLSTM, repeated
# for every optimiser listed in `Optimiser`.
# Relies on `vocab`, `device`, `dropout_rates` and `Optimiser` being defined by
# earlier cells.
from collections import namedtuple  # imported here so the cell does not depend on an earlier cell's import
from datetime import datetime

import torch  # not used directly in this cell; kept in case later cells rely on the name

import model_regularisation_dropout
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
from decoder import decode  # not used directly in this cell; kept in case later cells rely on the name

print("Start tuning For wider 1 Layer LSTM")

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    use_adam = opt == "Adam"

    # The Adam and SGD configurations are identical except for the learning
    # rate, so a single dict is built and only `lr` depends on the optimiser.
    config = {'seed': 123,
              'train_json': 'train_fbank.json',
              'val_json': 'dev_fbank.json',
              'test_json': 'test_fbank.json',
              'batch_size': 4,
              'num_layers': 1,
              'fbank_dims': 23,
              'model_dims': 512,
              'concat': 1,
              'lr': 0.001 if use_adam else 0.5,
              'vocab': vocab,
              'report_interval': 50,
              'num_epochs': 20,
              'device': device,
             }
    # Expose the settings as attributes (args.lr, args.device, ...).
    args = namedtuple('x', config)(**config)

    # Pick the training routine once per optimiser instead of once per model.
    train_fn = adam_trainer if use_adam else sgd_trainer

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        # A fresh model is built for every dropout rate.
        model_with_dropout = model_regularisation_dropout.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))

        start = datetime.now()
        model_with_dropout.to(args.device)
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()

        duration = (end - start).total_seconds()
        # divmod(...)[0] is the whole number of elapsed minutes, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For Wider 1 Layer LSTM")
        

Start tuning For wider 1 Layer LSTM
Currently using SGD_Scheduler optimiser
Currently using dropout rate of 0
Total number of model parameters is 2240552
EPOCH 1:
  batch 50 loss: 5.112564177513122
  batch 100 loss: 3.3055451250076295
  batch 150 loss: 3.1033377838134766
  batch 200 loss: 2.848377914428711
  batch 250 loss: 2.6916692543029783
  batch 300 loss: 2.5320166826248167
  batch 350 loss: 2.4163118410110473
  batch 400 loss: 2.3821061420440675
  batch 450 loss: 2.298174934387207
  batch 500 loss: 2.1950828647613525
  batch 550 loss: 2.148702714443207
  batch 600 loss: 2.0816590332984926
  batch 650 loss: 1.9812079930305482
  batch 700 loss: 1.978788387775421
  batch 750 loss: 1.910346179008484
  batch 800 loss: 1.8898598313331605
  batch 850 loss: 1.855208122730255
  batch 900 loss: 1.8303350925445556
LOSS train 1.83034 valid 1.77394, valid PER 66.49%
EPOCH 2:
  batch 50 loss: 1.7773738479614258
  batch 100 loss: 1.7048520350456238
  batch 150 loss: 1.687483582496643
  batch 20

  batch 200 loss: 0.545132417678833
  batch 250 loss: 0.5553662419319153
  batch 300 loss: 0.5499298286437988
  batch 350 loss: 0.5465474557876587
  batch 400 loss: 0.5794289118051529
  batch 450 loss: 0.5699537533521652
  batch 500 loss: 0.5821452504396438
  batch 550 loss: 0.5427112525701523
  batch 600 loss: 0.5712779611349106
  batch 650 loss: 0.5996668606996536
  batch 700 loss: 0.5973357647657395
  batch 750 loss: 0.5629798120260239
  batch 800 loss: 0.5807257044315338
  batch 850 loss: 0.6155434507131576
  batch 900 loss: 0.6034255754947663
Epoch 00012: reducing learning rate of group 0 to 2.5000e-01.
LOSS train 0.60343 valid 0.95061, valid PER 27.24%
EPOCH 13:
  batch 50 loss: 0.46423712879419327
  batch 100 loss: 0.44658171236515043
  batch 150 loss: 0.43232550263404845
  batch 200 loss: 0.4347713911533356
  batch 250 loss: 0.43242357224225997
  batch 300 loss: 0.42777034670114517
  batch 350 loss: 0.4211980152130127
  batch 400 loss: 0.4294757318496704
  batch 450 loss: 0.411

  batch 500 loss: 1.573147301673889
  batch 550 loss: 1.5530430817604064
  batch 600 loss: 1.5183616781234741
  batch 650 loss: 1.537800328731537
  batch 700 loss: 1.4968914556503297
  batch 750 loss: 1.4851606011390686
  batch 800 loss: 1.4263258004188537
  batch 850 loss: 1.432520112991333
  batch 900 loss: 1.440121932029724
LOSS train 1.44012 valid 1.40190, valid PER 46.74%
EPOCH 3:
  batch 50 loss: 1.4149198865890502
  batch 100 loss: 1.3668462562561035
  batch 150 loss: 1.3523076891899108
  batch 200 loss: 1.337201819419861
  batch 250 loss: 1.314195649623871
  batch 300 loss: 1.3120716691017151
  batch 350 loss: 1.361608247756958
  batch 400 loss: 1.326514518260956
  batch 450 loss: 1.282817393541336
  batch 500 loss: 1.2617322516441345
  batch 550 loss: 1.2751185286045075
  batch 600 loss: 1.2507956182956697
  batch 650 loss: 1.2217898857593537
  batch 700 loss: 1.240902339220047
  batch 750 loss: 1.3006118083000182
  batch 800 loss: 1.2163991916179657
  batch 850 loss: 1.236711

  batch 700 loss: 0.5481157433986664
  batch 750 loss: 0.4892977702617645
  batch 800 loss: 0.5161862432956695
  batch 850 loss: 0.5497252070903778
  batch 900 loss: 0.5532179367542267
LOSS train 0.55322 valid 0.84588, valid PER 24.99%
EPOCH 14:
  batch 50 loss: 0.49431702971458436
  batch 100 loss: 0.4992279362678528
  batch 150 loss: 0.49597338676452635
  batch 200 loss: 0.48906121760606763
  batch 250 loss: 0.49046292662620544
  batch 300 loss: 0.5274491453170777
  batch 350 loss: 0.4772061192989349
  batch 400 loss: 0.5012496158480644
  batch 450 loss: 0.5106979823112487
  batch 500 loss: 0.5080923062562942
  batch 550 loss: 0.5258022391796112
  batch 600 loss: 0.4810876148939133
  batch 650 loss: 0.507489197254181
  batch 700 loss: 0.5288402992486954
  batch 750 loss: 0.48455176293849944
  batch 800 loss: 0.4786686950922012
  batch 850 loss: 0.5238566321134567
  batch 900 loss: 0.5154296189546586
Epoch 00014: reducing learning rate of group 0 to 6.2500e-02.
LOSS train 0.51543 vali

  batch 150 loss: 1.1743949592113494
  batch 200 loss: 1.2062722384929656
  batch 250 loss: 1.1994979894161224
  batch 300 loss: 1.2017689144611359
  batch 350 loss: 1.1245360219478606
  batch 400 loss: 1.1951877510547637
  batch 450 loss: 1.1523856115341187
  batch 500 loss: 1.1503411555290222
  batch 550 loss: 1.187881201505661
  batch 600 loss: 1.1676056838035584
  batch 650 loss: 1.145479860305786
  batch 700 loss: 1.1362339019775392
  batch 750 loss: 1.117742292881012
  batch 800 loss: 1.0737801373004914
  batch 850 loss: 1.1173320603370667
  batch 900 loss: 1.1547860181331635
LOSS train 1.15479 valid 1.11181, valid PER 35.18%
EPOCH 5:
  batch 50 loss: 1.079086879491806
  batch 100 loss: 1.0637152349948884
  batch 150 loss: 1.121157455444336
  batch 200 loss: 1.0494711124897003
  batch 250 loss: 1.0627468037605285
  batch 300 loss: 1.078684856891632
  batch 350 loss: 1.0575164866447448
  batch 400 loss: 1.0783769500255584
  batch 450 loss: 1.0348233842849732
  batch 500 loss: 1.07

  batch 400 loss: 0.5244093930721283
  batch 450 loss: 0.5191688078641892
  batch 500 loss: 0.48099903285503387
  batch 550 loss: 0.5264372289180755
  batch 600 loss: 0.5361028283834457
  batch 650 loss: 0.5361140978336334
  batch 700 loss: 0.5496016174554825
  batch 750 loss: 0.5291775131225586
  batch 800 loss: 0.5205046576261521
  batch 850 loss: 0.5081042695045471
  batch 900 loss: 0.5370255672931671
Epoch 00015: reducing learning rate of group 0 to 1.2500e-01.
LOSS train 0.53703 valid 0.85936, valid PER 25.17%
EPOCH 16:
  batch 50 loss: 0.4806993353366852
  batch 100 loss: 0.44433277636766433
  batch 150 loss: 0.45800987422466277
  batch 200 loss: 0.46047449827194215
  batch 250 loss: 0.4648360592126846
  batch 300 loss: 0.4610844373703003
  batch 350 loss: 0.46602651953697205
  batch 400 loss: 0.4715236622095108
  batch 450 loss: 0.4781614488363266
  batch 500 loss: 0.4500925183296204
  batch 550 loss: 0.45829766392707827
  batch 600 loss: 0.43891619503498075
  batch 650 loss: 0.

LOSS train 1.07095 valid 1.03832, valid PER 33.73%
EPOCH 6:
  batch 50 loss: 1.0630870580673217
  batch 100 loss: 1.001206921339035
  batch 150 loss: 1.0120127773284913
  batch 200 loss: 1.0071174788475037
  batch 250 loss: 1.0457606875896455
  batch 300 loss: 1.0292725598812102
  batch 350 loss: 1.042575750350952
  batch 400 loss: 1.0068545746803284
  batch 450 loss: 1.0401436078548432
  batch 500 loss: 1.0014583003520965
  batch 550 loss: 1.0362426841259003
  batch 600 loss: 1.0132618129253388
  batch 650 loss: 1.0154386484622955
  batch 700 loss: 1.0191441416740417
  batch 750 loss: 1.0014268600940703
  batch 800 loss: 0.9972923946380615
  batch 850 loss: 0.9896293890476227
  batch 900 loss: 1.0055956959724426
LOSS train 1.00560 valid 1.02327, valid PER 32.30%
EPOCH 7:
  batch 50 loss: 0.9692187523841858
  batch 100 loss: 0.9912802934646606
  batch 150 loss: 0.9543732559680939
  batch 200 loss: 0.945831533074379
  batch 250 loss: 0.9593860542774201
  batch 300 loss: 0.94654191255569

  batch 150 loss: 0.4695447093248367
  batch 200 loss: 0.4596595650911331
  batch 250 loss: 0.47629631996154786
  batch 300 loss: 0.4864446094632149
  batch 350 loss: 0.45120300590991974
  batch 400 loss: 0.5016255861520768
  batch 450 loss: 0.4818764042854309
  batch 500 loss: 0.47168093144893647
  batch 550 loss: 0.47665621876716613
  batch 600 loss: 0.5030154162645339
  batch 650 loss: 0.4715789979696274
  batch 700 loss: 0.47409560799598693
  batch 750 loss: 0.47063573718070983
  batch 800 loss: 0.4678617638349533
  batch 850 loss: 0.48902686417102814
  batch 900 loss: 0.4754356372356415
Epoch 00017: reducing learning rate of group 0 to 6.2500e-02.
LOSS train 0.47544 valid 0.85536, valid PER 24.83%
EPOCH 18:
  batch 50 loss: 0.4526678740978241
  batch 100 loss: 0.4507301950454712
  batch 150 loss: 0.4690092611312866
  batch 200 loss: 0.44686767280101775
  batch 250 loss: 0.4540708392858505
  batch 300 loss: 0.42259536027908323
  batch 350 loss: 0.43620520800352097
  batch 400 loss:

  batch 850 loss: 1.01988254904747
  batch 900 loss: 1.0243774235248566
LOSS train 1.02438 valid 0.99432, valid PER 31.84%
EPOCH 8:
  batch 50 loss: 0.953800493478775
  batch 100 loss: 0.9404949700832367
  batch 150 loss: 0.9542383754253387
  batch 200 loss: 0.9253785753250122
  batch 250 loss: 0.9431506836414337
  batch 300 loss: 0.8929647672176361
  batch 350 loss: 0.9687912595272065
  batch 400 loss: 0.928209685087204
  batch 450 loss: 0.9316632425785065
  batch 500 loss: 0.9783075845241547
  batch 550 loss: 0.9084952092170715
  batch 600 loss: 0.9683585619926453
  batch 650 loss: 0.9679429543018341
  batch 700 loss: 0.9253095698356628
  batch 750 loss: 0.9478265035152436
  batch 800 loss: 0.9472755694389343
  batch 850 loss: 0.9495887982845307
  batch 900 loss: 0.951854737997055
LOSS train 0.95185 valid 0.97683, valid PER 31.12%
EPOCH 9:
  batch 50 loss: 0.8739570188522339
  batch 100 loss: 0.9037275898456574
  batch 150 loss: 0.8901740682125091
  batch 200 loss: 0.8723218142986298

Epoch 00018: reducing learning rate of group 0 to 3.1250e-02.
LOSS train 0.51380 valid 0.85656, valid PER 24.66%
EPOCH 19:
  batch 50 loss: 0.4814688217639923
  batch 100 loss: 0.47496295273303984
  batch 150 loss: 0.47737405449151993
  batch 200 loss: 0.48728883266448975
  batch 250 loss: 0.47549105405807496
  batch 300 loss: 0.48086371898651126
  batch 350 loss: 0.47651096403598786
  batch 400 loss: 0.47891995012760163
  batch 450 loss: 0.4965215283632278
  batch 500 loss: 0.4944687402248383
  batch 550 loss: 0.4642008376121521
  batch 600 loss: 0.4668017953634262
  batch 650 loss: 0.5261361753940582
  batch 700 loss: 0.48003258645534513
  batch 750 loss: 0.4717707180976868
  batch 800 loss: 0.49638315916061404
  batch 850 loss: 0.4817576628923416
  batch 900 loss: 0.48720639288425444
Epoch 00019: reducing learning rate of group 0 to 1.5625e-02.
LOSS train 0.48721 valid 0.86012, valid PER 24.62%
EPOCH 20:
  batch 50 loss: 0.4817079544067383
  batch 100 loss: 0.4687881565093994
  batc

  batch 800 loss: 0.9691673612594605
  batch 850 loss: 0.9623625814914704
  batch 900 loss: 0.9038423025608062
LOSS train 0.90384 valid 0.96019, valid PER 30.21%
EPOCH 10:
  batch 50 loss: 0.8670732367038727
  batch 100 loss: 0.8978613340854644
  batch 150 loss: 0.8891121423244477
  batch 200 loss: 0.9206712174415589
  batch 250 loss: 0.9210029590129852
  batch 300 loss: 0.8882624566555023
  batch 350 loss: 0.9017565858364105
  batch 400 loss: 0.8563500785827637
  batch 450 loss: 0.8770472311973572
  batch 500 loss: 0.9008853209018707
  batch 550 loss: 0.9373248088359832
  batch 600 loss: 0.8856323778629303
  batch 650 loss: 0.8803555023670196
  batch 700 loss: 0.9089763569831848
  batch 750 loss: 0.8930118119716645
  batch 800 loss: 0.8999055337905884
  batch 850 loss: 0.9025368964672089
  batch 900 loss: 0.8960216772556305
Epoch 00010: reducing learning rate of group 0 to 2.5000e-01.
LOSS train 0.89602 valid 0.98559, valid PER 31.58%
EPOCH 11:
  batch 50 loss: 0.816790611743927
  bat

  batch 650 loss: 0.5779419898986816
  batch 700 loss: 0.5749426466226578
  batch 750 loss: 0.5585409581661225
  batch 800 loss: 0.5949688035249711
  batch 850 loss: 0.5723340636491776
  batch 900 loss: 0.5915161174535751
Epoch 00020: reducing learning rate of group 0 to 3.9062e-03.
LOSS train 0.59152 valid 0.86209, valid PER 25.73%
Training finished in 8.0 minutes.
Model saved to checkpoints/20231206_185908/model_16
Finish SGD_Scheduler optimiser
End tuning For Wider 1 Layer LSTM


## 3. Uni-directional LSTM

In [4]:
# Only the Adam trainer is exercised for the uni-directional LSTM sweep.
Optimiser = [
    "Adam",
]
# Dropout rates to sweep; one model is trained per rate.
dropout_rates = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

In [None]:
import model_uni_directional_LSTM
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

# Dropout sweep for the uni-directional LSTM: for each optimiser named in
# `Optimiser`, train one fresh model per value in `dropout_rates`.
# Uses `vocab`, `device` and `namedtuple` defined in earlier cells.

# Settings common to every run. 'lr' is only a placeholder here (it keeps the
# field order of `args` unchanged) and is set per optimiser inside the loop.
sweep_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 2,
    'fbank_dims': 23,
    'model_dims': 210,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Derived from the configured depth so the banner always matches the model
# that is actually being trained.
model_label = "uni directional {} Layer LSTM".format(sweep_config['num_layers'])

print("Start tuning For " + model_label)

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    # Any name other than "Adam" is handled by the trainer_SGD_Scheduler trainer.
    use_adam = opt == "Adam"
    train_fn = adam_trainer if use_adam else sgd_trainer
    args = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', args)(**args)

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        model_with_dropout = model_uni_directional_LSTM.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        # The trainer's return value is reported below as the saved-model path.
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Whole minutes only, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For " + model_label)

Start tuning For uni directional 1 Layer LSTM
Currently using Adam optimiser
Currently using dropout rate of 0
Total number of model parameters is 560320
EPOCH 1:
  batch 50 loss: 7.068644766807556
  batch 100 loss: 3.2868471431732176
  batch 150 loss: 3.2423207855224607
  batch 200 loss: 3.1164698123931887
  batch 250 loss: 2.9227201080322267
  batch 300 loss: 2.772088499069214
  batch 350 loss: 2.657766065597534
  batch 400 loss: 2.5803119564056396
  batch 450 loss: 2.4692926168441773
  batch 500 loss: 2.329936728477478
  batch 550 loss: 2.230182764530182
  batch 600 loss: 2.1393247032165528
  batch 650 loss: 2.01820960521698
  batch 700 loss: 1.9971644401550293
  batch 750 loss: 1.8908081555366516
  batch 800 loss: 1.8770714354515077
  batch 850 loss: 1.8051597023010253
  batch 900 loss: 1.7391205859184264
LOSS train 1.73912 valid 1.72848, valid PER 56.36%
EPOCH 2:
  batch 50 loss: 1.670622899532318
  batch 100 loss: 1.6264806747436524
  batch 150 loss: 1.576259949207306
  batch 200

  batch 200 loss: 0.8000277239084244
  batch 250 loss: 0.8315515196323395
  batch 300 loss: 0.7757016468048096
  batch 350 loss: 0.7851963746547699
  batch 400 loss: 0.8257591187953949
  batch 450 loss: 0.8080403232574462
  batch 500 loss: 0.814664855003357
  batch 550 loss: 0.7656943237781525
  batch 600 loss: 0.7739217472076416
  batch 650 loss: 0.8383294546604156
  batch 700 loss: 0.8056134557724
  batch 750 loss: 0.7768097698688508
  batch 800 loss: 0.7624419391155243
  batch 850 loss: 0.816886100769043
  batch 900 loss: 0.8003061127662658
LOSS train 0.80031 valid 0.91316, valid PER 29.25%
EPOCH 13:
  batch 50 loss: 0.7305805402994155
  batch 100 loss: 0.7535984486341476
  batch 150 loss: 0.7388305532932281
  batch 200 loss: 0.780689868927002
  batch 250 loss: 0.7537158250808715
  batch 300 loss: 0.7357066428661346
  batch 350 loss: 0.7261322766542435
  batch 400 loss: 0.7850597286224366
  batch 450 loss: 0.7965561699867248
  batch 500 loss: 0.754290064573288
  batch 550 loss: 0.77

  batch 300 loss: 1.3024451661109924
  batch 350 loss: 1.3586368155479431
  batch 400 loss: 1.3236861062049865
  batch 450 loss: 1.3044300782680511
  batch 500 loss: 1.303516070842743
  batch 550 loss: 1.3055153918266296
  batch 600 loss: 1.2570006799697877
  batch 650 loss: 1.2530989241600037
  batch 700 loss: 1.2610418105125427
  batch 750 loss: 1.3029775571823121
  batch 800 loss: 1.251253912448883
  batch 850 loss: 1.2910902321338653
  batch 900 loss: 1.238579704761505
LOSS train 1.23858 valid 1.27636, valid PER 40.29%
EPOCH 4:
  batch 50 loss: 1.2058268535137175
  batch 100 loss: 1.216104918718338
  batch 150 loss: 1.1927248644828796
  batch 200 loss: 1.1997068107128144
  batch 250 loss: 1.206126594543457
  batch 300 loss: 1.2462512385845184
  batch 350 loss: 1.1721721875667572
  batch 400 loss: 1.199078425168991
  batch 450 loss: 1.1843448555469513
  batch 500 loss: 1.1510384511947631
  batch 550 loss: 1.2062680447101592
  batch 600 loss: 1.207136217355728
  batch 650 loss: 1.184

  batch 650 loss: 0.7455121546983718
  batch 700 loss: 0.7831431257724762
  batch 750 loss: 0.7258519268035889
  batch 800 loss: 0.7119515711069107
  batch 850 loss: 0.7583411639928818
  batch 900 loss: 0.7272481453418732
LOSS train 0.72725 valid 0.90672, valid PER 28.26%
EPOCH 15:
  batch 50 loss: 0.6962152087688446
  batch 100 loss: 0.6889090347290039
  batch 150 loss: 0.6834739708900451
  batch 200 loss: 0.7156212919950485
  batch 250 loss: 0.7065595763921738
  batch 300 loss: 0.6894778108596802
  batch 350 loss: 0.7029316341876983
  batch 400 loss: 0.730038970708847
  batch 450 loss: 0.7253419208526611
  batch 500 loss: 0.6908002245426178
  batch 550 loss: 0.7369162499904632
  batch 600 loss: 0.7416010630130768
  batch 650 loss: 0.7327955377101898
  batch 700 loss: 0.728225474357605
  batch 750 loss: 0.7357064861059189
  batch 800 loss: 0.7078442579507828
  batch 850 loss: 0.7039832293987274
  batch 900 loss: 0.7311299717426301
LOSS train 0.73113 valid 0.89539, valid PER 27.70%
EPO

  batch 800 loss: 1.1242998957633972
  batch 850 loss: 1.0978145229816436
  batch 900 loss: 1.1208236336708068
LOSS train 1.12082 valid 1.14701, valid PER 36.21%
EPOCH 6:
  batch 50 loss: 1.1000030469894408
  batch 100 loss: 1.0552160215377808
  batch 150 loss: 1.0262992525100707
  batch 200 loss: 1.0740707695484162
  batch 250 loss: 1.1154062533378601
  batch 300 loss: 1.080606083869934
  batch 350 loss: 1.0602339291572571
  batch 400 loss: 1.0352595889568328
  batch 450 loss: 1.073378438949585
  batch 500 loss: 1.0503549551963807
  batch 550 loss: 1.093051769733429
  batch 600 loss: 1.0373570501804352
  batch 650 loss: 1.0649416732788086
  batch 700 loss: 1.0573769342899322
  batch 750 loss: 1.0252625465393066
  batch 800 loss: 1.0270174825191498
  batch 850 loss: 1.0170249295234681
  batch 900 loss: 1.0483681952953339
LOSS train 1.04837 valid 1.08995, valid PER 34.42%
EPOCH 7:
  batch 50 loss: 1.0447641813755035
  batch 100 loss: 1.0460060167312621
  batch 150 loss: 1.00511602759361

  batch 150 loss: 0.6742611169815064
  batch 200 loss: 0.698261433839798
  batch 250 loss: 0.6916088259220123
  batch 300 loss: 0.7112164133787156
  batch 350 loss: 0.6833381915092468
  batch 400 loss: 0.7471463686227798
  batch 450 loss: 0.7001542127132416
  batch 500 loss: 0.6981944358348846
  batch 550 loss: 0.6958925664424896
  batch 600 loss: 0.7441695201396942
  batch 650 loss: 0.6949682486057281
  batch 700 loss: 0.692716081738472
  batch 750 loss: 0.6755614966154099
  batch 800 loss: 0.6777801024913788
  batch 850 loss: 0.7058265697956085
  batch 900 loss: 0.692073946595192
LOSS train 0.69207 valid 0.89086, valid PER 27.42%
EPOCH 18:
  batch 50 loss: 0.6679976832866669
  batch 100 loss: 0.6528937137126922
  batch 150 loss: 0.6850659197568894
  batch 200 loss: 0.6646606969833374
  batch 250 loss: 0.6823423361778259
  batch 300 loss: 0.6630653411149978
  batch 350 loss: 0.6932913535833358
  batch 400 loss: 0.6619552934169769
  batch 450 loss: 0.6991228318214416
  batch 500 loss: 

  batch 300 loss: 0.9427116703987122
  batch 350 loss: 1.0354172229766845
  batch 400 loss: 0.977017011642456
  batch 450 loss: 1.0054696369171143
  batch 500 loss: 1.00422119140625
  batch 550 loss: 0.9683794724941254
  batch 600 loss: 1.0250252628326415
  batch 650 loss: 1.0454233229160308
  batch 700 loss: 0.9544640362262726
  batch 750 loss: 0.9876099634170532
  batch 800 loss: 0.9923493635654449
  batch 850 loss: 0.9833037805557251
  batch 900 loss: 0.9803268277645111
LOSS train 0.98033 valid 1.04263, valid PER 32.13%
EPOCH 9:
  batch 50 loss: 0.9125952112674713
  batch 100 loss: 0.9464164209365845
  batch 150 loss: 0.9530799508094787
  batch 200 loss: 0.9162597060203552
  batch 250 loss: 0.9521110844612122
  batch 300 loss: 0.9422731041908264
  batch 350 loss: 0.9801238977909088
  batch 400 loss: 0.9429351842403412
  batch 450 loss: 0.9554661011695862
  batch 500 loss: 0.9178290092945098
  batch 550 loss: 0.9766494119167328
  batch 600 loss: 0.9622375333309173
  batch 650 loss: 0

  batch 650 loss: 0.736552112698555
  batch 700 loss: 0.6551927828788757
  batch 750 loss: 0.6649380666017533
  batch 800 loss: 0.7096665036678315
  batch 850 loss: 0.7125500464439392
  batch 900 loss: 0.7164711463451385
LOSS train 0.71647 valid 0.88072, valid PER 26.93%
EPOCH 20:
  batch 50 loss: 0.6325559604167938
  batch 100 loss: 0.6451751148700714
  batch 150 loss: 0.6496818941831589
  batch 200 loss: 0.6569957983493805
  batch 250 loss: 0.6548853641748429
  batch 300 loss: 0.6984907245635986
  batch 350 loss: 0.6450159102678299
  batch 400 loss: 0.660501571893692
  batch 450 loss: 0.6663973706960679
  batch 500 loss: 0.6446354824304581
  batch 550 loss: 0.6997564119100571
  batch 600 loss: 0.6417151600122452
  batch 650 loss: 0.685376780629158
  batch 700 loss: 0.690725759267807
  batch 750 loss: 0.6581242877244949
  batch 800 loss: 0.6892657709121705
  batch 850 loss: 0.6989101016521454
  batch 900 loss: 0.6753958594799042
LOSS train 0.67540 valid 0.88265, valid PER 27.25%
Train

  batch 800 loss: 0.9495143926143647
  batch 850 loss: 0.9757311034202576
  batch 900 loss: 0.9828205323219299
LOSS train 0.98282 valid 1.00373, valid PER 31.06%
EPOCH 11:
  batch 50 loss: 0.9207299494743347
  batch 100 loss: 0.9087458670139312
  batch 150 loss: 0.9106666398048401
  batch 200 loss: 0.964024885892868
  batch 250 loss: 0.9544546425342559
  batch 300 loss: 0.8976307284832
  batch 350 loss: 0.9504489779472352
  batch 400 loss: 1.0083463442325593
  batch 450 loss: 0.9764277517795563
  batch 500 loss: 0.9129799127578735
  batch 550 loss: 0.9160435926914215
  batch 600 loss: 0.9348890674114227
  batch 650 loss: 0.9621075403690338
  batch 700 loss: 0.9044822025299072
  batch 750 loss: 0.9123932433128357
  batch 800 loss: 0.9511575508117676
  batch 850 loss: 0.9684589076042175
  batch 900 loss: 0.9541422259807587
LOSS train 0.95414 valid 0.97881, valid PER 30.90%
EPOCH 12:
  batch 50 loss: 0.9104132771492004
  batch 100 loss: 0.9145445728302002
  batch 150 loss: 0.8560525524616

LOSS train 1.90720 valid 1.79373, valid PER 62.94%
EPOCH 2:
  batch 50 loss: 1.8301711058616639
  batch 100 loss: 1.7926499128341675
  batch 150 loss: 1.7303179717063903
  batch 200 loss: 1.769501256942749
  batch 250 loss: 1.744200382232666
  batch 300 loss: 1.718890097141266
  batch 350 loss: 1.6274459266662598
  batch 400 loss: 1.6443031048774719
  batch 450 loss: 1.6095751333236694
  batch 500 loss: 1.6112739849090576
  batch 550 loss: 1.6410032892227173
  batch 600 loss: 1.5927890825271607
  batch 650 loss: 1.616264054775238
  batch 700 loss: 1.5981450653076172
  batch 750 loss: 1.5490399503707886
  batch 800 loss: 1.5195410609245301
  batch 850 loss: 1.532885057926178
  batch 900 loss: 1.5352124953269959
LOSS train 1.53521 valid 1.42954, valid PER 46.23%
EPOCH 3:
  batch 50 loss: 1.4949604225158692
  batch 100 loss: 1.4676747131347656
  batch 150 loss: 1.4578939700126647
  batch 200 loss: 1.4793711972236634
  batch 250 loss: 1.4336978101730347
  batch 300 loss: 1.431298966407776


In [4]:
# Single-point sweep for the cell below: Adam trainer, dropout rate 0.5 only.
Optimiser = [
    "Adam",
]
dropout_rates = [
    0.5,
]

In [5]:
import model_uni_directional_LSTM
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

# Dropout sweep for the uni-directional LSTM: for each optimiser named in
# `Optimiser`, train one fresh model per value in `dropout_rates`.
# Uses `vocab`, `device` and `namedtuple` defined in earlier cells.

# Settings common to every run. 'lr' is only a placeholder here (it keeps the
# field order of `args` unchanged) and is set per optimiser inside the loop.
sweep_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 2,
    'fbank_dims': 23,
    'model_dims': 210,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Derived from the configured depth so the banner always matches the model
# that is actually being trained.
model_label = "uni directional {} Layer LSTM".format(sweep_config['num_layers'])

print("Start tuning For " + model_label)

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    # Any name other than "Adam" is handled by the trainer_SGD_Scheduler trainer.
    use_adam = opt == "Adam"
    train_fn = adam_trainer if use_adam else sgd_trainer
    args = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', args)(**args)

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        model_with_dropout = model_uni_directional_LSTM.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        # The trainer's return value is reported below as the saved-model path.
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Whole minutes only, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For " + model_label)

Start tuning For uni directional 1 Layer LSTM
Currently using Adam optimiser
Currently using dropout rate of 0.5
Total number of model parameters is 560320
EPOCH 1:
  batch 50 loss: 6.976097302436829
  batch 100 loss: 3.2917252731323243
  batch 150 loss: 3.2514602851867678
  batch 200 loss: 3.1707065677642823
  batch 250 loss: 2.978942756652832
  batch 300 loss: 2.822084379196167
  batch 350 loss: 2.7268634033203125
  batch 400 loss: 2.666108479499817
  batch 450 loss: 2.5964044094085694
  batch 500 loss: 2.478789610862732
  batch 550 loss: 2.3846352005004885
  batch 600 loss: 2.299918322563171
  batch 650 loss: 2.193564465045929
  batch 700 loss: 2.166175916194916
  batch 750 loss: 2.0769609427452087
  batch 800 loss: 2.0440055990219115
  batch 850 loss: 1.983420639038086
  batch 900 loss: 1.9359222722053528
LOSS train 1.93592 valid 1.81685, valid PER 63.62%
EPOCH 2:
  batch 50 loss: 1.8677888631820678
  batch 100 loss: 1.8118211460113525
  batch 150 loss: 1.748062677383423
  batch 20

  batch 200 loss: 0.9350296974182128
  batch 250 loss: 0.9640889620780945
  batch 300 loss: 0.9281706213951111
  batch 350 loss: 0.9329260003566742
  batch 400 loss: 0.9653176987171173
  batch 450 loss: 0.9725274789333344
  batch 500 loss: 0.9895003342628479
  batch 550 loss: 0.9076323556900024
  batch 600 loss: 0.9635228085517883
  batch 650 loss: 0.9959461188316345
  batch 700 loss: 0.964137521982193
  batch 750 loss: 0.94458944439888
  batch 800 loss: 0.9345377123355866
  batch 850 loss: 0.9751678979396821
  batch 900 loss: 0.9743003284931183
LOSS train 0.97430 valid 0.97837, valid PER 31.46%
EPOCH 13:
  batch 50 loss: 0.9073807609081268
  batch 100 loss: 0.9450461864471436
  batch 150 loss: 0.8791310358047485
  batch 200 loss: 0.9613366317749024
  batch 250 loss: 0.9270772182941437
  batch 300 loss: 0.8997859239578248
  batch 350 loss: 0.9119932532310486
  batch 400 loss: 0.9332650446891785
  batch 450 loss: 0.941560308933258
  batch 500 loss: 0.8999531078338623
  batch 550 loss: 0

In [6]:
# Sweep grid for the cell below: the SGD_Scheduler trainer over six dropout
# rates. The first rate is the int 0 (not 0.0) so the progress message prints
# "dropout rate of 0".
Optimiser = [
    "SGD_Scheduler",
]
dropout_rates = [
    0,
    0.1,
    0.2,
    0.3,
    0.4,
    0.5,
]

In [None]:
import model_uni_directional_LSTM
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

# Dropout sweep for the uni-directional LSTM: for each optimiser named in
# `Optimiser`, train one fresh model per value in `dropout_rates`.
# Uses `vocab`, `device` and `namedtuple` defined in earlier cells.

# Settings common to every run. 'lr' is only a placeholder here (it keeps the
# field order of `args` unchanged) and is set per optimiser inside the loop.
sweep_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 2,
    'fbank_dims': 23,
    'model_dims': 210,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Derived from the configured depth so the banner always matches the model
# that is actually being trained.
model_label = "uni directional {} Layer LSTM".format(sweep_config['num_layers'])

print("Start tuning For " + model_label)

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    # Any name other than "Adam" is handled by the trainer_SGD_Scheduler trainer.
    use_adam = opt == "Adam"
    train_fn = adam_trainer if use_adam else sgd_trainer
    args = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', args)(**args)

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        model_with_dropout = model_uni_directional_LSTM.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        # The trainer's return value is reported below as the saved-model path.
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Whole minutes only, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For " + model_label)

Start tuning For uni directional 1 Layer LSTM
Currently using SGD_Scheduler optimiser
Currently using dropout rate of 0
Total number of model parameters is 560320
EPOCH 1:
  batch 50 loss: 5.239828119277954
  batch 100 loss: 3.409066710472107
  batch 150 loss: 3.30587975025177
  batch 200 loss: 3.249364848136902
  batch 250 loss: 3.191646680831909
  batch 300 loss: 3.0570363903045656
  batch 350 loss: 2.901274256706238
  batch 400 loss: 2.745630340576172
  batch 450 loss: 2.66358802318573
  batch 500 loss: 2.528423981666565
  batch 550 loss: 2.431274275779724
  batch 600 loss: 2.360090832710266
  batch 650 loss: 2.2537232780456544
  batch 700 loss: 2.2244571018218995
  batch 750 loss: 2.1404535603523254
  batch 800 loss: 2.1029496955871583
  batch 850 loss: 2.0288353419303893
  batch 900 loss: 1.9828188681602479
LOSS train 1.98282 valid 1.91523, valid PER 70.89%
EPOCH 2:
  batch 50 loss: 1.9027397084236144
  batch 100 loss: 1.8303321027755737
  batch 150 loss: 1.7563483500480652
  batc

  batch 100 loss: 0.7130303448438644
  batch 150 loss: 0.6817633861303329
  batch 200 loss: 0.721118580698967
  batch 250 loss: 0.7276902294158936
  batch 300 loss: 0.6977686738967895
  batch 350 loss: 0.7144143289327621
  batch 400 loss: 0.7453233313560486
  batch 450 loss: 0.7283589160442352
  batch 500 loss: 0.7314473873376847
  batch 550 loss: 0.6786418908834457
  batch 600 loss: 0.6990543532371521
  batch 650 loss: 0.7377239680290222
  batch 700 loss: 0.7179063057899475
  batch 750 loss: 0.7159662920236588
  batch 800 loss: 0.7024925684928894
  batch 850 loss: 0.7389034390449524
  batch 900 loss: 0.7483813053369522
LOSS train 0.74838 valid 0.86546, valid PER 27.07%
EPOCH 13:
  batch 50 loss: 0.6643338096141815
  batch 100 loss: 0.7000603151321411
  batch 150 loss: 0.675392650961876
  batch 200 loss: 0.6954650175571442
  batch 250 loss: 0.7076703292131424
  batch 300 loss: 0.6943372750282287
  batch 350 loss: 0.6767704182863236
  batch 400 loss: 0.7219039410352707
  batch 450 loss:

  batch 50 loss: 1.4124713921546936
  batch 100 loss: 1.4164020538330078
  batch 150 loss: 1.3866048312187196
  batch 200 loss: 1.3661624813079833
  batch 250 loss: 1.347643632888794
  batch 300 loss: 1.344867112636566
  batch 350 loss: 1.3798460340499878
  batch 400 loss: 1.3616048645973207
  batch 450 loss: 1.3251932215690614
  batch 500 loss: 1.337709937095642
  batch 550 loss: 1.3134056520462036
  batch 600 loss: 1.31200803399086
  batch 650 loss: 1.2547938823699951
  batch 700 loss: 1.2699825048446656
  batch 750 loss: 1.345962507724762
  batch 800 loss: 1.2692055797576904
  batch 850 loss: 1.3057332730293274
  batch 900 loss: 1.2334891939163208
LOSS train 1.23349 valid 1.25444, valid PER 40.05%
EPOCH 4:
  batch 50 loss: 1.2374767899513244
  batch 100 loss: 1.23666219830513
  batch 150 loss: 1.1944410634040832
  batch 200 loss: 1.2149111711978913
  batch 250 loss: 1.2289338529109954
  batch 300 loss: 1.272038996219635
  batch 350 loss: 1.1680487787723541
  batch 400 loss: 1.192639

  batch 350 loss: 0.6830681312084198
  batch 400 loss: 0.6733462285995483
  batch 450 loss: 0.6726241570711136
  batch 500 loss: 0.7026231038570404
  batch 550 loss: 0.7130435907840729
  batch 600 loss: 0.6820482271909714
  batch 650 loss: 0.7153279852867126
  batch 700 loss: 0.7199150443077087
  batch 750 loss: 0.6893043661117554
  batch 800 loss: 0.6718603956699372
  batch 850 loss: 0.6981209981441497
  batch 900 loss: 0.7076051557064056
LOSS train 0.70761 valid 0.86032, valid PER 27.02%
EPOCH 15:
  batch 50 loss: 0.6804235732555389
  batch 100 loss: 0.6769647318124771
  batch 150 loss: 0.6745946675539016
  batch 200 loss: 0.6736989867687225
  batch 250 loss: 0.6812232285737991
  batch 300 loss: 0.6580342996120453
  batch 350 loss: 0.6682374316453934
  batch 400 loss: 0.6634026020765305
  batch 450 loss: 0.6767765283584595
  batch 500 loss: 0.6353346568346023
  batch 550 loss: 0.6990605318546295
  batch 600 loss: 0.6863044607639313
  batch 650 loss: 0.6865588283538818
  batch 700 los

  batch 250 loss: 1.1263668286800383
  batch 300 loss: 1.1321280455589295
  batch 350 loss: 1.1406924223899841
  batch 400 loss: 1.1430746030807495
  batch 450 loss: 1.1445577669143676
  batch 500 loss: 1.1481684637069702
  batch 550 loss: 1.1147120308876037
  batch 600 loss: 1.1620667099952697
  batch 650 loss: 1.1246913027763368
  batch 700 loss: 1.1724723386764526
  batch 750 loss: 1.1272145116329193
  batch 800 loss: 1.1421124172210693
  batch 850 loss: 1.1473607337474823
  batch 900 loss: 1.1295252895355226
LOSS train 1.12953 valid 1.12988, valid PER 36.14%
EPOCH 6:
  batch 50 loss: 1.1210280549526215
  batch 100 loss: 1.0773859930038452
  batch 150 loss: 1.0770939218997955
  batch 200 loss: 1.0951190757751466
  batch 250 loss: 1.0994775307178497
  batch 300 loss: 1.0754377603530885
  batch 350 loss: 1.0861828207969666
  batch 400 loss: 1.066983026266098
  batch 450 loss: 1.1156850790977477
  batch 500 loss: 1.0897267591953277
  batch 550 loss: 1.129799188375473
  batch 600 loss: 

  batch 450 loss: 0.6877789056301117
  batch 500 loss: 0.6694679528474807
  batch 550 loss: 0.6596268856525421
  batch 600 loss: 0.675288051366806
  batch 650 loss: 0.6895640683174133
  batch 700 loss: 0.6588101857900619
  batch 750 loss: 0.6615969389677048
  batch 800 loss: 0.675544280409813
  batch 850 loss: 0.6718381321430207
  batch 900 loss: 0.6795142406225204
LOSS train 0.67951 valid 0.84318, valid PER 26.54%
EPOCH 17:
  batch 50 loss: 0.6715884816646576
  batch 100 loss: 0.6654471242427826
  batch 150 loss: 0.6455340385437012
  batch 200 loss: 0.6466799306869507
  batch 250 loss: 0.6653047299385071
  batch 300 loss: 0.658716213107109
  batch 350 loss: 0.6387039971351623
  batch 400 loss: 0.7036819934844971
  batch 450 loss: 0.6695623189210892
  batch 500 loss: 0.6479550737142563
  batch 550 loss: 0.6641303032636643
  batch 600 loss: 0.6986031228303909
  batch 650 loss: 0.6362096673250198
  batch 700 loss: 0.6629941529035568
  batch 750 loss: 0.6468743020296097
  batch 800 loss: 

  batch 350 loss: 0.9705314767360688
  batch 400 loss: 0.9769237768650055
  batch 450 loss: 0.9647226679325104
  batch 500 loss: 0.9653905010223389
  batch 550 loss: 0.953463739156723
  batch 600 loss: 0.983247742652893
  batch 650 loss: 0.9504810607433319
  batch 700 loss: 0.9881583452224731
  batch 750 loss: 0.9624038124084473
  batch 800 loss: 0.9683730769157409
  batch 850 loss: 0.9825802886486054
  batch 900 loss: 1.0036060452461242
LOSS train 1.00361 valid 0.98764, valid PER 31.66%
EPOCH 8:
  batch 50 loss: 0.951329540014267
  batch 100 loss: 0.9319286501407623
  batch 150 loss: 0.927796368598938
  batch 200 loss: 0.9275277984142304
  batch 250 loss: 0.9382867646217347
  batch 300 loss: 0.8936598765850067
  batch 350 loss: 0.9379580199718476
  batch 400 loss: 0.9150162827968598
  batch 450 loss: 0.9696805381774902
  batch 500 loss: 0.9656519091129303
  batch 550 loss: 0.8945220172405243
  batch 600 loss: 0.9614198207855225
  batch 650 loss: 0.9776541590690613
  batch 700 loss: 0.

  batch 450 loss: 0.7024477958679199
  batch 500 loss: 0.6875839895009994
  batch 550 loss: 0.6883374184370041
  batch 600 loss: 0.6566585552692413
  batch 650 loss: 0.6746353662014007
  batch 700 loss: 0.697833258509636
  batch 750 loss: 0.6781710612773896
  batch 800 loss: 0.6637485337257385
  batch 850 loss: 0.663476352095604
  batch 900 loss: 0.7079851365089417
Epoch 00018: reducing learning rate of group 0 to 1.5625e-02.
LOSS train 0.70799 valid 0.84899, valid PER 26.48%
EPOCH 19:
  batch 50 loss: 0.6571967673301696
  batch 100 loss: 0.6528203129768372
  batch 150 loss: 0.6612490504980088
  batch 200 loss: 0.6811364185810089
  batch 250 loss: 0.6986423897743225
  batch 300 loss: 0.6737143975496293
  batch 350 loss: 0.6787929058074951
  batch 400 loss: 0.6850334757566452
  batch 450 loss: 0.6785662531852722
  batch 500 loss: 0.6906520080566406
  batch 550 loss: 0.6669913005828857
  batch 600 loss: 0.6845183271169663
  batch 650 loss: 0.7104526370763778
  batch 700 loss: 0.649028125

  batch 400 loss: 0.9682376062870026
  batch 450 loss: 0.9630557763576507
  batch 500 loss: 0.9215477693080902
  batch 550 loss: 0.9720089423656464
  batch 600 loss: 0.9698984253406525
  batch 650 loss: 0.9438456380367279
  batch 700 loss: 0.9316515064239502
  batch 750 loss: 0.9590212059020996
  batch 800 loss: 0.9771483039855957
  batch 850 loss: 0.9863555002212524
  batch 900 loss: 0.9233079123497009
LOSS train 0.92331 valid 0.96376, valid PER 30.43%
EPOCH 10:
  batch 50 loss: 0.9229220414161682
  batch 100 loss: 0.9079756247997284
  batch 150 loss: 0.9436542534828186
  batch 200 loss: 0.9607348930835724
  batch 250 loss: 0.9497406005859375
  batch 300 loss: 0.8954160416126251
  batch 350 loss: 0.9333334529399872
  batch 400 loss: 0.8923700320720672
  batch 450 loss: 0.8912749457359314
  batch 500 loss: 0.942800315618515
  batch 550 loss: 0.9439060842990875
  batch 600 loss: 0.9191083168983459
  batch 650 loss: 0.9195622062683105
  batch 700 loss: 0.9471275746822357
  batch 750 loss

In [4]:
# Single-point sweep for the cell below: SGD_Scheduler trainer, dropout rate
# 0.5 only.
Optimiser = [
    "SGD_Scheduler",
]
dropout_rates = [
    0.5,
]

In [5]:
import model_uni_directional_LSTM
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

# Dropout sweep for the uni-directional LSTM: for each optimiser named in
# `Optimiser`, train one fresh model per value in `dropout_rates`.
# Uses `vocab`, `device` and `namedtuple` defined in earlier cells.

# Settings common to every run. 'lr' is only a placeholder here (it keeps the
# field order of `args` unchanged) and is set per optimiser inside the loop.
sweep_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 2,
    'fbank_dims': 23,
    'model_dims': 210,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}

# Derived from the configured depth so the banner always matches the model
# that is actually being trained.
model_label = "uni directional {} Layer LSTM".format(sweep_config['num_layers'])

print("Start tuning For " + model_label)

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")
    # Any name other than "Adam" is handled by the trainer_SGD_Scheduler trainer.
    use_adam = opt == "Adam"
    train_fn = adam_trainer if use_adam else sgd_trainer
    args = dict(sweep_config, lr=0.001 if use_adam else 0.5)
    args = namedtuple('x', args)(**args)

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of "+ str(dropout_rate))
        model_with_dropout = model_uni_directional_LSTM.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(p.numel() for p in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))
        start = datetime.now()
        model_with_dropout.to(args.device)
        # The trainer's return value is reported below as the saved-model path.
        model_path = train_fn(model_with_dropout, args)
        end = datetime.now()
        duration = (end - start).total_seconds()
        # Whole minutes only, printed as a float (e.g. "8.0").
        print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
        print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For " + model_label)

Start tuning For uni directional 1 Layer LSTM
Currently using SGD_Scheduler optimiser
Currently using dropout rate of 0.5
Total number of model parameters is 560320
EPOCH 1:
  batch 50 loss: 5.186969437599182
  batch 100 loss: 3.397619910240173
  batch 150 loss: 3.299616980552673
  batch 200 loss: 3.245315599441528
  batch 250 loss: 3.1675468111038207
  batch 300 loss: 3.0078634881973265
  batch 350 loss: 2.8707586669921876
  batch 400 loss: 2.7618620109558107
  batch 450 loss: 2.6840696287155152
  batch 500 loss: 2.5458651304244997
  batch 550 loss: 2.4563596439361572
  batch 600 loss: 2.389314250946045
  batch 650 loss: 2.3026261281967164
  batch 700 loss: 2.2769479751586914
  batch 750 loss: 2.2202400183677673
  batch 800 loss: 2.1928952169418334
  batch 850 loss: 2.1146387338638304
  batch 900 loss: 2.082296385765076
LOSS train 2.08230 valid 2.02669, valid PER 74.22%
EPOCH 2:
  batch 50 loss: 2.0402814316749573
  batch 100 loss: 1.97362407207489
  batch 150 loss: 1.9304421377182006

  batch 50 loss: 0.8658894455432892
  batch 100 loss: 0.8490141677856445
  batch 150 loss: 0.8124578773975373
  batch 200 loss: 0.848634408712387
  batch 250 loss: 0.8627073240280151
  batch 300 loss: 0.8424561607837677
  batch 350 loss: 0.8525070834159851
  batch 400 loss: 0.8700098133087159
  batch 450 loss: 0.849846156835556
  batch 500 loss: 0.8500027275085449
  batch 550 loss: 0.78084756731987
  batch 600 loss: 0.8209719610214233
  batch 650 loss: 0.8580285489559174
  batch 700 loss: 0.8265656125545502
  batch 750 loss: 0.8130369752645492
  batch 800 loss: 0.8165104645490646
  batch 850 loss: 0.8441041958332062
  batch 900 loss: 0.854948388338089
LOSS train 0.85495 valid 0.88633, valid PER 28.07%
EPOCH 13:
  batch 50 loss: 0.8003463280200959
  batch 100 loss: 0.8320787823200226
  batch 150 loss: 0.7940618348121643
  batch 200 loss: 0.8403228461742401
  batch 250 loss: 0.815920889377594
  batch 300 loss: 0.7913719844818116
  batch 350 loss: 0.8133838403224946
  batch 400 loss: 0.82

## Decoding

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np
# Build the phone vocabulary from vocab_39.txt, which holds one symbol per
# line; a symbol's line index is used as its integer id.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # Named `phone_id` rather than `id`: this loop variable is a kernel-wide
    # global, so `id` would shadow the builtin for every later cell.
    for phone_id, text in enumerate(f):
        vocab[text.strip()] = phone_id
        # NOTE(review): unlike the vocab keys, these entries keep their
        # trailing newline -- confirm that is intended.
        phonemes.append(text)
# Drop the first line (id 0) from the phone list.
phonemes = phonemes[1:]
from collections import namedtuple
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    print("currently using cuda")
else:
    device = "cpu"
    print("currently using cpu only")

# Experiment configuration; converted to a namedtuple below so that fields
# are read as attributes (args.lr, args.vocab, ...).
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 1,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)

currently using cpu only


In [2]:
# Checkpoint to decode; edit this path to evaluate a different model.
model_path = 'checkpoints/20231206_144057/model_20'

In [3]:
import model_regularisation_dropout_between_layer
# Configuration for the checkpoint being decoded; this rebinds `args`.
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 2,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)
# The layer count comes from args.num_layers instead of a repeated literal 2,
# so the configuration above is the single source of truth for the model.
model = model_regularisation_dropout_between_layer.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
    0,  # presumably the dropout rate (going by the module name) -- TODO confirm
)

In [4]:
import torch
# Read the saved parameters from `model_path`, mapping tensors onto `device`,
# and copy them into the freshly built model.
print('Loading model from {}'.format(model_path))
checkpoint_state = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint_state)
# Evaluation mode; as the cell's last expression this also displays the module.
model.eval()

Loading model from checkpoints/20231206_144057/model_20


BiLSTM(
  (lstm): LSTM(23, 128, num_layers=2, bidirectional=True)
  (proj): Linear(in_features=256, out_features=40, bias=True)
)

In [5]:
from decoder import decode
# Decode the file named by args.test_json and print the five returned figures.
results = decode(model, args, args.test_json)
report_format = "SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%"
print(report_format.format(*results))

SUB: 14.44%, DEL: 6.58%, INS: 2.93%, COR: 78.98%, PER: 23.95%


# Decode With Blank Penalty -0.1

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np
# Build the phone vocabulary from vocab_39.txt, which holds one symbol per
# line; a symbol's line index is used as its integer id.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # Named `phone_id` rather than `id`: this loop variable is a kernel-wide
    # global, so `id` would shadow the builtin for every later cell.
    for phone_id, text in enumerate(f):
        vocab[text.strip()] = phone_id
        # NOTE(review): unlike the vocab keys, these entries keep their
        # trailing newline -- confirm that is intended.
        phonemes.append(text)
# Drop the first line (id 0) from the phone list.
phonemes = phonemes[1:]
from collections import namedtuple
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    print("currently using cuda")
else:
    device = "cpu"
    print("currently using cpu only")

# Experiment configuration; converted to a namedtuple below so that fields
# are read as attributes (args.lr, args.vocab, ...).
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 1,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)

currently using cpu only


In [2]:
# Checkpoint to decode; edit this path to evaluate a different model.
model_path = 'checkpoints/20231206_144057/model_20'

In [3]:
import model_regularisation_dropout_between_layer
# Configuration for the checkpoint being decoded; this rebinds `args`.
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 2,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)
# The layer count comes from args.num_layers instead of a repeated literal 2,
# so the configuration above is the single source of truth for the model.
model = model_regularisation_dropout_between_layer.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
    0,  # presumably the dropout rate (going by the module name) -- TODO confirm
)

In [4]:
import torch
# Read the saved parameters from `model_path`, mapping tensors onto `device`,
# and copy them into the freshly built model.
print('Loading model from {}'.format(model_path))
checkpoint_state = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint_state)
# Evaluation mode; as the cell's last expression this also displays the module.
model.eval()

Loading model from checkpoints/20231206_144057/model_20


BiLSTM(
  (lstm): LSTM(23, 128, num_layers=2, bidirectional=True)
  (proj): Linear(in_features=256, out_features=40, bias=True)
)

In [None]:
from decoder import decode
# Decode the file named by args.test_json and print the five returned figures.
# NOTE(review): no blank-penalty value is passed to decode() here; presumably
# it is configured inside the decoder module -- confirm.
results = decode(model, args, args.test_json)
report_format = "SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%"
print(report_format.format(*results))

# Decode With Blank Penalty -0.005

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np
# Build the phone vocabulary from vocab_39.txt, which holds one symbol per
# line; a symbol's line index is used as its integer id.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # Named `phone_id` rather than `id`: this loop variable is a kernel-wide
    # global, so `id` would shadow the builtin for every later cell.
    for phone_id, text in enumerate(f):
        vocab[text.strip()] = phone_id
        # NOTE(review): unlike the vocab keys, these entries keep their
        # trailing newline -- confirm that is intended.
        phonemes.append(text)
# Drop the first line (id 0) from the phone list.
phonemes = phonemes[1:]
from collections import namedtuple
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    print("currently using cuda")
else:
    device = "cpu"
    print("currently using cpu only")
# Checkpoint to decode; edit this path to evaluate a different model.
model_path="checkpoints/20231206_144057/model_20"
import model_regularisation_dropout_between_layer
# Configuration matching the checkpoint being decoded.
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 2,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)
# The layer count comes from args.num_layers instead of a repeated literal 2,
# so the configuration above is the single source of truth for the model.
model = model_regularisation_dropout_between_layer.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
    0,  # presumably the dropout rate (going by the module name) -- TODO confirm
)
import torch
print('Loading model from {}'.format(model_path))
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
from decoder import decode
# NOTE(review): the value in this section's heading is not passed to decode()
# and is not part of `args`; presumably it is set inside the decoder module
# between runs -- confirm, since the cell alone does not reproduce it.
results = decode(model, args, args.test_json)
print("SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(*results))

currently using cpu only
Loading model from checkpoints/20231206_144057/model_20
SUB: 14.47%, DEL: 6.53%, INS: 2.95%, COR: 78.99%, PER: 23.95%


# Decode With Blank Penalty -0.01

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np
# Build the phone vocabulary from vocab_39.txt, which holds one symbol per
# line; a symbol's line index is used as its integer id.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # Named `phone_id` rather than `id`: this loop variable is a kernel-wide
    # global, so `id` would shadow the builtin for every later cell.
    for phone_id, text in enumerate(f):
        vocab[text.strip()] = phone_id
        # NOTE(review): unlike the vocab keys, these entries keep their
        # trailing newline -- confirm that is intended.
        phonemes.append(text)
# Drop the first line (id 0) from the phone list.
phonemes = phonemes[1:]
from collections import namedtuple
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    print("currently using cuda")
else:
    device = "cpu"
    print("currently using cpu only")
# Checkpoint to decode; edit this path to evaluate a different model.
model_path="checkpoints/20231206_144057/model_20"
import model_regularisation_dropout_between_layer
# Configuration matching the checkpoint being decoded.
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 2,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)
# The layer count comes from args.num_layers instead of a repeated literal 2,
# so the configuration above is the single source of truth for the model.
model = model_regularisation_dropout_between_layer.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
    0,  # presumably the dropout rate (going by the module name) -- TODO confirm
)
import torch
print('Loading model from {}'.format(model_path))
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
from decoder import decode
# NOTE(review): the value in this section's heading is not passed to decode()
# and is not part of `args`; presumably it is set inside the decoder module
# between runs -- confirm, since the cell alone does not reproduce it.
results = decode(model, args, args.test_json)
print("SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(*results))

currently using cpu only
Loading model from checkpoints/20231206_144057/model_20
SUB: 14.49%, DEL: 6.51%, INS: 2.98%, COR: 79.01%, PER: 23.97%


# Decode With Blank Penalty -0.02

In [1]:
from dataloader import get_dataloader
import torch
import numpy as np
# Build the phone vocabulary from vocab_39.txt, which holds one symbol per
# line; a symbol's line index is used as its integer id.

vocab = {}
phonemes = []
with open("vocab_39.txt") as f:
    # Named `phone_id` rather than `id`: this loop variable is a kernel-wide
    # global, so `id` would shadow the builtin for every later cell.
    for phone_id, text in enumerate(f):
        vocab[text.strip()] = phone_id
        # NOTE(review): unlike the vocab keys, these entries keep their
        # trailing newline -- confirm that is intended.
        phonemes.append(text)
# Drop the first line (id 0) from the phone list.
phonemes = phonemes[1:]
from collections import namedtuple
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    print("currently using cuda")
else:
    device = "cpu"
    print("currently using cpu only")
# Checkpoint to decode; edit this path to evaluate a different model.
model_path="checkpoints/20231206_144057/model_20"
import model_regularisation_dropout_between_layer
# Configuration matching the checkpoint being decoded.
args = {'seed': 123,
        'train_json': 'train_fbank.json',
        'val_json': 'dev_fbank.json',
        'test_json': 'test_fbank.json',
        'batch_size': 4,
        'num_layers': 2,
        'fbank_dims': 23,
        'model_dims': 128,
        'concat': 1,
        'lr': 0.5,
        'vocab': vocab,
        'report_interval': 50,
        'num_epochs': 20,
        'device': device,
       }

args = namedtuple('x', args)(**args)
# The layer count comes from args.num_layers instead of a repeated literal 2,
# so the configuration above is the single source of truth for the model.
model = model_regularisation_dropout_between_layer.BiLSTM(
    args.num_layers,
    args.fbank_dims * args.concat,
    args.model_dims,
    len(args.vocab),
    0,  # presumably the dropout rate (going by the module name) -- TODO confirm
)
import torch
print('Loading model from {}'.format(model_path))
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
from decoder import decode
# NOTE(review): the value in this section's heading is not passed to decode()
# and is not part of `args`; presumably it is set inside the decoder module
# between runs -- confirm, since the cell alone does not reproduce it.
results = decode(model, args, args.test_json)
print("SUB: {:.2f}%, DEL: {:.2f}%, INS: {:.2f}%, COR: {:.2f}%, PER: {:.2f}%".format(*results))

currently using cpu only
Loading model from checkpoints/20231206_144057/model_20
SUB: 14.49%, DEL: 6.49%, INS: 2.98%, COR: 79.02%, PER: 23.95%


# 一天天就知道凶人的实验

## First set: Using Adam optimiser

In [4]:
# Search grid for the Adam sweep: two starting learning rates, no dropout.
Starting_Learning_rate = [0.0005, 0.001]
Optimiser = ['Adam']
# Integer 0 (not 0.0), so the rate is reported as "0" when converted to text.
dropout_rates = [0]

In [5]:
import model_regularisation_dropout
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

print("Start tuning For wider 1 Layer LSTM Using Adam Optimiser")

# Settings shared by every run in this sweep; only the learning rate changes.
# The 'lr' placeholder sits at its original position so the namedtuple
# fields keep the same order.
base_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 512,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}
Config = namedtuple('x', base_config)

for starting_lr in Starting_Learning_rate:
    print("Currently using Adam optimiser")
    args = Config(**dict(base_config, lr=starting_lr))

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of " + str(dropout_rate))
        model_with_dropout = model_regularisation_dropout.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(weights.numel() for weights in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))

        # The timer covers the device transfer as well as the training run.
        started_at = datetime.now()
        model_with_dropout.to(args.device)
        model_path = adam_trainer(model_with_dropout, args)
        elapsed_seconds = (datetime.now() - started_at).total_seconds()
        # Whole minutes only, still printed as a float (e.g. 12.0).
        print('Training finished in {} minutes.'.format(elapsed_seconds // 60))
        print('Model saved to {}'.format(model_path))

print("End tuning For Wider 1 Layer LSTM For Adam optimiser")
        

Start tuning For wider 1 Layer LSTM Using Adam Optimiser
Currently using Adam optimiser
Currently using dropout rate of 0
Total number of model parameters is 2240552
EPOCH 1:
  batch 50 loss: 7.185514311790467
  batch 100 loss: 3.1382794761657715
  batch 150 loss: 2.773013391494751
  batch 200 loss: 2.4820990467071535
  batch 250 loss: 2.2901057195663452
  batch 300 loss: 2.1064032411575315
  batch 350 loss: 1.9516385650634767
  batch 400 loss: 1.945366744995117
  batch 450 loss: 1.8494585585594177
  batch 500 loss: 1.7611941242218017
  batch 550 loss: 1.7059362936019897
  batch 600 loss: 1.6795011687278747
  batch 650 loss: 1.6228000044822692
  batch 700 loss: 1.6349195981025695
  batch 750 loss: 1.583092019557953
  batch 800 loss: 1.5757031726837158
  batch 850 loss: 1.541705322265625
  batch 900 loss: 1.5175226902961731
LOSS train 1.51752 valid 1.53086, valid PER 55.17%
EPOCH 2:
  batch 50 loss: 1.4617726612091064
  batch 100 loss: 1.4255144119262695
  batch 150 loss: 1.373790330886

  batch 200 loss: 0.44970256119966506
  batch 250 loss: 0.46298507452011106
  batch 300 loss: 0.4530462634563446
  batch 350 loss: 0.4568026512861252
  batch 400 loss: 0.47578509509563444
  batch 450 loss: 0.4580912682414055
  batch 500 loss: 0.4729467940330505
  batch 550 loss: 0.4466679900884628
  batch 600 loss: 0.47028467297554016
  batch 650 loss: 0.5095950663089752
  batch 700 loss: 0.4962752741575241
  batch 750 loss: 0.4787877720594406
  batch 800 loss: 0.47444204419851305
  batch 850 loss: 0.541334348320961
  batch 900 loss: 0.5161500668525696
LOSS train 0.51615 valid 0.91884, valid PER 26.32%
EPOCH 13:
  batch 50 loss: 0.4069992509484291
  batch 100 loss: 0.40771480947732924
  batch 150 loss: 0.4096907499432564
  batch 200 loss: 0.41857319951057437
  batch 250 loss: 0.4089272791147232
  batch 300 loss: 0.40105365991592407
  batch 350 loss: 0.4133379566669464
  batch 400 loss: 0.4304838877916336
  batch 450 loss: 0.44486793607473374
  batch 500 loss: 0.4149145808815956
  batch

  batch 150 loss: 1.0695122492313385
  batch 200 loss: 1.087842642068863
  batch 250 loss: 1.070361157655716
  batch 300 loss: 1.0728024065494537
  batch 350 loss: 1.1096388936042785
  batch 400 loss: 1.0716164553165435
  batch 450 loss: 1.0418659400939942
  batch 500 loss: 1.055646973848343
  batch 550 loss: 1.0472581195831299
  batch 600 loss: 1.020816365480423
  batch 650 loss: 1.0242637419700622
  batch 700 loss: 1.048163491487503
  batch 750 loss: 1.0698262739181519
  batch 800 loss: 1.0184289014339447
  batch 850 loss: 1.0493556261062622
  batch 900 loss: 0.9817560005187989
LOSS train 0.98176 valid 1.09159, valid PER 33.52%
EPOCH 4:
  batch 50 loss: 0.9546652746200561
  batch 100 loss: 0.9660443782806396
  batch 150 loss: 0.956738611459732
  batch 200 loss: 0.9821926057338715
  batch 250 loss: 0.9759292936325074
  batch 300 loss: 0.9679156577587128
  batch 350 loss: 0.9242408013343811
  batch 400 loss: 0.9636321437358856
  batch 450 loss: 0.9416297161579132
  batch 500 loss: 0.93

  batch 450 loss: 0.36926706016063693
  batch 500 loss: 0.3810656416416168
  batch 550 loss: 0.39444401413202285
  batch 600 loss: 0.3584222912788391
  batch 650 loss: 0.38745640516281127
  batch 700 loss: 0.4174047869443893
  batch 750 loss: 0.39436991691589357
  batch 800 loss: 0.3808287978172302
  batch 850 loss: 0.39787999629974363
  batch 900 loss: 0.4058504810929298
LOSS train 0.40585 valid 0.97322, valid PER 26.36%
EPOCH 15:
  batch 50 loss: 0.2941726258397102
  batch 100 loss: 0.30057130694389345
  batch 150 loss: 0.3000204434990883
  batch 200 loss: 0.3247966220974922
  batch 250 loss: 0.3340429958701134
  batch 300 loss: 0.322901716530323
  batch 350 loss: 0.341340546309948
  batch 400 loss: 0.3358198544383049
  batch 450 loss: 0.33072943091392515
  batch 500 loss: 0.3240824156999588
  batch 550 loss: 0.36176232069730757
  batch 600 loss: 0.35114766895771027
  batch 650 loss: 0.3501885625720024
  batch 700 loss: 0.37811822682619095
  batch 750 loss: 0.35144683986902236
  batc

## Second set: Using SGD Scheduler

In [6]:
# Search grid for the SGD-with-scheduler sweep: four starting learning rates,
# no dropout.
Starting_Learning_rate = [0.5, 0.7, 1.0, 1.5]
Optimiser = ['SGD_Scheduler']
# Integer 0 (not 0.0), so the rate is reported as "0" when converted to text.
dropout_rates = [0]

In [7]:
import model_regularisation_dropout
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

print("Start tuning For wider 1 Layer LSTM Using SGD Optimiser")

# Settings shared by every run in this sweep; only the learning rate changes.
# The 'lr' placeholder sits at its original position so the namedtuple
# fields keep the same order.
base_config = {
    'seed': 123,
    'train_json': 'train_fbank.json',
    'val_json': 'dev_fbank.json',
    'test_json': 'test_fbank.json',
    'batch_size': 4,
    'num_layers': 1,
    'fbank_dims': 23,
    'model_dims': 512,
    'concat': 1,
    'lr': None,
    'vocab': vocab,
    'report_interval': 50,
    'num_epochs': 20,
    'device': device,
}
Config = namedtuple('x', base_config)

for starting_lr in Starting_Learning_rate:
    print("Currently using SGD optimiser")
    args = Config(**dict(base_config, lr=starting_lr))

    for dropout_rate in dropout_rates:
        print("Currently using dropout rate of " + str(dropout_rate))
        model_with_dropout = model_regularisation_dropout.BiLSTM(
            args.num_layers,
            args.fbank_dims * args.concat,
            args.model_dims,
            len(args.vocab),
            dropout_rate,
        )
        num_params = sum(weights.numel() for weights in model_with_dropout.parameters())
        print('Total number of model parameters is {}'.format(num_params))

        # The timer covers the device transfer as well as the training run.
        started_at = datetime.now()
        model_with_dropout.to(args.device)
        model_path = sgd_trainer(model_with_dropout, args)
        elapsed_seconds = (datetime.now() - started_at).total_seconds()
        # Whole minutes only, still printed as a float (e.g. 12.0).
        print('Training finished in {} minutes.'.format(elapsed_seconds // 60))
        print('Model saved to {}'.format(model_path))

print("End tuning For Wider 1 Layer LSTM For SGD optimiser")
        

Start tuning For wider 1 Layer LSTM Using SGD Optimiser
Currently using SGD optimiser
Currently using dropout rate of 0
Total number of model parameters is 2240552
EPOCH 1:
  batch 50 loss: 5.084213075637817
  batch 100 loss: 3.279260263442993
  batch 150 loss: 3.090498342514038
  batch 200 loss: 2.8440057468414306
  batch 250 loss: 2.6850899982452394
  batch 300 loss: 2.517392535209656
  batch 350 loss: 2.4043646907806395
  batch 400 loss: 2.373643870353699
  batch 450 loss: 2.2924081897735595
  batch 500 loss: 2.179419894218445
  batch 550 loss: 2.1380615186691285
  batch 600 loss: 2.071744055747986
  batch 650 loss: 1.9753476119041442
  batch 700 loss: 1.9737450432777406
  batch 750 loss: 1.9024597954750062
  batch 800 loss: 1.8859078216552734
  batch 850 loss: 1.8554858541488648
  batch 900 loss: 1.8198056840896606
LOSS train 1.81981 valid 1.76480, valid PER 66.21%
EPOCH 2:
  batch 50 loss: 1.774866716861725
  batch 100 loss: 1.708726739883423
  batch 150 loss: 1.6797557306289672
 

  batch 100 loss: 0.5626668834686279
  batch 150 loss: 0.507908553481102
  batch 200 loss: 0.5416071891784668
  batch 250 loss: 0.5562965106964112
  batch 300 loss: 0.5538198417425155
  batch 350 loss: 0.551470667719841
  batch 400 loss: 0.5668042111396789
  batch 450 loss: 0.5619343858957291
  batch 500 loss: 0.5758544409275055
  batch 550 loss: 0.5365975707769394
  batch 600 loss: 0.5517321610450745
  batch 650 loss: 0.5748515766859055
  batch 700 loss: 0.5796791338920593
  batch 750 loss: 0.5475450879335404
  batch 800 loss: 0.5521903282403946
  batch 850 loss: 0.6164826232194901
  batch 900 loss: 0.5989756894111633
Epoch 00012: reducing learning rate of group 0 to 1.2500e-01.
LOSS train 0.59898 valid 0.86699, valid PER 25.60%
EPOCH 13:
  batch 50 loss: 0.49491141438484193
  batch 100 loss: 0.4981077820062637
  batch 150 loss: 0.46924112617969516
  batch 200 loss: 0.49934529423713686
  batch 250 loss: 0.48229796230793
  batch 300 loss: 0.4832914388179779
  batch 350 loss: 0.47536240

  batch 400 loss: 1.4955157947540283
  batch 450 loss: 1.445053277015686
  batch 500 loss: 1.4861087727546691
  batch 550 loss: 1.4657780528068542
  batch 600 loss: 1.4295513868331908
  batch 650 loss: 1.4573638939857483
  batch 700 loss: 1.4118761134147644
  batch 750 loss: 1.4063637137413025
  batch 800 loss: 1.3592495822906494
  batch 850 loss: 1.3499480080604553
  batch 900 loss: 1.3910088968276977
LOSS train 1.39101 valid 1.35717, valid PER 43.47%
EPOCH 3:
  batch 50 loss: 1.3413076567649842
  batch 100 loss: 1.2998478031158447
  batch 150 loss: 1.2973154497146606
  batch 200 loss: 1.271260895729065
  batch 250 loss: 1.275689162015915
  batch 300 loss: 1.2659638667106627
  batch 350 loss: 1.3231982004642486
  batch 400 loss: 1.2813828027248382
  batch 450 loss: 1.267249881029129
  batch 500 loss: 1.2329501593112946
  batch 550 loss: 1.2468984973430635
  batch 600 loss: 1.2121263718605042
  batch 650 loss: 1.2103162705898285
  batch 700 loss: 1.2230168390274048
  batch 750 loss: 1.

  batch 550 loss: 0.3876370874047279
  batch 600 loss: 0.3639501443505287
  batch 650 loss: 0.3872870236635208
  batch 700 loss: 0.3830036637187004
  batch 750 loss: 0.3439823570847511
  batch 800 loss: 0.35943577229976653
  batch 850 loss: 0.3784014376997948
  batch 900 loss: 0.39757623463869096
Epoch 00013: reducing learning rate of group 0 to 8.7500e-02.
LOSS train 0.39758 valid 0.93474, valid PER 25.50%
EPOCH 14:
  batch 50 loss: 0.31717868596315385
  batch 100 loss: 0.3255603665113449
  batch 150 loss: 0.31248593121767043
  batch 200 loss: 0.29944984972476957
  batch 250 loss: 0.3122228875756264
  batch 300 loss: 0.3226291432976723
  batch 350 loss: 0.298842137157917
  batch 400 loss: 0.29614907413721087
  batch 450 loss: 0.3132588747143745
  batch 500 loss: 0.31753602921962737
  batch 550 loss: 0.32253783077001574
  batch 600 loss: 0.29359151363372804
  batch 650 loss: 0.3093507650494576
  batch 700 loss: 0.3253193932771683
  batch 750 loss: 0.30269073039293287
  batch 800 loss: 

  batch 850 loss: 1.1587940633296967
  batch 900 loss: 1.1254368066787719
LOSS train 1.12544 valid 1.17209, valid PER 35.22%
EPOCH 4:
  batch 50 loss: 1.0716103172302247
  batch 100 loss: 1.1115892624855042
  batch 150 loss: 1.074433025121689
  batch 200 loss: 1.0931191670894622
  batch 250 loss: 1.0945008051395417
  batch 300 loss: 1.0939276552200317
  batch 350 loss: 1.0294990456104278
  batch 400 loss: 1.0622120213508606
  batch 450 loss: 1.0439597153663636
  batch 500 loss: 1.0268917155265809
  batch 550 loss: 1.0646444475650787
  batch 600 loss: 1.0816781759262084
  batch 650 loss: 1.0509575641155242
  batch 700 loss: 1.051159827709198
  batch 750 loss: 1.0231579673290252
  batch 800 loss: 0.9938838684558868
  batch 850 loss: 1.0421117460727691
  batch 900 loss: 1.0773703587055206
LOSS train 1.07737 valid 1.11236, valid PER 33.81%
EPOCH 5:
  batch 50 loss: 0.9526908957958221
  batch 100 loss: 0.9806990480422973
  batch 150 loss: 1.0104088866710663
  batch 200 loss: 0.9444855415821

  batch 800 loss: 0.3192412179708481
  batch 850 loss: 0.33288459211587906
  batch 900 loss: 0.33084321618080137
Epoch 00014: reducing learning rate of group 0 to 3.1250e-02.
LOSS train 0.33084 valid 0.87673, valid PER 24.74%
EPOCH 15:
  batch 50 loss: 0.3164902064204216
  batch 100 loss: 0.3113329672813416
  batch 150 loss: 0.31692228704690933
  batch 200 loss: 0.3201861813664436
  batch 250 loss: 0.33551207959651946
  batch 300 loss: 0.31006830364465715
  batch 350 loss: 0.32236216247081756
  batch 400 loss: 0.31574627727270127
  batch 450 loss: 0.30256917506456377
  batch 500 loss: 0.30583421379327774
  batch 550 loss: 0.3064282888174057
  batch 600 loss: 0.31817650467157366
  batch 650 loss: 0.3285538575053215
  batch 700 loss: 0.3315695136785507
  batch 750 loss: 0.31693208575248716
  batch 800 loss: 0.3034723097085953
  batch 850 loss: 0.2953485381603241
  batch 900 loss: 0.3162604469060898
Epoch 00015: reducing learning rate of group 0 to 1.5625e-02.
LOSS train 0.31626 valid 0.8

  batch 200 loss: 0.9340521252155304
  batch 250 loss: 0.9687004792690277
  batch 300 loss: 0.9645647311210632
  batch 350 loss: 0.9485770857334137
  batch 400 loss: 0.9585914933681488
  batch 450 loss: 0.9563530802726745
  batch 500 loss: 0.9687843108177185
  batch 550 loss: 0.9389273178577423
  batch 600 loss: 1.0064745116233826
  batch 650 loss: 0.9751174664497375
  batch 700 loss: 1.0467513597011566
  batch 750 loss: 1.0102285766601562
  batch 800 loss: 0.9739977443218231
  batch 850 loss: 0.9655930197238922
  batch 900 loss: 0.9669645738601684
LOSS train 0.96696 valid 1.04439, valid PER 32.04%
EPOCH 6:
  batch 50 loss: 0.9248822951316833
  batch 100 loss: 0.889702582359314
  batch 150 loss: 0.8625107336044312
  batch 200 loss: 0.9019035804271698
  batch 250 loss: 0.9522956776618957
  batch 300 loss: 0.8921441078186035
  batch 350 loss: 0.8829591941833496
  batch 400 loss: 0.8755054533481598
  batch 450 loss: 0.9478652381896973
  batch 500 loss: 0.9485787427425385
  batch 550 loss:

  batch 50 loss: 0.291055403649807
  batch 100 loss: 0.2786509099602699
  batch 150 loss: 0.28050599902868273
  batch 200 loss: 0.27124046385288236
  batch 250 loss: 0.28743150025606157
  batch 300 loss: 0.2709192422032356
  batch 350 loss: 0.2828856107592583
  batch 400 loss: 0.27981853783130645
  batch 450 loss: 0.29188023805618285
  batch 500 loss: 0.2639802795648575
  batch 550 loss: 0.2667907354235649
  batch 600 loss: 0.27451126992702485
  batch 650 loss: 0.287089632153511
  batch 700 loss: 0.26710293084383013
  batch 750 loss: 0.27865264981985094
  batch 800 loss: 0.27663732320070267
  batch 850 loss: 0.27191267102956773
  batch 900 loss: 0.2892590942978859
Epoch 00016: reducing learning rate of group 0 to 1.1719e-02.
LOSS train 0.28926 valid 0.90320, valid PER 24.95%
EPOCH 17:
  batch 50 loss: 0.2728401251137257
  batch 100 loss: 0.275545814037323
  batch 150 loss: 0.27271121308207513
  batch 200 loss: 0.26684260070323945
  batch 250 loss: 0.28078650891780854
  batch 300 loss: 

# 一天天就知道凶人的实验第三组

# Adam learning rate [0.005]
# Dropout Rate =[0, 0.1, 0.3, 0.5, 0.7]
# Model Structure 2 layer unidirectional LSTM, 210 dim in each layer

In [4]:
# Search grid for the 2-layer unidirectional LSTM runs: one Adam starting
# learning rate crossed with five dropout rates.
Optimiser = ['Adam']
starting_lrs = [0.005]
dropout_rates = [0, 0.1, 0.3, 0.5, 0.7]

In [5]:
import model_uni_directional_LSTM
from datetime import datetime
from trainer_SGD_Scheduler import train as sgd_trainer
from trainer_Adam import train as adam_trainer
import torch
from decoder import decode

# Dropout sweep for the 2-layer unidirectional LSTM.
#
# For every optimiser name in `Optimiser`, every starting learning rate in
# `starting_lrs` and every dropout rate in `dropout_rates`, build a fresh
# model, train it with the matching trainer and report the wall-clock
# training time and the checkpoint path returned by the trainer.
#
# Relies on names defined in earlier cells: `vocab`, `device`, `namedtuple`,
# `Optimiser`, `starting_lrs`, `dropout_rates`.
print("Start tuning For uni directional 2 Layer LSTM")

for opt in Optimiser:
    print("Currently using "+ opt +" optimiser")

    # "Adam" uses the Adam trainer; any other name falls back to the
    # SGD-with-scheduler trainer.
    train_fn = adam_trainer if opt == "Adam" else sgd_trainer

    for starting_lr in starting_lrs:
        # The run configuration is the same for every optimiser; only the
        # starting learning rate varies, so it is built once here rather
        # than duplicated in per-optimiser branches.
        run_config = {
            'seed': 123,
            'train_json': 'train_fbank.json',
            'val_json': 'dev_fbank.json',
            'test_json': 'test_fbank.json',
            'batch_size': 4,
            'num_layers': 2,
            'fbank_dims': 23,
            'model_dims': 210,
            'concat': 1,
            'lr': starting_lr,
            'vocab': vocab,
            'report_interval': 50,
            'num_epochs': 20,
            'device': device,
        }
        # Attribute-style access (args.lr, args.device, ...) for the trainers.
        args = namedtuple('x', run_config)(**run_config)

        for dropout_rate in dropout_rates:
            print("Currently using dropout rate of "+ str(dropout_rate))

            # The class is named BiLSTM but comes from the
            # uni-directional module imported at the top of this cell.
            model_with_dropout = model_uni_directional_LSTM.BiLSTM(
                args.num_layers,
                args.fbank_dims * args.concat,
                args.model_dims,
                len(args.vocab),
                dropout_rate,
            )
            num_params = sum(p.numel() for p in model_with_dropout.parameters())
            print('Total number of model parameters is {}'.format(num_params))

            start = datetime.now()
            model_with_dropout.to(args.device)
            model_path = train_fn(model_with_dropout, args)
            end = datetime.now()

            # Report whole minutes only (the quotient of divmod by 60).
            duration = (end - start).total_seconds()
            print('Training finished in {} minutes.'.format(divmod(duration, 60)[0]))
            print('Model saved to {}'.format(model_path))

    print("Finish "+ opt +" optimiser")
print("End tuning For uni directional 2 Layer LSTM")

Start tuning For uni directional 2 Layer LSTM
Currently using Adam optimiser
Currently using dropout rate of 0
Total number of model parameters is 560320
EPOCH 1:
  batch 50 loss: 4.346335139274597
  batch 100 loss: 3.123088126182556
  batch 150 loss: 2.8972212362289427
  batch 200 loss: 2.759316143989563
  batch 250 loss: 2.633390688896179
  batch 300 loss: 2.457405891418457
  batch 350 loss: 2.27980318069458
  batch 400 loss: 2.2151732540130613
  batch 450 loss: 2.0854905366897585
  batch 500 loss: 2.027049510478973
  batch 550 loss: 1.9372886395454407
  batch 600 loss: 1.8851449275016785
  batch 650 loss: 1.8118459391593933
  batch 700 loss: 1.8733761239051818
  batch 750 loss: 1.7866853785514831
  batch 800 loss: 1.7571559047698975
  batch 850 loss: 1.741557686328888
  batch 900 loss: 1.729168155193329
LOSS train 1.72917 valid 1.68293, valid PER 53.77%
EPOCH 2:
  batch 50 loss: 1.6805900597572327
  batch 100 loss: 1.6421936058998108
  batch 150 loss: 1.6390576481819152
  batch 200 

  batch 200 loss: 0.9457796919345856
  batch 250 loss: 1.0278704571723938
  batch 300 loss: 0.9684745728969574
  batch 350 loss: 0.9747871959209442
  batch 400 loss: 0.996763927936554
  batch 450 loss: 0.9441496992111206
  batch 500 loss: 1.016985023021698
  batch 550 loss: 0.9403657221794128
  batch 600 loss: 0.9850481700897217
  batch 650 loss: 1.0400050342082978
  batch 700 loss: 1.0020142376422883
  batch 750 loss: 0.9509621059894562
  batch 800 loss: 0.9749845099449158
  batch 850 loss: 1.0084507048130036
  batch 900 loss: 0.9841943144798279
LOSS train 0.98419 valid 1.06885, valid PER 32.68%
EPOCH 13:
  batch 50 loss: 0.9511243331432343
  batch 100 loss: 0.9984589684009552
  batch 150 loss: 0.9416204750537872
  batch 200 loss: 0.9569619083404541
  batch 250 loss: 0.960421462059021
  batch 300 loss: 0.9359949278831482
  batch 350 loss: 0.9645335602760315
  batch 400 loss: 1.026206476688385
  batch 450 loss: 1.0437476682662963
  batch 500 loss: 0.9742412936687469
  batch 550 loss: 0

  batch 350 loss: 1.4212997007369994
  batch 400 loss: 1.3944188094139098
  batch 450 loss: 1.3860944437980651
  batch 500 loss: 1.3918337512016297
  batch 550 loss: 1.376759672164917
  batch 600 loss: 1.3486296474933623
  batch 650 loss: 1.320385057926178
  batch 700 loss: 1.3427540981769561
  batch 750 loss: 1.4199447441101074
  batch 800 loss: 1.3214681839942932
  batch 850 loss: 1.3489288091659546
  batch 900 loss: 1.3224798917770386
LOSS train 1.32248 valid 1.38168, valid PER 42.95%
EPOCH 4:
  batch 50 loss: 1.327938266992569
  batch 100 loss: 1.317831835746765
  batch 150 loss: 1.2667737650871276
  batch 200 loss: 1.2896015429496765
  batch 250 loss: 1.301979546546936
  batch 300 loss: 1.3257019996643067
  batch 350 loss: 1.2707879745960236
  batch 400 loss: 1.3070688939094544
  batch 450 loss: 1.2689840757846833
  batch 500 loss: 1.234335242509842
  batch 550 loss: 1.2901889824867248
  batch 600 loss: 1.3281439304351808
  batch 650 loss: 1.3101835107803346
  batch 700 loss: 1.28

  batch 750 loss: 0.9998311042785645
  batch 800 loss: 0.9480529737472534
  batch 850 loss: 1.0482417905330659
  batch 900 loss: 0.999470648765564
LOSS train 0.99947 valid 1.08362, valid PER 33.66%
EPOCH 15:
  batch 50 loss: 0.9902499556541443
  batch 100 loss: 0.9472691059112549
  batch 150 loss: 0.930337985754013
  batch 200 loss: 0.9989167439937592
  batch 250 loss: 0.9690368342399597
  batch 300 loss: 0.9400697934627533
  batch 350 loss: 0.9541654324531555
  batch 400 loss: 0.9410768043994904
  batch 450 loss: 0.9758247375488281
  batch 500 loss: 0.9717845344543456
  batch 550 loss: 0.9819350671768189
  batch 600 loss: 1.0323086631298066
  batch 650 loss: 1.014403188228607
  batch 700 loss: 0.977833491563797
  batch 750 loss: 0.9849282848834991
  batch 800 loss: 0.9972719252109528
  batch 850 loss: 0.9390538775920868
  batch 900 loss: 0.9971554863452912
LOSS train 0.99716 valid 1.05026, valid PER 31.50%
EPOCH 16:
  batch 50 loss: 0.9858012926578522
  batch 100 loss: 0.9364194929599

  batch 900 loss: 1.4219681859016418
LOSS train 1.42197 valid 1.30111, valid PER 40.91%
EPOCH 6:
  batch 50 loss: 1.3492155432701112
  batch 100 loss: 1.3503376865386962
  batch 150 loss: 1.2969682717323303
  batch 200 loss: 1.3504661583900452
  batch 250 loss: 1.3463176798820495
  batch 300 loss: 1.3174651193618774
  batch 350 loss: 1.3264539909362794
  batch 400 loss: 1.3297682511806488
  batch 450 loss: 1.3667264246940614
  batch 500 loss: 1.361070306301117
  batch 550 loss: 1.3435300445556642
  batch 600 loss: 1.3596639823913574
  batch 650 loss: 1.392453029155731
  batch 700 loss: 1.3361058640480041
  batch 750 loss: 1.2844684028625488
  batch 800 loss: 1.3160608291625977
  batch 850 loss: 1.2885420608520508
  batch 900 loss: 1.3674168014526367
LOSS train 1.36742 valid 1.33808, valid PER 42.13%
EPOCH 7:
  batch 50 loss: 1.3344583821296692
  batch 100 loss: 1.3165594363212585
  batch 150 loss: 1.3294534015655517
  batch 200 loss: 1.277997808456421
  batch 250 loss: 1.26806104183197

  batch 300 loss: 1.1741521263122559
  batch 350 loss: 1.154753235578537
  batch 400 loss: 1.2108310914039613
  batch 450 loss: 1.2988735330104828
  batch 500 loss: 1.2569367396831512
  batch 550 loss: 1.193630645275116
  batch 600 loss: 1.3175043439865113
  batch 650 loss: 1.3204483163356782
  batch 700 loss: 1.2548356604576112
  batch 750 loss: 1.1858949506282805
  batch 800 loss: 1.1943905007839204
  batch 850 loss: 1.1977395415306091
  batch 900 loss: 1.1722902762889862
LOSS train 1.17229 valid 1.21022, valid PER 37.92%
EPOCH 18:
  batch 50 loss: 1.1614924228191377
  batch 100 loss: 1.189409888982773
  batch 150 loss: 1.2188839280605317
  batch 200 loss: 1.1784587168693543
  batch 250 loss: 1.1791108787059783
  batch 300 loss: 1.1774186432361602
  batch 350 loss: 1.2054944515228272
  batch 400 loss: 1.1659189093112945
  batch 450 loss: 1.226254094839096
  batch 500 loss: 1.1932009887695312
  batch 550 loss: 1.1714679157733918
  batch 600 loss: 1.176809970140457
  batch 650 loss: 1.

  batch 450 loss: 1.5479865097999572
  batch 500 loss: 1.552217402458191
  batch 550 loss: 1.5051586937904358
  batch 600 loss: 1.5148541021347046
  batch 650 loss: 1.5870400333404542
  batch 700 loss: 1.5236979079246522
  batch 750 loss: 1.5158755445480347
  batch 800 loss: 1.4952186059951782
  batch 850 loss: 1.50380713224411
  batch 900 loss: 1.4993728947639466
LOSS train 1.49937 valid 1.40968, valid PER 48.70%
EPOCH 9:
  batch 50 loss: 1.4263037276268005
  batch 100 loss: 1.5196050930023193
  batch 150 loss: 1.4916280221939087
  batch 200 loss: 1.4176553678512573
  batch 250 loss: 1.4641001915931702
  batch 300 loss: 1.4644421410560609
  batch 350 loss: 1.4924025464057922
  batch 400 loss: 1.4752102994918823
  batch 450 loss: 1.454753165245056
  batch 500 loss: 1.4359695553779601
  batch 550 loss: 1.4653852224349975
  batch 600 loss: 1.4648370599746705
  batch 650 loss: 1.4521031332015992
  batch 700 loss: 1.4368066048622132
  batch 750 loss: 1.4857138347625733
  batch 800 loss: 1.

  batch 800 loss: 1.2622269093990326
  batch 850 loss: 1.2869526147842407
  batch 900 loss: 1.2744816744327545
LOSS train 1.27448 valid 1.23670, valid PER 38.89%
EPOCH 20:
  batch 50 loss: 1.255370750427246
  batch 100 loss: 1.286340537071228
  batch 150 loss: 1.2802221632003785
  batch 200 loss: 1.2814559423923493
  batch 250 loss: 1.261622394323349
  batch 300 loss: 1.263218140602112
  batch 350 loss: 1.239763733148575
  batch 400 loss: 1.2809998846054078
  batch 450 loss: 1.2565087187290191
  batch 500 loss: 1.265830866098404
  batch 550 loss: 1.3241942346096038
  batch 600 loss: 1.2508706045150757
  batch 650 loss: 1.3063528990745545
  batch 700 loss: 1.3029358422756194
  batch 750 loss: 1.2702208244800568
  batch 800 loss: 1.2714236307144164
  batch 850 loss: 1.2970575499534607
  batch 900 loss: 1.3848312520980834
LOSS train 1.38483 valid 1.23069, valid PER 38.08%
Training finished in 9.0 minutes.
Model saved to checkpoints/20231211_113748/model_20
Currently using dropout rate of 

LOSS train 1.33678 valid 1.22709, valid PER 38.35%
EPOCH 11:
  batch 50 loss: 1.257199776172638
  batch 100 loss: 1.2723645758628845
  batch 150 loss: 1.2776077616214752
  batch 200 loss: 1.3057368791103363
  batch 250 loss: 1.2883813822269439
  batch 300 loss: 1.2460764145851135
  batch 350 loss: 1.2846076476573944
  batch 400 loss: 1.2777276706695557
  batch 450 loss: 1.3117445385456086
  batch 500 loss: 1.2615432679653167
  batch 550 loss: 1.2863542997837067
  batch 600 loss: 1.2705463755130768
  batch 650 loss: 1.3186847972869873
  batch 700 loss: 1.2515344846248626
  batch 750 loss: 1.2802686476707459
  batch 800 loss: 1.301668883562088
  batch 850 loss: 1.3474023628234864
  batch 900 loss: 1.3555156469345093
LOSS train 1.35552 valid 1.20341, valid PER 37.85%
EPOCH 12:
  batch 50 loss: 1.2961091196537018
  batch 100 loss: 1.252028201818466
  batch 150 loss: 1.2761135578155518
  batch 200 loss: 1.2966256868839263
  batch 250 loss: 1.3259624552726745
  batch 300 loss: 1.281830010414