In [1]:
%pylab inline

import pandas as pd
import nltk

import kapre
import arrow
import pprint
import threading
import pprint
from tqdm import tqdm

from soph import MFCC, center_wave, Delta
from soph import ex_generator as old_gen

import tensorflow as tf
config = tf.ConfigProto(allow_soft_placement = True)
sess = tf.Session(config = config)

import keras
import keras.backend as K
K.set_session(sess)


ex_df = pd.read_pickle("data/ex_df.pkl")

Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 9906522733480974406, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 357957632
 locality {
   bus_id: 1
 }
 incarnation: 17820599789477874000
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:1e.0, compute capability: 3.7"]

# Utility code

In [3]:
nltk.download('cmudict')
arpabet = nltk.corpus.cmudict.dict()

words = list(ex_df.raw_label.unique())
words.remove(np.nan)
words.remove("silence")

phone_dict = dict()
phone_set = set()
maxlen = 0
for w in words:
    print(w, arpabet[w])
    phones = arpabet[w][0]
    phones = [p.strip("012") for p in phones] #remove emphasis on vowels
    phones_b = phones
    phone_dict[w] = phones_b
    phone_set |= set(phones)
    if (len(phones_b)) > maxlen:
        maxlen = len(phones_b)
phone_dict["silence"] = ["sil"]
alphabet = sorted(list(phone_set)) + ["sil"] + ["*"]

def text_to_labels(text):
    phones = phone_dict[text]
    ret = [alphabet.index(p) for p in phones]
    return ret

N_CAT = len(alphabet)
# pprint.pprint(phone_dict, compact=True)
print("{} phonemes in alphabet".format(N_CAT))

pprint.pprint(alphabet, compact=True)

[nltk_data] Downloading package cmudict to /home/ubuntu/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!
right [['R', 'AY1', 'T']]
eight [['EY1', 'T']]
cat [['K', 'AE1', 'T']]
tree [['T', 'R', 'IY1']]
bed [['B', 'EH1', 'D']]
happy [['HH', 'AE1', 'P', 'IY0']]
go [['G', 'OW1']]
dog [['D', 'AO1', 'G']]
no [['N', 'OW1']]
wow [['W', 'AW1']]
nine [['N', 'AY1', 'N']]
left [['L', 'EH1', 'F', 'T']]
stop [['S', 'T', 'AA1', 'P']]
three [['TH', 'R', 'IY1']]
sheila [['SH', 'IY1', 'L', 'AH0']]
one [['W', 'AH1', 'N'], ['HH', 'W', 'AH1', 'N']]
bird [['B', 'ER1', 'D']]
zero [['Z', 'IH1', 'R', 'OW0'], ['Z', 'IY1', 'R', 'OW0']]
seven [['S', 'EH1', 'V', 'AH0', 'N']]
up [['AH1', 'P']]
marvin [['M', 'AA1', 'R', 'V', 'IH0', 'N']]
two [['T', 'UW1']]
house [['HH', 'AW1', 'S']]
down [['D', 'AW1', 'N']]
six [['S', 'IH1', 'K', 'S']]
yes [['Y', 'EH1', 'S']]
on [['AA1', 'N'], ['AO1', 'N']]
five [['F', 'AY1', 'V']]
off [['AO1', 'F']]
four [['F', 'AO1', 'R']]
34 phonemes in alphabet
['AA', 'AE', 'AH'

In [4]:
def ex_generator(
        batch_size=32,
        shuffle=True,
        state=["train"],
        p_transform=0,
        vol_range=0,
        shift=0,
):

    epoch_df = ex_df[ex_df.state.isin(state)]
    num_ex = len(epoch_df)
    indices = np.arange(num_ex)

    # epoch loop runs
    while True:

        # shuffle anew every epoch
        if shuffle:
            epoch_df = epoch_df.sample(frac=1)

        # batch loop
        for i in np.arange(0, num_ex, batch_size):

            if i+batch_size > num_ex:
                continue
            
            batch_df = epoch_df.iloc[i:i + batch_size, :]

            x = np.zeros((len(batch_df), 16000))
            indices = []
            values = []

            # example loop
            for b in range(len(batch_df)):

                x[b, ...] = center_wave(
                    epoch_df.fn.values[b],
                    vol_range=vol_range,
                    shift=shift,
                    p_transform=p_transform)

                labels_i = text_to_labels(epoch_df.raw_label.values[b])
                
                for t,v in enumerate(labels_i):
                    indices.append([b,t])
                    values.append(v)
                    


            indices = np.array(indices, dtype='int32')
            values = np.array(values, dtype='int32')
            shape = np.array([len(batch_df), maxlen], dtype='int32')
            sequence_length = np.full((len(batch_df),), 100, dtype='int32')

            yield {"wav":x, 
                   "labels": (indices, values, shape),
                   "sequence_length": sequence_length}


In [5]:
N_SEQ = 100
SR = 16000
F_STEP = SR // N_SEQ
F_LEN = (5 * F_STEP) // 2
N_FFT = 512
N_MEL = 30
N_MFCC = 15
MAX_F = 4000
MIN_F = 40

# DROP = .5
NOISE = .75

In [6]:
with tf.device('/gpu:0'):
    lr = tf.placeholder(tf.float32, [], name='lr')
    wav_input = K.placeholder((None, SR), dtype='float32', name="wav_input")
    labels = K.placeholder(sparse=True, dtype='int32', name="labels")
    sequence_length = K.placeholder(
        [None], dtype='int32', name="sequence_length")

    wav_mean = K.mean(wav_input, axis=1, keepdims=True)
    wav_std = K.std(wav_input, axis=1, keepdims=True)
    wav_input = (wav_input - wav_input) / wav_std

    base_model = keras.Sequential([
        keras.layers.InputLayer(input_tensor=wav_input),
        MFCC(
            frame_length=F_LEN,
            frame_step=F_STEP,
            fft_length=N_FFT,
            num_mel_bins=N_MEL,
            sr=SR,
            n_mfcc=N_MFCC,
            upper_edge_hertz=MAX_F,
            lower_edge_hertz=MIN_F),
        keras.layers.BatchNormalization(),
        keras.layers.GaussianNoise(NOISE),
        Delta(),
        keras.layers.Reshape((N_SEQ, N_MFCC * 3)),

        #         keras.layers.Bidirectional(
        #             keras.layers.GRU(50, return_sequences=True), merge_mode="concat"),
        #         keras.layers.BatchNormalization(),
        #         keras.layers.Dropout(DROP),

        keras.layers.Bidirectional(
            keras.layers.GRU(50, activation="elu", return_sequences=True), merge_mode="ave"),
        keras.layers.BatchNormalization(),
        keras.layers.GaussianNoise(NOISE),
        
        keras.layers.Bidirectional(
            keras.layers.GRU(N_CAT, activation="linear", return_sequences=True),
            merge_mode="ave"),
    ])
    base_model.summary()

    logits = base_model.output
    logits = tf.transpose(logits, (1, 0, 2))

    loss = tf.nn.ctc_loss(labels, logits, sequence_length)
    cost = tf.reduce_mean(loss)
    optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=lr).minimize(cost)

    decoded, _ = tf.nn.ctc_beam_search_decoder(
        logits,
        sequence_length,
        beam_width=100,
        top_paths=10,
    )
    greedy_decoded, _ = tf.nn.ctc_beam_search_decoder(
        logits,
        sequence_length,
        beam_width=5,
        top_paths=1,
    )

    ler = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], 'int32'), labels))
    
    greedy_ler = tf.reduce_mean(
        tf.edit_distance(tf.cast(greedy_decoded[0], 'int32'), labels))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 16000)             0         
_________________________________________________________________
mfcc_1 (MFCC)                (None, 100, 15)           7710      
_________________________________________________________________
batch_normalization_1 (Batch (None, 100, 15)           60        
_________________________________________________________________
gaussian_noise_1 (GaussianNo (None, 100, 15)           0         
_________________________________________________________________
delta_1 (Delta)              (None, 100, 15, 3)        5         
_________________________________________________________________
reshape_1 (Reshape)          (None, 100, 45)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 100, 50)           28800     
__________

In [7]:
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [8]:
N_BATCH = 512
train_gen = ex_generator(
    batch_size=N_BATCH,
    state=["train"],
    p_transform=0, 
    vol_range=.1, 
    shift=0,
)
val_gen = ex_generator(
    batch_size=N_BATCH,
    state=["val", "test"],
    shuffle=True,
)

In [9]:
num_epochs = 200
train_steps = sum(ex_df.state.isin(["train"]))//N_BATCH
val_steps = sum(ex_df.state.isin(["val", "test"]))//N_BATCH
LR = 0.002
LR_WINDOW = 3
LR_STEP = .25
STOP_WINDOW = 6

with sess.as_default():
    hist = {
        "train_cost": [],
        "train_ler": [],
        "val_cost": [],
        "val_ler": [],
        "lr": []
    }
    
    for epoch_i in range(num_epochs):
        train_cost = []
        train_ler = []
    
        print("beginning epoch {}/{}. lr={}".format(epoch_i+1, num_epochs, LR))
        
        step_bar = tqdm(range(train_steps))
        
        for step_i in step_bar:
            
            train_batch = next(train_gen)
            
            feed = {
                lr: LR,
                wav_input: train_batch["wav"],
                sequence_length: train_batch["sequence_length"],
                labels: train_batch["labels"],
                K.learning_phase(): 1,
            }
#             optimizer.run(feed_dict=feed)
            batch_cost, batch_ler, _, _ = sess.run([cost, greedy_ler, optimizer, base_model.updates], feed)
            train_cost.append(batch_cost)
            train_ler.append(batch_ler*100)
            
            step_bar.set_description("train {}/{} | cost: {:.4f} | ler: {:.3f}".format(
                step_i+1,
                train_steps,
                np.mean(train_cost), 
                np.mean(train_ler),
            ))
            
        val_cost = []
        val_ler = []
        step_bar = tqdm(range(val_steps))
        for step_i in step_bar:

            val_batch = next(val_gen)
            feed = {
                lr: LR,
                wav_input: val_batch['wav'],
                sequence_length: val_batch['sequence_length'],
                labels: val_batch['labels'],
                K.learning_phase(): 0,
            }
            batch_cost, batch_ler = sess.run([cost, greedy_ler], feed)
            val_cost.append(batch_cost)
            val_ler.append(batch_ler*100)
            
            step_bar.set_description("validation {}/{} | cost: {:.4f} | ler: {:.3f}".format(
                step_i+1,
                val_steps,
                np.mean(val_cost), 
                np.mean(val_ler),
            ))

        
        hist["train_cost"].append(train_cost)
        hist["train_ler"].append(train_ler)
        hist["val_cost"].append(val_cost)
        hist["val_ler"].append(val_ler)
        hist["lr"].append(LR)
        
        out_means = np.mean(hist["val_ler"], axis=1)
        best_i = argmin(out_means)
        
        if epoch_i - best_i > LR_WINDOW:
            print("reducing learning rate {:.4f}->{:.4f}".format(
                LR,
                LR*LR_STEP
            ))
            LR = LR*LR_STEP
                
        if epoch_i - best_i > STOP_WINDOW:
            print("early stopping!")
            break

  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 1/200. lr=0.002


train 103/103 | cost: 52.7940 | ler: 310.963: 100%|██████████| 103/103 [01:27<00:00,  1.18it/s]
validation 27/27 | cost: 18.2517 | ler: 97.270: 100%|██████████| 27/27 [00:18<00:00,  1.43it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 2/200. lr=0.002


train 103/103 | cost: 11.6196 | ler: 92.760: 100%|██████████| 103/103 [01:22<00:00,  1.24it/s]
validation 27/27 | cost: 21.9358 | ler: 103.405: 100%|██████████| 27/27 [00:18<00:00,  1.44it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 3/200. lr=0.002


train 103/103 | cost: 10.4069 | ler: 87.278: 100%|██████████| 103/103 [01:23<00:00,  1.24it/s]
validation 27/27 | cost: 17.1606 | ler: 93.512: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 4/200. lr=0.002


train 103/103 | cost: 9.4026 | ler: 81.953: 100%|██████████| 103/103 [01:24<00:00,  1.22it/s]
validation 27/27 | cost: 12.4248 | ler: 81.187: 100%|██████████| 27/27 [00:18<00:00,  1.43it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 5/200. lr=0.002


train 103/103 | cost: 8.8051 | ler: 76.295: 100%|██████████| 103/103 [01:15<00:00,  1.37it/s]
validation 27/27 | cost: 9.7947 | ler: 75.978: 100%|██████████| 27/27 [00:10<00:00,  2.54it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 6/200. lr=0.002


train 103/103 | cost: 7.9946 | ler: 69.647: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]
validation 27/27 | cost: 9.3377 | ler: 72.835: 100%|██████████| 27/27 [00:10<00:00,  2.51it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 7/200. lr=0.002


train 103/103 | cost: 7.6328 | ler: 66.070: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 8.8451 | ler: 69.324: 100%|██████████| 27/27 [00:16<00:00,  1.67it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 8/200. lr=0.002


train 103/103 | cost: 6.9549 | ler: 62.869: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 7.9630 | ler: 66.607: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 9/200. lr=0.002


train 103/103 | cost: 5.9375 | ler: 55.686: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 7.6526 | ler: 60.832: 100%|██████████| 27/27 [00:13<00:00,  1.98it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 10/200. lr=0.002


train 103/103 | cost: 6.6066 | ler: 59.035: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 7.9372 | ler: 63.260: 100%|██████████| 27/27 [00:13<00:00,  1.98it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 11/200. lr=0.002


train 103/103 | cost: 6.1135 | ler: 54.330: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 7.2582 | ler: 57.630: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 12/200. lr=0.002


train 103/103 | cost: 5.1550 | ler: 47.262: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 7.4285 | ler: 57.092: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 13/200. lr=0.002


train 103/103 | cost: 5.0303 | ler: 44.935: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 6.6715 | ler: 52.104: 100%|██████████| 27/27 [00:13<00:00,  1.98it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 14/200. lr=0.002


train 103/103 | cost: 4.6910 | ler: 42.615: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 6.4526 | ler: 49.055: 100%|██████████| 27/27 [00:13<00:00,  1.94it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 15/200. lr=0.002


train 103/103 | cost: 4.2495 | ler: 37.107: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 5.9815 | ler: 44.261: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 16/200. lr=0.002


train 103/103 | cost: 5.8495 | ler: 51.171: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 6.3825 | ler: 53.719: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 17/200. lr=0.002


train 103/103 | cost: 4.3175 | ler: 38.707: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]
validation 27/27 | cost: 5.8746 | ler: 45.274: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 18/200. lr=0.002


train 103/103 | cost: 3.8972 | ler: 36.477: 100%|██████████| 103/103 [01:11<00:00,  1.43it/s]
validation 27/27 | cost: 7.8308 | ler: 51.830: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 19/200. lr=0.002


train 103/103 | cost: 4.7581 | ler: 41.337: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 5.5624 | ler: 45.610: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0020->0.0005
beginning epoch 20/200. lr=0.0005


train 103/103 | cost: 4.7224 | ler: 41.660: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 5.0354 | ler: 42.219: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 21/200. lr=0.0005


train 103/103 | cost: 4.2595 | ler: 37.514: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 4.9714 | ler: 39.289: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 22/200. lr=0.0005


train 103/103 | cost: 3.8993 | ler: 35.024: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 4.5207 | ler: 36.565: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 23/200. lr=0.0005


train 103/103 | cost: 3.5463 | ler: 33.240: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]
validation 27/27 | cost: 4.4739 | ler: 37.215: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 24/200. lr=0.0005


train 103/103 | cost: 3.5224 | ler: 31.459: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 4.4028 | ler: 37.109: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 25/200. lr=0.0005


train 103/103 | cost: 3.4324 | ler: 31.496: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 4.0531 | ler: 33.088: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 26/200. lr=0.0005


train 103/103 | cost: 3.3579 | ler: 30.115: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 4.8334 | ler: 34.978: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 27/200. lr=0.0005


train 103/103 | cost: 3.3990 | ler: 31.735: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 4.5562 | ler: 33.779: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 28/200. lr=0.0005


train 103/103 | cost: 3.3668 | ler: 29.667: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 3.5558 | ler: 30.477: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 29/200. lr=0.0005


train 103/103 | cost: 3.0399 | ler: 27.505: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 4.2381 | ler: 34.970: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 30/200. lr=0.0005


train 103/103 | cost: 2.8277 | ler: 25.538: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]
validation 27/27 | cost: 4.1919 | ler: 35.179: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 31/200. lr=0.0005


train 103/103 | cost: 3.2082 | ler: 28.878: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 4.1262 | ler: 31.994: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 32/200. lr=0.0005


train 103/103 | cost: 2.9361 | ler: 25.950: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 3.4682 | ler: 28.772: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 33/200. lr=0.0005


train 103/103 | cost: 3.0164 | ler: 27.200: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 3.2903 | ler: 29.417: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 34/200. lr=0.0005


train 103/103 | cost: 2.8321 | ler: 25.278: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 3.7019 | ler: 30.319: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 35/200. lr=0.0005


train 103/103 | cost: 2.8414 | ler: 26.710: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 3.6800 | ler: 32.832: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 36/200. lr=0.0005


train 103/103 | cost: 2.5064 | ler: 23.351: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 3.4982 | ler: 27.065: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 37/200. lr=0.0005


train 103/103 | cost: 2.4875 | ler: 22.784: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 3.7765 | ler: 30.018: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 38/200. lr=0.0005


train 103/103 | cost: 2.3661 | ler: 21.569: 100%|██████████| 103/103 [01:11<00:00,  1.43it/s]
validation 27/27 | cost: 2.9691 | ler: 25.431: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 39/200. lr=0.0005


train 103/103 | cost: 2.3555 | ler: 21.784: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.4880 | ler: 28.550: 100%|██████████| 27/27 [00:13<00:00,  1.98it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 40/200. lr=0.0005


train 103/103 | cost: 2.2479 | ler: 21.470: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.3627 | ler: 25.824: 100%|██████████| 27/27 [00:13<00:00,  2.00it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 41/200. lr=0.0005


train 103/103 | cost: 2.5077 | ler: 22.897: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 3.4741 | ler: 27.594: 100%|██████████| 27/27 [00:13<00:00,  2.00it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 42/200. lr=0.0005


train 103/103 | cost: 2.3678 | ler: 21.848: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.5305 | ler: 29.211: 100%|██████████| 27/27 [00:16<00:00,  1.67it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0005->0.0001
beginning epoch 43/200. lr=0.000125


train 103/103 | cost: 2.6196 | ler: 24.022: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 3.3252 | ler: 28.265: 100%|██████████| 27/27 [00:13<00:00,  1.94it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 45/200. lr=7.8125e-06


train 103/103 | cost: 3.0109 | ler: 27.614: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 2.8602 | ler: 24.603: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 46/200. lr=7.8125e-06


train 103/103 | cost: 2.8625 | ler: 25.161: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 2.9085 | ler: 24.647: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 47/200. lr=7.8125e-06


train 103/103 | cost: 2.9773 | ler: 28.390: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.4550 | ler: 28.420: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 48/200. lr=7.8125e-06


train 103/103 | cost: 3.3279 | ler: 29.153: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.2656 | ler: 27.747: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 49/200. lr=7.8125e-06


train 103/103 | cost: 3.2388 | ler: 28.472: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 2.9964 | ler: 23.432: 100%|██████████| 27/27 [00:13<00:00,  1.94it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 50/200. lr=7.8125e-06


train 103/103 | cost: 3.1835 | ler: 28.221: 100%|██████████| 103/103 [01:12<00:00,  1.41it/s]
validation 27/27 | cost: 3.0184 | ler: 26.934: 100%|██████████| 27/27 [00:14<00:00,  1.91it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 51/200. lr=7.8125e-06


train 103/103 | cost: 3.3481 | ler: 29.003: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 3.0577 | ler: 24.602: 100%|██████████| 27/27 [00:16<00:00,  1.67it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 52/200. lr=7.8125e-06


train 103/103 | cost: 3.0195 | ler: 27.907: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 2.7949 | ler: 24.742: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 53/200. lr=7.8125e-06


train 103/103 | cost: 2.9235 | ler: 26.813: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 2.9191 | ler: 23.664: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 54/200. lr=1.953125e-06


train 103/103 | cost: 3.3484 | ler: 29.385: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 2.7047 | ler: 21.773: 100%|██████████| 27/27 [00:13<00:00,  1.98it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 55/200. lr=1.953125e-06


train 103/103 | cost: 3.1186 | ler: 27.202: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 3.1213 | ler: 25.327: 100%|██████████| 27/27 [00:14<00:00,  1.93it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 56/200. lr=1.953125e-06


train 103/103 | cost: 3.0383 | ler: 25.928: 100%|██████████| 103/103 [01:13<00:00,  1.40it/s]
validation 27/27 | cost: 2.8913 | ler: 25.107: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 57/200. lr=1.953125e-06


train 103/103 | cost: 2.9151 | ler: 26.532: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 3.1285 | ler: 25.725: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 58/200. lr=1.953125e-06


train 103/103 | cost: 3.1591 | ler: 27.986: 100%|██████████| 103/103 [01:13<00:00,  1.41it/s]
validation 27/27 | cost: 2.8963 | ler: 25.849: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 59/200. lr=4.8828125e-07


train 103/103 | cost: 2.6928 | ler: 25.188: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.2301 | ler: 24.819: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 60/200. lr=1.220703125e-07


train 103/103 | cost: 3.1863 | ler: 28.319: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]
validation 27/27 | cost: 3.0841 | ler: 25.022: 100%|██████████| 27/27 [00:13<00:00,  1.96it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 61/200. lr=3.0517578125e-08


train 103/103 | cost: 2.9052 | ler: 26.303: 100%|██████████| 103/103 [01:12<00:00,  1.42it/s]
validation 27/27 | cost: 3.2370 | ler: 27.899: 100%|██████████| 27/27 [00:13<00:00,  1.97it/s]

reducing learning rate 0.0000->0.0000
early stopping!



