In [1]:
%pylab inline

import pandas as pd
import nltk

import kapre
import arrow
import pprint
import threading
import pprint
from tqdm import tqdm

from soph import MFCC, center_wave, Delta
from soph import ex_generator as old_gen

import tensorflow as tf
config = tf.ConfigProto(allow_soft_placement = True)
sess = tf.Session(config = config)

import keras
import keras.backend as K
K.set_session(sess)


ex_df = pd.read_pickle("data/ex_df.pkl")

Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 16454814290371672280, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 357957632
 locality {
   bus_id: 1
 }
 incarnation: 16880382070085251117
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:1e.0, compute capability: 3.7"]

# Utility code

In [3]:
nltk.download('cmudict')
arpabet = nltk.corpus.cmudict.dict()

words = list(ex_df.raw_label.unique())
words.remove(np.nan)
words.remove("silence")

phone_dict = dict()
phone_set = set()
maxlen = 0
for w in words:
    print(w, arpabet[w])
    phones = arpabet[w][0]
    phones = [p.strip("012") for p in phones] #remove emphasis on vowels
    phones_b = phones
    phone_dict[w] = phones_b
    phone_set |= set(phones)
    if (len(phones_b)) > maxlen:
        maxlen = len(phones_b)
phone_dict["silence"] = ["sil"]
alphabet = sorted(list(phone_set)) + ["sil"] + ["*"]

def text_to_labels(text):
    phones = phone_dict[text]
    ret = [alphabet.index(p) for p in phones]
    return ret

N_CAT = len(alphabet)
# pprint.pprint(phone_dict, compact=True)
print("{} phonemes in alphabet".format(N_CAT))

pprint.pprint(alphabet, compact=True)

[nltk_data] Downloading package cmudict to /home/ubuntu/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!
right [['R', 'AY1', 'T']]
eight [['EY1', 'T']]
cat [['K', 'AE1', 'T']]
tree [['T', 'R', 'IY1']]
bed [['B', 'EH1', 'D']]
happy [['HH', 'AE1', 'P', 'IY0']]
go [['G', 'OW1']]
dog [['D', 'AO1', 'G']]
no [['N', 'OW1']]
wow [['W', 'AW1']]
nine [['N', 'AY1', 'N']]
left [['L', 'EH1', 'F', 'T']]
stop [['S', 'T', 'AA1', 'P']]
three [['TH', 'R', 'IY1']]
sheila [['SH', 'IY1', 'L', 'AH0']]
one [['W', 'AH1', 'N'], ['HH', 'W', 'AH1', 'N']]
bird [['B', 'ER1', 'D']]
zero [['Z', 'IH1', 'R', 'OW0'], ['Z', 'IY1', 'R', 'OW0']]
seven [['S', 'EH1', 'V', 'AH0', 'N']]
up [['AH1', 'P']]
marvin [['M', 'AA1', 'R', 'V', 'IH0', 'N']]
two [['T', 'UW1']]
house [['HH', 'AW1', 'S']]
down [['D', 'AW1', 'N']]
six [['S', 'IH1', 'K', 'S']]
yes [['Y', 'EH1', 'S']]
on [['AA1', 'N'], ['AO1', 'N']]
five [['F', 'AY1', 'V']]
off [['AO1', 'F']]
four [['F', 'AO1', 'R']]
34 phonemes in alphabet
['AA', 'AE', 'AH'

In [4]:
def ex_generator(
        batch_size=32,
        shuffle=True,
        state=["train"],
        p_transform=0,
        vol_range=0,
        shift=0,
):

    epoch_df = ex_df[ex_df.state.isin(state)]
    num_ex = len(epoch_df)
    indices = np.arange(num_ex)

    # epoch loop runs
    while True:

        # shuffle anew every epoch
        if shuffle:
            epoch_df = epoch_df.sample(frac=1)

        # batch loop
        for i in np.arange(0, num_ex, batch_size):

            if i+batch_size > num_ex:
                continue
            
            batch_df = epoch_df.iloc[i:i + batch_size, :]

            x = np.zeros((len(batch_df), 16000))
            indices = []
            values = []

            # example loop
            for b in range(len(batch_df)):

                x[b, ...] = center_wave(
                    epoch_df.fn.values[b],
                    vol_range=vol_range,
                    shift=shift,
                    p_transform=p_transform)

                labels_i = text_to_labels(epoch_df.raw_label.values[b])
                
                for t,v in enumerate(labels_i):
                    indices.append([b,t])
                    values.append(v)
                    


            indices = np.array(indices, dtype='int32')
            values = np.array(values, dtype='int32')
            shape = np.array([len(batch_df), maxlen], dtype='int32')
            sequence_length = np.full((len(batch_df),), 100, dtype='int32')

            yield {"wav":x, 
                   "labels": (indices, values, shape),
                   "sequence_length": sequence_length}


In [5]:
N_SEQ = 100
SR = 16000
F_STEP = SR // N_SEQ
F_LEN = (5 * F_STEP) // 2
N_FFT = 512
N_MEL = 30
N_MFCC = 15
MAX_F = 4000
MIN_F = 40

# DROP = .5
NOISE = .75

In [6]:
with tf.device('/gpu:0'):
    lr = tf.placeholder(tf.float32, [], name='lr')
    wav_input = K.placeholder((None, SR), dtype='float32', name="wav_input")
    labels = K.placeholder(sparse=True, dtype='int32', name="labels")
    sequence_length = K.placeholder(
        [None], dtype='int32', name="sequence_length")

    wav_mean = K.mean(wav_input, axis=1, keepdims=True)
    wav_std = K.std(wav_input, axis=1, keepdims=True)
    wav_input = (wav_input - wav_input) / wav_std

    base_model = keras.Sequential([
        keras.layers.InputLayer(input_tensor=wav_input),
        MFCC(
            frame_length=F_LEN,
            frame_step=F_STEP,
            fft_length=N_FFT,
            num_mel_bins=N_MEL,
            sr=SR,
            n_mfcc=N_MFCC,
            upper_edge_hertz=MAX_F,
            lower_edge_hertz=MIN_F),
        keras.layers.BatchNormalization(),
        keras.layers.GaussianNoise(NOISE),
        Delta(),
        keras.layers.Reshape((N_SEQ, N_MFCC * 3)),

        #         keras.layers.Bidirectional(
        #             keras.layers.GRU(50, return_sequences=True), merge_mode="concat"),
        #         keras.layers.BatchNormalization(),
        #         keras.layers.Dropout(DROP),

        keras.layers.Bidirectional(
            keras.layers.GRU(50, activation="elu", return_sequences=True), merge_mode="ave"),
        keras.layers.BatchNormalization(),
        keras.layers.GaussianNoise(NOISE),
        
        keras.layers.Bidirectional(
            keras.layers.GRU(N_CAT, activation="linear", return_sequences=True),
            merge_mode="ave"),
    ])
    base_model.summary()

    logits = base_model.output
    logits = tf.transpose(logits, (1, 0, 2))

    loss = tf.nn.ctc_loss(labels, logits, sequence_length)
    cost = tf.reduce_mean(loss)
    optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=lr).minimize(cost)

    decoded, _ = tf.nn.ctc_beam_search_decoder(
        logits,
        sequence_length,
        beam_width=100,
        top_paths=10,
    )
    greedy_decoded, _ = tf.nn.ctc_beam_search_decoder(
        logits,
        sequence_length,
        beam_width=5,
        top_paths=1,
    )

    ler = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], 'int32'), labels))
    
    greedy_ler = tf.reduce_mean(
        tf.edit_distance(tf.cast(greedy_decoded[0], 'int32'), labels))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 16000)             0         
_________________________________________________________________
mfcc_1 (MFCC)                (None, 100, 15)           7710      
_________________________________________________________________
batch_normalization_1 (Batch (None, 100, 15)           60        
_________________________________________________________________
gaussian_noise_1 (GaussianNo (None, 100, 15)           0         
_________________________________________________________________
delta_1 (Delta)              (None, 100, 15, 3)        5         
_________________________________________________________________
reshape_1 (Reshape)          (None, 100, 45)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 100, 50)           28800     
__________

In [7]:
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [8]:
N_BATCH = 512
train_gen = ex_generator(
    batch_size=N_BATCH,
    state=["train"],
    p_transform=0, 
    vol_range=.1, 
    shift=0,
)
val_gen = ex_generator(
    batch_size=N_BATCH,
    state=["val", "test"],
    shuffle=True,
)

In [9]:
num_epochs = 200
train_steps = sum(ex_df.state.isin(["train"]))//N_BATCH
val_steps = sum(ex_df.state.isin(["val", "test"]))//N_BATCH
LR = 0.002
LR_WINDOW = 3
LR_STEP = .25
STOP_WINDOW = 6

with sess.as_default():
    hist = {
        "train_cost": [],
        "train_ler": [],
        "val_cost": [],
        "val_ler": [],
        "lr": []
    }
    
    for epoch_i in range(num_epochs):
        train_cost = []
        train_ler = []
    
        print("beginning epoch {}/{}. lr={}".format(epoch_i+1, num_epochs, LR))
        
        step_bar = tqdm(range(train_steps))
        
        for step_i in step_bar:
            
            train_batch = next(train_gen)
            
            feed = {
                lr: LR,
                wav_input: train_batch["wav"],
                sequence_length: train_batch["sequence_length"],
                labels: train_batch["labels"],
                K.learning_phase(): 1,
            }
#             optimizer.run(feed_dict=feed)
            batch_cost, batch_ler, _, _ = sess.run([cost, greedy_ler, optimizer, base_model.updates], feed)
            train_cost.append(batch_cost)
            train_ler.append(batch_ler*100)
            
            step_bar.set_description("train {}/{} | cost: {:.4f} | ler: {:.3f}".format(
                step_i+1,
                train_steps,
                np.mean(train_cost), 
                np.mean(train_ler),
            ))
            
        val_cost = []
        val_ler = []
        step_bar = tqdm(range(val_steps))
        for step_i in step_bar:

            val_batch = next(val_gen)
            feed = {
                lr: LR,
                wav_input: val_batch['wav'],
                sequence_length: val_batch['sequence_length'],
                labels: val_batch['labels'],
                K.learning_phase(): 0,
            }
            batch_cost, batch_ler = sess.run([cost, greedy_ler], feed)
            val_cost.append(batch_cost)
            val_ler.append(batch_ler*100)
            
            step_bar.set_description("validation {}/{} | cost: {:.4f} | ler: {:.3f}".format(
                step_i+1,
                val_steps,
                np.mean(val_cost), 
                np.mean(val_ler),
            ))

        
        hist["train_cost"].append(train_cost)
        hist["train_ler"].append(train_ler)
        hist["val_cost"].append(val_cost)
        hist["val_ler"].append(val_ler)
        hist["lr"].append(LR)
        
        out_means = np.mean(hist["val_ler"], axis=1)
        best_i = argmin(out_means)
        
        if epoch_i - best_i > LR_WINDOW:
            print("reducing learning rate {:.4f}->{:.4f}".format(
                LR,
                LR*LR_STEP
            ))
            LR = LR*LR_STEP
                
        if epoch_i - best_i > STOP_WINDOW:
            print("early stopping!")
            break

  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 1/200. lr=0.002


train 103/103 | cost: 49.2793 | ler: 293.995: 100%|██████████| 103/103 [01:26<00:00,  1.20it/s]
validation 27/27 | cost: 18.6865 | ler: 108.054: 100%|██████████| 27/27 [00:18<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 2/200. lr=0.002


train 103/103 | cost: 14.0917 | ler: 98.061: 100%|██████████| 103/103 [01:22<00:00,  1.24it/s]
validation 27/27 | cost: 14.0361 | ler: 94.922: 100%|██████████| 27/27 [00:18<00:00,  1.43it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 3/200. lr=0.002


train 103/103 | cost: 13.0522 | ler: 96.459: 100%|██████████| 103/103 [01:24<00:00,  1.22it/s]
validation 27/27 | cost: 12.7069 | ler: 95.676: 100%|██████████| 27/27 [00:18<00:00,  1.44it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 4/200. lr=0.002


train 103/103 | cost: 11.7292 | ler: 94.506: 100%|██████████| 103/103 [01:22<00:00,  1.26it/s]
validation 27/27 | cost: 11.9916 | ler: 94.574: 100%|██████████| 27/27 [00:18<00:00,  1.43it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 5/200. lr=0.002


train 103/103 | cost: 11.4999 | ler: 91.329: 100%|██████████| 103/103 [01:21<00:00,  1.26it/s]
validation 27/27 | cost: 11.6842 | ler: 91.824: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 6/200. lr=0.002


train 103/103 | cost: 11.2355 | ler: 90.015: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 11.2519 | ler: 88.299: 100%|██████████| 27/27 [00:18<00:00,  1.43it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 7/200. lr=0.002


train 103/103 | cost: 10.4324 | ler: 87.327: 100%|██████████| 103/103 [01:22<00:00,  1.24it/s]
validation 27/27 | cost: 11.6367 | ler: 88.423: 100%|██████████| 27/27 [00:18<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 8/200. lr=0.002


train 103/103 | cost: 10.3218 | ler: 85.002: 100%|██████████| 103/103 [01:21<00:00,  1.27it/s]
validation 27/27 | cost: 10.8182 | ler: 86.109: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 9/200. lr=0.002


train 103/103 | cost: 9.5938 | ler: 80.719: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 10.5199 | ler: 83.347: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 10/200. lr=0.002


train 103/103 | cost: 9.5166 | ler: 81.300: 100%|██████████| 103/103 [01:21<00:00,  1.26it/s]
validation 27/27 | cost: 9.9455 | ler: 80.219: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 11/200. lr=0.002


train 103/103 | cost: 9.2480 | ler: 79.198: 100%|██████████| 103/103 [01:22<00:00,  1.26it/s]
validation 27/27 | cost: 11.6149 | ler: 90.528: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 12/200. lr=0.002


train 103/103 | cost: 10.2249 | ler: 84.117: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 10.6259 | ler: 80.005: 100%|██████████| 27/27 [00:18<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 13/200. lr=0.002


train 103/103 | cost: 9.4016 | ler: 79.911: 100%|██████████| 103/103 [01:21<00:00,  1.26it/s]
validation 27/27 | cost: 9.4589 | ler: 81.339: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 14/200. lr=0.002


train 103/103 | cost: 8.9941 | ler: 77.149: 100%|██████████| 103/103 [01:22<00:00,  1.24it/s]
validation 27/27 | cost: 9.6969 | ler: 79.409: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 15/200. lr=0.002


train 103/103 | cost: 8.7085 | ler: 76.468: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.8614 | ler: 78.373: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 16/200. lr=0.002


train 103/103 | cost: 8.2387 | ler: 73.304: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.2127 | ler: 76.145: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 17/200. lr=0.002


train 103/103 | cost: 8.4049 | ler: 72.114: 100%|██████████| 103/103 [01:23<00:00,  1.24it/s]
validation 27/27 | cost: 9.4933 | ler: 76.825: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 18/200. lr=0.002


train 103/103 | cost: 8.4746 | ler: 72.453: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.6042 | ler: 76.190: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 19/200. lr=0.002


train 103/103 | cost: 8.1503 | ler: 71.075: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.3061 | ler: 76.349: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 20/200. lr=0.002


train 103/103 | cost: 7.9200 | ler: 70.293: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.4422 | ler: 76.753: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0020->0.0005
beginning epoch 21/200. lr=0.0005


train 103/103 | cost: 8.4397 | ler: 73.221: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 9.2764 | ler: 72.724: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 22/200. lr=0.0005


train 103/103 | cost: 8.5670 | ler: 72.258: 100%|██████████| 103/103 [01:23<00:00,  1.24it/s]
validation 27/27 | cost: 8.7800 | ler: 73.031: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 23/200. lr=0.0005


train 103/103 | cost: 8.0268 | ler: 71.460: 100%|██████████| 103/103 [01:23<00:00,  1.24it/s]
validation 27/27 | cost: 8.5711 | ler: 71.212: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 24/200. lr=0.0005


train 103/103 | cost: 8.1387 | ler: 71.273: 100%|██████████| 103/103 [01:22<00:00,  1.24it/s]
validation 27/27 | cost: 8.2299 | ler: 70.429: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 25/200. lr=0.0005


train 103/103 | cost: 8.1595 | ler: 71.223: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 8.4973 | ler: 72.518: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 26/200. lr=0.0005


train 103/103 | cost: 7.9743 | ler: 70.891: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 8.4093 | ler: 71.782: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 27/200. lr=0.0005


train 103/103 | cost: 8.1616 | ler: 71.854: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 8.1335 | ler: 68.149: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 28/200. lr=0.0005


train 103/103 | cost: 7.9063 | ler: 69.957: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 8.7026 | ler: 71.637: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 29/200. lr=0.0005


train 103/103 | cost: 7.7730 | ler: 68.276: 100%|██████████| 103/103 [01:21<00:00,  1.26it/s]
validation 27/27 | cost: 8.2159 | ler: 70.513: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 30/200. lr=0.0005


train 103/103 | cost: 7.8045 | ler: 69.174: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 8.6116 | ler: 69.384: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 31/200. lr=0.0005


train 103/103 | cost: 7.9637 | ler: 70.489: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 8.8956 | ler: 72.602: 100%|██████████| 27/27 [00:19<00:00,  1.40it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0005->0.0001
beginning epoch 32/200. lr=0.000125


train 103/103 | cost: 7.6309 | ler: 67.641: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 8.2551 | ler: 72.288: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0001->0.0000
beginning epoch 33/200. lr=3.125e-05


train 103/103 | cost: 8.3755 | ler: 70.461: 100%|██████████| 103/103 [01:21<00:00,  1.26it/s]
validation 27/27 | cost: 8.1669 | ler: 68.760: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

reducing learning rate 0.0000->0.0000
beginning epoch 34/200. lr=7.8125e-06


train 103/103 | cost: 8.4106 | ler: 71.433: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 8.0074 | ler: 67.816: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 35/200. lr=7.8125e-06


train 103/103 | cost: 8.1133 | ler: 71.637: 100%|██████████| 103/103 [01:23<00:00,  1.24it/s]
validation 27/27 | cost: 8.1177 | ler: 68.481: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 36/200. lr=7.8125e-06


train 103/103 | cost: 8.0936 | ler: 71.510: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 27/27 | cost: 8.0439 | ler: 68.020: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 37/200. lr=7.8125e-06


train 103/103 | cost: 8.1592 | ler: 70.225: 100%|██████████| 103/103 [01:23<00:00,  1.23it/s]
validation 27/27 | cost: 8.3044 | ler: 70.213: 100%|██████████| 27/27 [00:19<00:00,  1.41it/s]
  0%|          | 0/103 [00:00<?, ?it/s]

beginning epoch 38/200. lr=7.8125e-06


train 103/103 | cost: 8.3470 | ler: 72.022: 100%|██████████| 103/103 [01:22<00:00,  1.25it/s]
validation 10/27 | cost: 7.7932 | ler: 67.874:  37%|███▋      | 10/27 [00:07<00:12,  1.41it/s]

KeyboardInterrupt: 