In [1]:
# Author: Sining Sun, Zhanheng Yang, Binbin Zhang

import numpy as np
import kaldi_io
from utils import *

In [2]:
# Output classes of the network, in posterior-index order.
# NOTE(review): presumably spoken digits with 'Z' = "zero" and 'O' = "oh" --
# confirm against the corpus transcripts.
targets_list = ['Z', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']
# Reverse lookup: class symbol -> index into targets_list.
targets_mapping = {symbol: index for index, symbol in enumerate(targets_list)}

In [3]:
class Layer:
    ''' Abstract base class of all network layers.

    Subclasses must override forward() and backward(); layers that own
    trainable parameters additionally override update().
    '''

    def forward(self, input):
        ''' Forward function by input
        Args:
            input: input, B * N matrix, B for batch size
        Returns:
            output when applied this layer
        Raises:
            NotImplementedError: always, the subclass has to override it
        '''
        # Raising a plain string is a TypeError in Python 3, so raise a real
        # exception class instead.
        raise NotImplementedError(
            '{}.forward is not implemented'.format(type(self).__name__))

    def backward(self, input, output, d_output):
        ''' Compute gradient of this layer's input by (input, output, d_output)
            as well as compute the gradient of the parameter of this layer
        Args:
            input: input of this layer
            output: output of this layer
            d_output: accumulated gradient from final output to this
                      layer's output
        Returns:
            accumulated gradient from final output to this layer's input
        Raises:
            NotImplementedError: always, the subclass has to override it
        '''
        raise NotImplementedError(
            '{}.backward is not implemented'.format(type(self).__name__))

    def set_learning_rate(self, lr):
        ''' Set learning rate of this layer
        Args:
            lr: step size stored as self.learning_rate
        '''
        self.learning_rate = lr

    def update(self):
        ''' Update this layer's parameters if it has any, otherwise do nothing
        '''

In [4]:
class ReLU(Layer):
    ''' Rectified linear unit: element-wise max(input, 0). '''

    def forward(self, input):
        ''' Clamp negative entries of input to zero
        Args:
            input: array of pre-activations
        Returns:
            new array of the same shape with negative entries replaced by 0
        '''
        # BEGIN_LAB
        # np.maximum avoids the overflow / inf - inf = NaN that the
        # (abs(x) + x) / 2 formulation produces for huge or -inf entries.
        # The float 0.0 keeps the result floating point for integer input.
        return np.maximum(input, 0.0)
        # END_LAB

    def backward(self, input, output, d_output):
        ''' Pass the gradient through where the input was positive
        Args:
            input: input that was given to forward()
            output: output of forward(), unused
            d_output: gradient with respect to this layer's output
        Returns:
            new array holding the gradient with respect to input; d_output
            itself is left untouched
        '''
        # BEGIN_LAB
        # Work on a copy: the caller keeps d_output in its own buffer and
        # must not see it silently overwritten.
        d_input = np.array(d_output, copy=True)
        d_input[input <= 0] = 0
        return d_input
        # END_LAB

In [5]:
class FullyConnect(Layer):
    ''' Affine layer computing input . w^T + b.

    w has shape (out_dim, in_dim), b has shape (out_dim,). dw and db hold
    the batch-averaged gradients produced by the latest backward() call.
    '''

    def __init__(self, in_dim, out_dim):
        ''' Create the parameters
        Args:
            in_dim: number of input features
            out_dim: number of output features
        '''
        # Standard-normal weights scaled by sqrt(2 / in_dim); bias at zero.
        noise = np.random.randn(out_dim, in_dim)
        self.w = noise * np.sqrt(2.0 / in_dim)
        self.b = np.zeros(out_dim)
        # Gradient buffers, same shapes as the parameters.
        self.dw = np.zeros((out_dim, in_dim))
        self.db = np.zeros(out_dim)

    def forward(self, input):
        ''' Apply the affine map to a batch
        Args:
            input: B * in_dim matrix
        Returns:
            B * out_dim matrix
        '''
        # BEGIN_LAB
        projected = np.dot(input, np.transpose(self.w))
        return projected + self.b
        # END_LAB

    def backward(self, input, output, d_output):
        ''' Store dw/db and return the gradient with respect to input
        Args:
            input: B * in_dim matrix that was given to forward()
            output: output of forward(), unused
            d_output: B * out_dim error signal (gradient) at this
                      layer's output
        Returns:
            B * in_dim gradient with respect to input
        '''
        num_rows = input.shape[0]
        # BEGIN_LAB, compute in_diff/dw/db here
        grad_w = d_output.T.dot(input)
        grad_b = d_output.sum(axis=0)
        in_diff = d_output.dot(self.w)
        # END_LAB
        # Normalize dw/db by batch size
        self.dw = grad_w / num_rows
        self.db = grad_b / num_rows
        return in_diff

    def update(self):
        ''' Take one gradient-descent step using the stored dw/db.

        Requires set_learning_rate() to have been called beforehand.
        '''
        step = self.learning_rate
        self.w = self.w - step * self.dw
        self.b = self.b - step * self.db

In [6]:
class Softmax(Layer):
    ''' Row-wise softmax over a B * N matrix. '''

    def forward(self, input):
        ''' Turn each row of input into a probability distribution
        Args:
            input: B * N matrix of activations
        Returns:
            B * N matrix whose rows sum to one
        '''
        # Subtract the per-row maximum before exponentiating so that the
        # largest exponent is exp(0).
        shifted = input - input.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    def backward(self, input, output, d_output):
        ''' Directly return the d_output as we show below, the grad is to
            the activation(input) of softmax
        '''
        return d_output

In [7]:
class DNN:
    ''' Feed-forward network: FullyConnect/ReLU stack ending in Softmax.

    forward() records every intermediate activation in self.forward_buf and
    backward() records every gradient in self.backward_buf.
    '''

    def __init__(self, in_dim, out_dim, hidden_dim, num_hidden):
        ''' Build the layer stack
        Args:
            in_dim: input feature dimension
            out_dim: number of output classes
            hidden_dim: width of every hidden layer
            num_hidden: number of hidden_dim -> hidden_dim blocks added
                        after the first in_dim -> hidden_dim block
        '''
        stack = [FullyConnect(in_dim, hidden_dim), ReLU()]
        for _ in range(num_hidden):
            stack.append(FullyConnect(hidden_dim, hidden_dim))
            stack.append(ReLU())
        stack.append(FullyConnect(hidden_dim, out_dim))
        stack.append(Softmax())
        self.layers = stack

    def set_learning_rate(self, lr):
        ''' Forward the learning rate to every layer '''
        for each in self.layers:
            each.set_learning_rate(lr)

    def forward(self, input):
        ''' Run input through all layers
        Args:
            input: batch handed to the first layer
        Returns:
            output of the last layer
        '''
        # forward_buf[0] is the network input, forward_buf[k + 1] is the
        # output of layer k.
        activations = [input]
        self.forward_buf = activations
        for each in self.layers:
            activations.append(each.forward(activations[-1]))
        assert (len(activations) == len(self.layers) + 1)
        return activations[-1]

    def backward(self, grad):
        '''
        Args:
            grad: the grad is to the activation before softmax
        '''
        last = len(self.layers) - 1
        grads = [None] * len(self.layers)
        self.backward_buf = grads
        # The final (softmax) layer is skipped: grad is already the gradient
        # at its input, so it is stored in that layer's slot as-is.
        grads[last] = grad
        for idx in reversed(range(last)):
            grads[idx] = self.layers[idx].backward(self.forward_buf[idx],
                                                   self.forward_buf[idx + 1],
                                                   grads[idx + 1])

    def update(self):
        ''' Ask every layer to apply its parameter update '''
        for each in self.layers:
            each.update()

In [8]:
def one_hot(labels, total_label):
    ''' Encode integer class labels as one-hot rows
    Args:
        labels: 1-D integer array of class indices
        total_label: number of classes, i.e. the width of each row
    Returns:
        float matrix of shape (len(labels), total_label) with a single 1.0
        per row at the column given by the label
    '''
    encoded = np.zeros((labels.shape[0], total_label))
    # Each row is a view into `encoded`, so writing to it fills the matrix.
    for row, label in zip(encoded, labels):
        row[label] = 1.0
    return encoded

In [9]:
def train(dnn, num_epochs=20, batch_size=100):
    ''' Train dnn with mini-batch gradient descent on the training set
    Args:
        dnn: network exposing forward(), backward() and update()
        num_epochs: number of passes over the training data
        batch_size: number of samples per mini-batch, must be >= 1
    Raises:
        ValueError: if batch_size is smaller than 1
    '''
    if batch_size < 1:
        # A non-positive step would never advance through the data.
        raise ValueError('batch_size must be a positive integer')
    # Feature dict and label dict, keyed by utterance.
    utt2feat, utt2target = read_feats_and_targets('train/feats.scp',
                                                  'train/text')
    print(len(utt2feat), len(utt2target))
    inputs, labels = build_input(targets_mapping, utt2feat, utt2target)
    print(inputs.shape, labels.shape)
    num_samples = inputs.shape[0]
    # Shuffle data once; every epoch then walks the same order.
    permute = np.random.permutation(num_samples)
    inputs = inputs[permute]
    labels = labels[permute]
    for epoch in range(num_epochs):
        for cur in range(0, num_samples, batch_size):
            # The current mini-batch covers samples [cur, end).
            end = min(cur + batch_size, num_samples)
            batch_inputs = inputs[cur:end]
            batch_labels = labels[cur:end]
            # Step1: forward
            # Each output row holds the class probabilities of one sample.
            out = dnn.forward(batch_inputs)
            one_hot_label = one_hot(batch_labels, out.shape[1])
            # Step2: Compute cross entropy loss and backward
            # The 1e-20 offset keeps log() finite for zero probabilities.
            loss = -np.sum(np.log(out + 1e-20) * one_hot_label) / out.shape[0]
            # The grad is to activation before softmax
            grad = out - one_hot_label
            dnn.backward(grad)
            # Step3: update parameters
            dnn.update()
            print('Epoch {} num_samples {} loss {}'.format(epoch, cur, loss))

In [10]:
def test(dnn):
    ''' Classify every test utterance and print the overall accuracy.

    For each utterance the feature matrix is read, spliced with 5 frames of
    context on each side, passed through dnn, and the row-averaged posterior
    decides the predicted class.
    Args:
        dnn: trained network exposing forward()
    '''
    utt2feat, utt2target = read_feats_and_targets('test/feats.scp',
                                                  'test/text')
    total = len(utt2feat)
    correct = 0
    for utt in utt2feat:
        reference = utt2target[utt]
        feats = splice(kaldi_io.read_mat(utt2feat[utt]), 5, 5)
        # Average the per-row posteriors into one distribution per utterance.
        frame_posteriors = dnn.forward(feats)
        utt_posterior = np.sum(frame_posteriors, axis=0) / float(feats.shape[0])
        predict = targets_list[np.argmax(utt_posterior)]
        if reference == predict:
            correct += 1
        print('label: {} predict: {}'.format(reference, predict))
    print('Acc: {}'.format(float(correct) / total))

In [11]:
def main():
    ''' Seed NumPy's global RNG, then build, train and evaluate the DNN. '''
    np.random.seed(777)
    # We splice the raw feat with left 5 frames and right 5 frames
    # So the input here is 39 * (5 + 1 + 5) = 429
    feat_dim = 39 * (5 + 1 + 5)
    dnn = DNN(in_dim=feat_dim, out_dim=11, hidden_dim=128, num_hidden=1)
    dnn.set_learning_rate(5e-3)
    train(dnn)
    test(dnn)


if __name__ == '__main__':
    main()

330 330
(18593, 429) (18593,)
Epoch 0 num_samples 0 loss 15.628633396495088
Epoch 0 num_samples 100 loss 15.596826659003375
Epoch 0 num_samples 200 loss 12.858356923044662
Epoch 0 num_samples 300 loss 10.824284632153876
Epoch 0 num_samples 400 loss 9.603603500457153
Epoch 0 num_samples 500 loss 8.570295367936513
Epoch 0 num_samples 600 loss 10.832593148085698
Epoch 0 num_samples 700 loss 8.338319712491744
Epoch 0 num_samples 800 loss 8.974606807639109
Epoch 0 num_samples 900 loss 7.947300554497961
Epoch 0 num_samples 1000 loss 7.354009499320913
Epoch 0 num_samples 1100 loss 7.8172664478058325
Epoch 0 num_samples 1200 loss 7.496290822826072
Epoch 0 num_samples 1300 loss 5.335715814252815
Epoch 0 num_samples 1400 loss 6.226340094973284
Epoch 0 num_samples 1500 loss 6.29869771450125
Epoch 0 num_samples 1600 loss 6.763292102542775
Epoch 0 num_samples 1700 loss 6.473956151828652
Epoch 0 num_samples 1800 loss 6.129419316061203
Epoch 0 num_samples 1900 loss 6.333406284110201
Epoch 0 num_sampl

Epoch 1 num_samples 5600 loss 1.6676728845549156
Epoch 1 num_samples 5700 loss 1.4839630858861073
Epoch 1 num_samples 5800 loss 1.3207216568758295
Epoch 1 num_samples 5900 loss 1.1588621165373225
Epoch 1 num_samples 6000 loss 1.0243989809623566
Epoch 1 num_samples 6100 loss 1.1810389035826099
Epoch 1 num_samples 6200 loss 1.3274508799786944
Epoch 1 num_samples 6300 loss 1.4384953650542838
Epoch 1 num_samples 6400 loss 1.3313889840565363
Epoch 1 num_samples 6500 loss 1.4728314343410032
Epoch 1 num_samples 6600 loss 0.9463642249480503
Epoch 1 num_samples 6700 loss 1.2757745959393514
Epoch 1 num_samples 6800 loss 1.4057678242684375
Epoch 1 num_samples 6900 loss 1.0567166659183584
Epoch 1 num_samples 7000 loss 1.265610905486918
Epoch 1 num_samples 7100 loss 1.416689417501393
Epoch 1 num_samples 7200 loss 0.9498825387817841
Epoch 1 num_samples 7300 loss 1.3333214745220416
Epoch 1 num_samples 7400 loss 1.4465545948235203
Epoch 1 num_samples 7500 loss 1.2549480206709058
Epoch 1 num_samples 76

Epoch 2 num_samples 12900 loss 0.817296963062241
Epoch 2 num_samples 13000 loss 0.6498124489251984
Epoch 2 num_samples 13100 loss 0.5695833699292735
Epoch 2 num_samples 13200 loss 0.9146561458429707
Epoch 2 num_samples 13300 loss 0.7238350129366089
Epoch 2 num_samples 13400 loss 0.6189412334996253
Epoch 2 num_samples 13500 loss 0.7563048086384001
Epoch 2 num_samples 13600 loss 0.6648727846883569
Epoch 2 num_samples 13700 loss 0.8726822869358131
Epoch 2 num_samples 13800 loss 0.9219394214736301
Epoch 2 num_samples 13900 loss 0.9054895110881557
Epoch 2 num_samples 14000 loss 0.794376841256998
Epoch 2 num_samples 14100 loss 0.7262202579321889
Epoch 2 num_samples 14200 loss 0.9417526833994718
Epoch 2 num_samples 14300 loss 0.9575829327391646
Epoch 2 num_samples 14400 loss 0.7434520602167276
Epoch 2 num_samples 14500 loss 0.8139019682524213
Epoch 2 num_samples 14600 loss 0.8593817073842467
Epoch 2 num_samples 14700 loss 0.7484529639500218
Epoch 2 num_samples 14800 loss 0.930560521835829
Epo

Epoch 4 num_samples 2000 loss 0.4538780155078115
Epoch 4 num_samples 2100 loss 0.5592947300697403
Epoch 4 num_samples 2200 loss 0.6085620466481878
Epoch 4 num_samples 2300 loss 0.49344872074687485
Epoch 4 num_samples 2400 loss 0.4946414735328116
Epoch 4 num_samples 2500 loss 0.4717013085272724
Epoch 4 num_samples 2600 loss 0.5334099278309171
Epoch 4 num_samples 2700 loss 0.7035613856473796
Epoch 4 num_samples 2800 loss 0.4837245239447043
Epoch 4 num_samples 2900 loss 0.45291557041270486
Epoch 4 num_samples 3000 loss 0.7349207345227273
Epoch 4 num_samples 3100 loss 0.5853577471085935
Epoch 4 num_samples 3200 loss 0.5026187087353026
Epoch 4 num_samples 3300 loss 0.5669949675186393
Epoch 4 num_samples 3400 loss 0.4713554118124283
Epoch 4 num_samples 3500 loss 0.40125901603999
Epoch 4 num_samples 3600 loss 0.537007387044663
Epoch 4 num_samples 3700 loss 0.4518539299546558
Epoch 4 num_samples 3800 loss 0.5679478422227493
Epoch 4 num_samples 3900 loss 0.47631976573357476
Epoch 4 num_samples 

Epoch 5 num_samples 9700 loss 0.5373812904676835
Epoch 5 num_samples 9800 loss 0.2986148509515574
Epoch 5 num_samples 9900 loss 0.5683386761014836
Epoch 5 num_samples 10000 loss 0.3444956093429419
Epoch 5 num_samples 10100 loss 0.563923983176752
Epoch 5 num_samples 10200 loss 0.45961473365087246
Epoch 5 num_samples 10300 loss 0.3489713096725579
Epoch 5 num_samples 10400 loss 0.5443055295018105
Epoch 5 num_samples 10500 loss 0.4763285213754094
Epoch 5 num_samples 10600 loss 0.5586356714480736
Epoch 5 num_samples 10700 loss 0.46511396306660324
Epoch 5 num_samples 10800 loss 0.4467565309926971
Epoch 5 num_samples 10900 loss 0.6844192476274338
Epoch 5 num_samples 11000 loss 0.5272150070133199
Epoch 5 num_samples 11100 loss 0.4040822972748293
Epoch 5 num_samples 11200 loss 0.7236160425740193
Epoch 5 num_samples 11300 loss 0.5350332176160307
Epoch 5 num_samples 11400 loss 0.5094877907336093
Epoch 5 num_samples 11500 loss 0.5781348478370183
Epoch 5 num_samples 11600 loss 0.536725263432917
Epo

Epoch 6 num_samples 17800 loss 0.3873287267803305
Epoch 6 num_samples 17900 loss 0.36785725472436
Epoch 6 num_samples 18000 loss 0.3227949845070159
Epoch 6 num_samples 18100 loss 0.43550083524678945
Epoch 6 num_samples 18200 loss 0.26844458451342695
Epoch 6 num_samples 18300 loss 0.4692617447631702
Epoch 6 num_samples 18400 loss 0.4781219250842973
Epoch 6 num_samples 18500 loss 0.38614629500146125
Epoch 7 num_samples 0 loss 0.4002826605251441
Epoch 7 num_samples 100 loss 0.4453257801920135
Epoch 7 num_samples 200 loss 0.34870116815307706
Epoch 7 num_samples 300 loss 0.32257285392136664
Epoch 7 num_samples 400 loss 0.2610056288521303
Epoch 7 num_samples 500 loss 0.27440426990698563
Epoch 7 num_samples 600 loss 0.5416437773327271
Epoch 7 num_samples 700 loss 0.307945570129567
Epoch 7 num_samples 800 loss 0.390701851839331
Epoch 7 num_samples 900 loss 0.3143412736781708
Epoch 7 num_samples 1000 loss 0.3262392742631012
Epoch 7 num_samples 1100 loss 0.34055667925398736
Epoch 7 num_samples 1

Epoch 8 num_samples 6800 loss 0.36775024479479335
Epoch 8 num_samples 6900 loss 0.3075787095124787
Epoch 8 num_samples 7000 loss 0.2978017759131848
Epoch 8 num_samples 7100 loss 0.3354149564748343
Epoch 8 num_samples 7200 loss 0.1829519915212606
Epoch 8 num_samples 7300 loss 0.42286649488978056
Epoch 8 num_samples 7400 loss 0.34653149964918284
Epoch 8 num_samples 7500 loss 0.3073899300586022
Epoch 8 num_samples 7600 loss 0.2564944234354455
Epoch 8 num_samples 7700 loss 0.36060931633674614
Epoch 8 num_samples 7800 loss 0.30080135968180605
Epoch 8 num_samples 7900 loss 0.1894889460526959
Epoch 8 num_samples 8000 loss 0.43721417615137625
Epoch 8 num_samples 8100 loss 0.3764628805333401
Epoch 8 num_samples 8200 loss 0.29009960450135713
Epoch 8 num_samples 8300 loss 0.2870524783846743
Epoch 8 num_samples 8400 loss 0.2725862397711567
Epoch 8 num_samples 8500 loss 0.31485032074753994
Epoch 8 num_samples 8600 loss 0.35087813692967984
Epoch 8 num_samples 8700 loss 0.44954012774836927
Epoch 8 nu

Epoch 9 num_samples 14600 loss 0.3817110497223426
Epoch 9 num_samples 14700 loss 0.3238091259681235
Epoch 9 num_samples 14800 loss 0.3428451536939577
Epoch 9 num_samples 14900 loss 0.21206506210214784
Epoch 9 num_samples 15000 loss 0.29169123477045544
Epoch 9 num_samples 15100 loss 0.21096219599597646
Epoch 9 num_samples 15200 loss 0.39365507756490736
Epoch 9 num_samples 15300 loss 0.2546594147377004
Epoch 9 num_samples 15400 loss 0.20358318210213394
Epoch 9 num_samples 15500 loss 0.21611863226843245
Epoch 9 num_samples 15600 loss 0.25373046061431537
Epoch 9 num_samples 15700 loss 0.27990406122794637
Epoch 9 num_samples 15800 loss 0.2935802855246574
Epoch 9 num_samples 15900 loss 0.4454843112840288
Epoch 9 num_samples 16000 loss 0.38716844052519994
Epoch 9 num_samples 16100 loss 0.26456694729826064
Epoch 9 num_samples 16200 loss 0.3370033911348348
Epoch 9 num_samples 16300 loss 0.4035515313069567
Epoch 9 num_samples 16400 loss 0.44217855896251657
Epoch 9 num_samples 16500 loss 0.333018

Epoch 11 num_samples 3900 loss 0.23297658867905244
Epoch 11 num_samples 4000 loss 0.1793719576086659
Epoch 11 num_samples 4100 loss 0.2180720711203813
Epoch 11 num_samples 4200 loss 0.1859597374657104
Epoch 11 num_samples 4300 loss 0.24348112491674473
Epoch 11 num_samples 4400 loss 0.2013729962367109
Epoch 11 num_samples 4500 loss 0.298657263194674
Epoch 11 num_samples 4600 loss 0.1880286712315039
Epoch 11 num_samples 4700 loss 0.2965680031237046
Epoch 11 num_samples 4800 loss 0.18263138271942134
Epoch 11 num_samples 4900 loss 0.28866144222277107
Epoch 11 num_samples 5000 loss 0.30834378719284156
Epoch 11 num_samples 5100 loss 0.3258543257967493
Epoch 11 num_samples 5200 loss 0.24668820221034105
Epoch 11 num_samples 5300 loss 0.2557007239214959
Epoch 11 num_samples 5400 loss 0.23602238916940238
Epoch 11 num_samples 5500 loss 0.2497161693074269
Epoch 11 num_samples 5600 loss 0.32543253275301454
Epoch 11 num_samples 5700 loss 0.31194990041686826
Epoch 11 num_samples 5800 loss 0.241410294

Epoch 12 num_samples 11600 loss 0.2289703566578429
Epoch 12 num_samples 11700 loss 0.14500024734335298
Epoch 12 num_samples 11800 loss 0.1787096385118469
Epoch 12 num_samples 11900 loss 0.17282595975772835
Epoch 12 num_samples 12000 loss 0.23706529905262413
Epoch 12 num_samples 12100 loss 0.19051538389116382
Epoch 12 num_samples 12200 loss 0.17268710912283886
Epoch 12 num_samples 12300 loss 0.2619578520950826
Epoch 12 num_samples 12400 loss 0.25885293996628667
Epoch 12 num_samples 12500 loss 0.1688040915418607
Epoch 12 num_samples 12600 loss 0.2149369937342421
Epoch 12 num_samples 12700 loss 0.2366195409488975
Epoch 12 num_samples 12800 loss 0.18007658977523625
Epoch 12 num_samples 12900 loss 0.30487019980383745
Epoch 12 num_samples 13000 loss 0.18685875354734066
Epoch 12 num_samples 13100 loss 0.13929858470785128
Epoch 12 num_samples 13200 loss 0.31597675840045353
Epoch 12 num_samples 13300 loss 0.1696620818380528
Epoch 12 num_samples 13400 loss 0.22559528539155302
Epoch 12 num_sample

Epoch 13 num_samples 17500 loss 0.17586220363326163
Epoch 13 num_samples 17600 loss 0.18979095266041926
Epoch 13 num_samples 17700 loss 0.19910701397523653
Epoch 13 num_samples 17800 loss 0.17732828090291758
Epoch 13 num_samples 17900 loss 0.19459330884040793
Epoch 13 num_samples 18000 loss 0.14150375087056236
Epoch 13 num_samples 18100 loss 0.21680050280881225
Epoch 13 num_samples 18200 loss 0.13802024693146303
Epoch 13 num_samples 18300 loss 0.22534287374389528
Epoch 13 num_samples 18400 loss 0.20598463887384072
Epoch 13 num_samples 18500 loss 0.1648779130400806
Epoch 14 num_samples 0 loss 0.24250283470881925
Epoch 14 num_samples 100 loss 0.26173881762615975
Epoch 14 num_samples 200 loss 0.1952321904525772
Epoch 14 num_samples 300 loss 0.15590725260653576
Epoch 14 num_samples 400 loss 0.15182585041015495
Epoch 14 num_samples 500 loss 0.15109964363000472
Epoch 14 num_samples 600 loss 0.26677565732771347
Epoch 14 num_samples 700 loss 0.1821584918564487
Epoch 14 num_samples 800 loss 0.1

Epoch 15 num_samples 4000 loss 0.1252872885157777
Epoch 15 num_samples 4100 loss 0.16694916214281894
Epoch 15 num_samples 4200 loss 0.12492658422788513
Epoch 15 num_samples 4300 loss 0.16468784849713394
Epoch 15 num_samples 4400 loss 0.14914153316130277
Epoch 15 num_samples 4500 loss 0.20519347799094753
Epoch 15 num_samples 4600 loss 0.12186285213967775
Epoch 15 num_samples 4700 loss 0.21151817471310658
Epoch 15 num_samples 4800 loss 0.1175187113552613
Epoch 15 num_samples 4900 loss 0.18856503504843766
Epoch 15 num_samples 5000 loss 0.21113771978914164
Epoch 15 num_samples 5100 loss 0.21619084825455065
Epoch 15 num_samples 5200 loss 0.16717691991260694
Epoch 15 num_samples 5300 loss 0.1885554217245607
Epoch 15 num_samples 5400 loss 0.14103265125629919
Epoch 15 num_samples 5500 loss 0.19888201882559875
Epoch 15 num_samples 5600 loss 0.242193996311641
Epoch 15 num_samples 5700 loss 0.21672975849103623
Epoch 15 num_samples 5800 loss 0.17115634362917487
Epoch 15 num_samples 5900 loss 0.149

Epoch 16 num_samples 9600 loss 0.1198174226415426
Epoch 16 num_samples 9700 loss 0.2237171457197982
Epoch 16 num_samples 9800 loss 0.07713164963341569
Epoch 16 num_samples 9900 loss 0.22241137615458817
Epoch 16 num_samples 10000 loss 0.1033163774160739
Epoch 16 num_samples 10100 loss 0.1791597606250469
Epoch 16 num_samples 10200 loss 0.14863535020686225
Epoch 16 num_samples 10300 loss 0.09935758205584207
Epoch 16 num_samples 10400 loss 0.19280927284635896
Epoch 16 num_samples 10500 loss 0.18294828805368263
Epoch 16 num_samples 10600 loss 0.1703776962264088
Epoch 16 num_samples 10700 loss 0.18152335342663356
Epoch 16 num_samples 10800 loss 0.11380446345880625
Epoch 16 num_samples 10900 loss 0.25894117352269824
Epoch 16 num_samples 11000 loss 0.1915541202566452
Epoch 16 num_samples 11100 loss 0.11764689422557406
Epoch 16 num_samples 11200 loss 0.2679446152221758
Epoch 16 num_samples 11300 loss 0.12448733529647715
Epoch 16 num_samples 11400 loss 0.14704815969269616
Epoch 16 num_samples 11

Epoch 17 num_samples 15800 loss 0.13476897748499567
Epoch 17 num_samples 15900 loss 0.2094163467151931
Epoch 17 num_samples 16000 loss 0.17824199318201173
Epoch 17 num_samples 16100 loss 0.12402461958626067
Epoch 17 num_samples 16200 loss 0.1470949275655349
Epoch 17 num_samples 16300 loss 0.20579907688224458
Epoch 17 num_samples 16400 loss 0.20592894647274126
Epoch 17 num_samples 16500 loss 0.1844381113599361
Epoch 17 num_samples 16600 loss 0.17295767971768966
Epoch 17 num_samples 16700 loss 0.12538485763918483
Epoch 17 num_samples 16800 loss 0.15493831805584116
Epoch 17 num_samples 16900 loss 0.15154432912852828
Epoch 17 num_samples 17000 loss 0.16628349591482597
Epoch 17 num_samples 17100 loss 0.10592227152276192
Epoch 17 num_samples 17200 loss 0.11878699407687865
Epoch 17 num_samples 17300 loss 0.12045356140160117
Epoch 17 num_samples 17400 loss 0.13102232606184633
Epoch 17 num_samples 17500 loss 0.11860473787613288
Epoch 17 num_samples 17600 loss 0.1384149029103418
Epoch 17 num_sam

Epoch 19 num_samples 5300 loss 0.1344427592795515
Epoch 19 num_samples 5400 loss 0.08817941800923135
Epoch 19 num_samples 5500 loss 0.15037210457961325
Epoch 19 num_samples 5600 loss 0.1845154833419077
Epoch 19 num_samples 5700 loss 0.15172535685425145
Epoch 19 num_samples 5800 loss 0.1296512222370589
Epoch 19 num_samples 5900 loss 0.10740823208635718
Epoch 19 num_samples 6000 loss 0.15830180308098554
Epoch 19 num_samples 6100 loss 0.10365529989799077
Epoch 19 num_samples 6200 loss 0.14091093861033566
Epoch 19 num_samples 6300 loss 0.17249593390264428
Epoch 19 num_samples 6400 loss 0.11557668841781003
Epoch 19 num_samples 6500 loss 0.14216804894387036
Epoch 19 num_samples 6600 loss 0.07588824767734086
Epoch 19 num_samples 6700 loss 0.15656266037782607
Epoch 19 num_samples 6800 loss 0.11775525208821419
Epoch 19 num_samples 6900 loss 0.12663493868113881
Epoch 19 num_samples 7000 loss 0.10046255062350258
Epoch 19 num_samples 7100 loss 0.1301828111701022
Epoch 19 num_samples 7200 loss 0.06