In [1]:
import numpy as np

with open('data/text_data/japan.txt', 'r') as f:
    txt = f.read()

    X = []
    y = []

    char_to_idx = {char: i for i, char in enumerate(set(txt))}
    idx_to_char = {i: char for i, char in enumerate(set(txt))}

    X = np.array([char_to_idx[x] for x in txt])
    y = [char_to_idx[x] for x in txt[1:]]
    y.append(char_to_idx['.'])
    y = np.array(y)

In [2]:
import impl.layer as l

class GRU:

    def __init__(self, D, H, L, char2idx, idx2char, p_dropout):
        self.D = D
        self.H = H
        self.L = L
        self.char2idx = char2idx
        self.idx2char = idx2char
        self.vocab_size = len(char2idx)
        self.losses = {'train':[], 'smooth train':[]}
        self.p_dropout = p_dropout
        
        # Model parameters
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        
        self.model = []
        for _ in range(self.L):
            self.model.append(m)
            
    def initial_state(self):
        return np.zeros((1, self.H))

    def dropout_forward(self, X, p_dropout):
        u = np.random.binomial(1, p_dropout, size=X.shape) / p_dropout
        # q = 1-p_dropout
        # u = np.random.binomial(1, q, size=X.shape)
        out = X * u
        cache = u
        return out, cache

    def dropout_backward(self, dout, cache):
        dX = dout * cache
        return dX
    
    def selu_forward(self, X):
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        out = scale * np.where(X>=0.0, X, alpha * (np.exp(X)-1))
        cache = X
        return out, cache

    def selu_backward(self, dout, cache):
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        X = cache
        dX_pos = dout.copy()
        dX_pos[X<0] = 0
        dX_neg = dout.copy()
        dX_neg[X>0] = 0
        dX = scale * np.where(X>=0.0, dX_pos, dX_neg * alpha * np.exp(X))
        return dX

    # p_dropout = keep_prob in this case! 
    # Is this true in other cases as well?
    def alpha_dropout_fwd(self, h, q):
        '''h is activation, q is keep probability: q=1-p, p=p_dropout, and q=keep_prob'''
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        alpha_p = -scale * alpha
        mask = np.random.binomial(1, q, size=h.shape)
        dropped = (mask * h) + ((1 - mask) * alpha_p)
        a = 1. / np.sqrt(q + (alpha_p ** 2 * q  * (1 - q)))
        b = -a * (1 - q) * alpha_p
        out = (a * dropped) + b
        cache = (a, mask)
        return out, cache

    def alpha_dropout_bwd(self, dout, cache):
        a, mask = cache
        d_dropped = dout * a
        dh = d_dropped * mask
        return dh
    
    def forward(self, X, h, m, train):
        Wz, Wh, Wy = m['Wz'], m['Wh'], m['Wy']
        bz, bh, by = m['bz'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        h = h_in + (hz * (hh - h_in))
        # equal to
        # h = (1.0 - hz) * h_in + hz * hh
        # or
        # h = h_in + (hz * hh)
        # or
        # h = h_in + hz
        # or
        # h = h_in + hh

        # SELU + SELU-Dropout
        y, y_cache = l.fc_forward(h, Wy, by)
        y, y_selu_cache = self.selu_forward(y)
        y = X_in + y
        
        if train: # with Dropout
            y, y_do_cache = self.alpha_dropout_fwd(y, self.p_dropout)
            cache = (h_in, hz, hz_cache, hz_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache, y_selu_cache, y_do_cache)
        else: # no Dropout: testing or validation
            cache = (h_in, hz, hz_cache, hz_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache, y_selu_cache)

        return y, h, cache

    def backward(self, dy, dh, cache, train):
        if train:
            h_in, hz, hz_cache, hz_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache, y_selu_cache, y_do_cache = cache
            dy = self.alpha_dropout_bwd(dout=dy, cache=y_do_cache)
        else:
            h_in, hz, hz_cache, hz_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache, y_selu_cache = cache
        
        dy_out = dy.copy()
        dh_out = dh.copy()
        
        dy = self.selu_backward(dy, y_selu_cache)
        dh, dWy, dby = l.fc_backward(dy, y_cache)
        
        dh += dh_out
        dh_in1 = dh * (1.0 - hz) # res

        dhh =  dh * hz
        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)
        
        dhz = dh * (hh - h_in)
        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)
        
        dX = dXz + dXh
        
        dh_in2 = dX[:, :self.H]
        dh = dh_in1 + dh_in2 # res cells
        
        dX = dX[:, self.H:]
        dX += dy_out # res layers

        grad = dict(Wz=dWz, Wh=dWh, Wy=dWy, bz=dbz, bh=dbh, by=dby)
        
        return dX, dh, grad
    
    def train_forward(self, X_train, h):
        ys, caches = [], []
        h_init = h.copy()
        h = []
        for _ in range(self.L):
            h.append(h_init.copy())
            caches.append([])
            
        for X in X_train:
            X_one_hot = np.zeros(self.D)
            X_one_hot[X] = 1.
            X = X_one_hot.reshape(1, -1)
            for layer in range(self.L):
                y, h[layer], cache = self.forward(X, h[layer], self.model[layer], train=True)
                caches[layer].append(cache)
                X = y.copy()
            ys.append(y)
            
        return ys, caches

    def cross_entropy(self, y_pred, y_train):
        m = y_pred.shape[0]

        prob = l.softmax(y_pred)
        log_like = -np.log(prob[range(m), y_train])
        data_loss = np.sum(log_like) / m

        return data_loss

    def dcross_entropy(self, y_pred, y_train):
        m = y_pred.shape[0]

        grad_y = l.softmax(y_pred)
        grad_y[range(m), y_train] -= 1.0
        grad_y /= m

        return grad_y
    
    def loss_function(self, y_train, ys):
        loss, dys = 0.0, []

        for y_pred, y in zip(ys, y_train):
            loss += self.cross_entropy(y_pred, y)
            dy = self.dcross_entropy(y_pred, y)
            dys.append(dy)
            
        return loss, dys

    def train_backward(self, dys, caches):
        dh, grad, grads = [], [], []
        for layer in range(self.L):
            dh.append(np.zeros((1, self.H)))
            grad.append({key: np.zeros_like(val) for key, val in self.model[layer].items()})
            grads.append({key: np.zeros_like(val) for key, val in self.model[layer].items()})
            
        dXs = []
        for t in reversed(range(len(dys))):
            dy = dys[t]
            for layer in reversed(range(self.L)):
                dX, dh[layer], grad[layer] = self.backward(dy, dh[layer], caches[layer][t], train=True)                
                for key in grad[layer].keys():
                    grads[layer][key] += grad[layer][key]
                dy = dX.copy()
            dXs.append(dX)
                
        return dXs, grads
    
    def test(self, X_seed, h, size):
        chars = [self.idx2char[X_seed]]
        idx_list = list(range(self.vocab_size))
        X = X_seed
        h_init = h.copy()
        h = []
        for _ in range(self.L):
            h.append(h_init.copy())

        for _ in range(size):
            X_one_hot = np.zeros(self.D)
            X_one_hot[X] = 1.0
            X = X_one_hot.reshape(1, -1)
            for layer in range(self.L):
                y, h[layer], _ = self.forward(X, h[layer], self.model[layer], train=False)
                X = y.copy()
                
            prob = l.softmax(y)
            idx = np.random.choice(idx_list, p=prob.ravel())
            chars.append(self.idx2char[idx])
            X = idx

        return ''.join(chars)

    def get_minibatch(self, X, y, minibatch_size, shuffle):
        minibatches = []

        # for i in range(0, X.shape[0] - minibatch_size +1, 1):
        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def adam_rnn(self, X_train, y_train, alpha, mb_size, n_iter, print_after):
        M, R = [], []
         
        # Hidden layers
        for layer in range(nn.L):
            M.append({key: np.zeros_like(val) for key, val in self.model[layer].items()})
            R.append({key: np.zeros_like(val) for key, val in self.model[layer].items()})
         
        beta1 = .99
        beta2 = .999
        eps = 1e-8
        state = self.initial_state()
        smooth_loss = 1.0
        minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=False)

        # Epochs
        for iter in range(1, n_iter + 1):
            # Minibacthes
            for idx in range(len(minibatches)):
                X_mini, y_mini = minibatches[idx]
                ys, caches = self.train_forward(X_mini, state)
                loss, dys = self.loss_function(y_train=y_mini, ys=ys)
                _, grads = self.train_backward(dys, caches)
                self.losses['train'].append(loss)
                smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
                self.losses['smooth train'].append(smooth_loss)

                for layer in range(nn.L):
                    for key in grads[layer].keys(): #key, value: items
                        M[layer][key] = l.exp_running_avg(M[layer][key], grads[layer][key], beta1)
                        R[layer][key] = l.exp_running_avg(R[layer][key], grads[layer][key]**2, beta2)

                        m_k_hat = M[layer][key] / (1. - (beta1**(iter)))
                        r_k_hat = R[layer][key] / (1. - (beta2**(iter)))

                        self.model[layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + eps)

            # Print loss and test sample
            if iter % print_after == 0:
                print('Iter-{} loss: {:.4f}'.format(iter, loss))
                sample = self.test(X_mini[0], state, size=100)
                print(sample)

In [None]:
# Hyper-parameters
time_step = 10 # width, minibatch size and test sample size as well
num_layers = 10 # depth
n_iter = 10000 # epochs
alpha = 1e-4 # learning_rate
p_dropout = 0.95 # q=1-p, q=keep_prob and p=dropout.
print_after = 10 # n_iter//100 # print training loss, valid, and test
num_hidden_units = 64 # num_hidden_units in hidden layer
num_input_units = len(char_to_idx) # vocab_size = len(char_to_idx)

# Build the network and learning it or optimizing it using SGD
nn = GRU(D=num_input_units, H=num_hidden_units, L=num_layers, p_dropout=p_dropout, char2idx=char_to_idx, idx2char=idx_to_char)

# Start learning using BP-SGD-ADAM
nn.adam_rnn(X_train=X, y_train=y, alpha=alpha, mb_size=time_step, n_iter=n_iter, print_after=print_after)

# # Display the learning curve and losses for training, validation, and testing
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()

Iter-10 loss: 38.0428
c rpihp OdMobeggn fxanr i . he ae s adde hne icsin i ,T leee eca,nc ics P日 apeisieeum e pe Heals  u,l
Iter-20 loss: 34.9018
cr inaes ihF abe ihns ah iinn wiiis aiuion s chstehin nins nrfe nhn crhss iiRn cee ri fn. Jhan inn mn
Iter-30 loss: 36.1230
camy om Eaned Knpov an, whunapiunth Jinlngan romane Tormir thetsledgantsn, aman..,nesd.asg Jtimegnmne
Iter-40 loss: 37.6405
cavorged Jhhirg NFbon Jiibar abgimrd Jhimpe.ehe, Jethe msg,adhe thes ardo本mpeidang a;7steec.nar.gardd
Iter-50 loss: 35.1701
charesW Sscire powthe lgssthe Nhind Jbp.rlich bapupr.'ubim Jhpmphe, a,,dste,-Nariuma, istaglemithe Jp
Iter-60 loss: 33.9476
cethe Easc iomelelte an6s an. WJepannes', JaiFessstse, an7d inn z5cbera47ssdgitteng annigecd anesdJab
Iter-70 loss: 33.2870
cate cate of ant Nuvete Nipenden is in igiK. in in Neeanme) in ucowemaan. A本gema,c JeapapPeapepenadam
Iter-80 loss: 34.3735
clessaeger "ase and Japppen an6ss piEanicand. wands wumad an Jaantange th gante; and日, ma ametesdandt
Iter-90 

Iter-670 loss: 28.4398
c. The G7 and melt  mmmmmmmmmmn4wuAmPiPhiAAAPPhh5PPPPPPPPPPPPPPPPIcPPPPPPPPPPPPPPPPaPPPPPPPPPPPPPPPPP
Iter-680 loss: 20.8436
coked The whe EwK to uut an uhhu lhblur  whith  ch mhh u, mu b cuucituctt, h  ctccctlt caumanPthe ccc
Iter-690 loss: 20.4018
cD indd sorukk ukpr mme phmppkhh mermPttm7tett-m7,, mct mmtt mmemm c5m mmipeemN1ppppebiivananLa';;;;;
Iter-700 loss: 18.3963
cithe C2, me a0P%EP77ttC47...., ccaii77. i4i(7.a........... a.. a. a i4. i77i.. .  mtt .. i. mii.th  
Iter-710 loss: 24.3856
cad The Sovicticact,
t t th chch th th th cct ccc tst thh cccc mmh t cc cc Eccc EEi m it. th ts t EEN
Iter-720 loss: 26.9067
cand an 19 18wy ich as an7 a44Pcc,Pmmm4PaBBPadaa7–wKK:::,B, B7u 77BaB7a44444LDDDDDDDbbbbbbbbbbbbbbbbb
Iter-730 loss: 18.6374
cc 1mpiviiesttimitt Aft mttt umy Amipimttet i  i..11111(((........uuuuu....uu................144411bb
Iter-740 loss: 22.0432
cced Sel' Cittttts tt mbb hit. CCtttt.  .. wat th iII IEMMMMM日MMM


M


h




l日M日MMl
l
wwt

MM

MM




Iter-1330 loss: 19.2531
conghy Sa. 2aetttt..omtmm. mmamc. thart..tttttmt. . . t. Amatt mh t mt t t  t  t tt. a tt c  o Pl mo 
Iter-1340 loss: 24.3353
cakite Waresef BGuu we whcccteccttcemacc. Japhd th P ncchcce. a  a he Pi c PP PPhhPPPPccl PP c le PeP
Iter-1350 loss: 23.7576
cen, 4in Iestededmederlddedda d-dwwddd ldddwdddddddddddddd adwawwP2 w–w w w wh w    , t w    – w   w 
Iter-1360 loss: 18.2596
ciled o p cec nn 4sctonon c cttarett cst cocortcttoctcetthet cich a h  c  ceccaccacaccaet cII  iha bh
Iter-1370 loss: 20.2070
can Cale wouut toxlat.. . caean t aLccattt   atatIttt t   I I IIL IIIittttttottttttcLItttI ttt ttt7 c
Iter-1380 loss: 31.9639
cite if Nterere teriiiereetioaJapineie nn N P n7nn"nnrar77 me mecitmicintmoipppiiiiteeeee mereriiimii
Iter-1390 loss: 18.0209
c Wese a e  e e nlcd     ddarLadddedanee ae dan p 5 e   c c   c ce    ccie5ciccecic t   ic d d SW5t5w
Iter-1400 loss: 24.4642
cy 1nte wencade c an ccy Ioecicenecaacalanccacac iy calcy nnInonnn IilaItPaIiaciIntcnncaclannc

Iter-1990 loss: 19.9810
c ind man inid iae hinece inecedeccece aice ictennnnchedcccccccccadaccccccceneccecnhac cc anocaaacccc
Iter-2000 loss: 25.3755
c and Hox, meeseeeogongong ses nben uren A mheeee-aeeeeeeeeoo77co777ra7oracoror a ar ces ancoroco ace
Iter-2010 loss: 23.5316
ccpionde in iawSnnan bend no w ccccccccccccSoboslalyllll lal lll ylllllllll lll lll 4lSllloJFlunll lt
Iter-2020 loss: 20.2069
ch faked Hce cecedectecwcc.cticic as .. cuuu..u.... incclunucuthacuwPccccuPcPPPuloIuluccluccccccllccc
Iter-2030 loss: 18.6735
cel In OrdooooopeieopeniepapeppppenemmppoppGgAJGenGgGGrGPJPPPPPapapGJppppapppaJppapaPPGpPaJaJGJaJppap
Iter-2040 loss: 14.0727
cal Tan" IIs" Iuci4cl an caccccciccccccddd ccuuuuIc wImI JGcu lIlwp
PPzuPwwF
PPPLu.
PwP日u本
uwL日uww P

Iter-2050 loss: 25.5325
ce 19Pad hanseane k avayenyeeyeeyBe eeeeeeeeeeyeeeeeee ee e   iee%eCaBBeaeaaaaaBeayGadananP %yS7  77 
Iter-2060 loss: 24.3947
cationd in nandiretoneseorperBanaIapppppppppppapppaaapapapppapapapapapapapapapapapapappapapppp

Iter-2650 loss: 14.9946
cami, aSan a7 aachheeeeaaaa JanJe te–te Ja4aan44atJakaataataacaattaataJJacacJJakcatJcaaaJcJaJJJJaJJea
Iter-2660 loss: 20.4441
ct Ib t 20h  ib bbibimbinnilcalbhllinh bun mapPPPuPPPPPPupPlpl inP PPPntnnPFunnnPPPPPPPPFPPPP1PPPPPPP
Iter-2670 loss: 24.2083
crent wnd mh  Eophaothedhochuthd onccode EhhdhEth iiczgeccoAmtttceeeeceeiutcocheEEtemosmeucEEpmmm pum
Iter-2680 loss: 14.5417
cent vinf t Imocccotefoc as  sst..s....... A. itmmt ma  mmmmmmmmmmpmmac. m.ntuucm nmuuuuuumncmmppmcm.
Iter-2690 loss: 12.7470
cg TFy wor
 tst mhit in imtixt i tt t eeitttiottttt cti tt tte  thnt t  tttt  o te tec tttttttttttttt
Iter-2700 loss: 18.2898
caml SokekMull tey bect-on  Jen intt tantttte  t tttttt utttt ty inyttt ltt tint-n ct ce. -n zen tcn 
Iter-2710 loss: 35.6607
cippleulof th pad Jaratetaaaoayaooyryaytato Jh rrd rrter odtroriddaorrorrrororero t Jap1;;PPa;k;PPP p
Iter-2720 loss: 16.1616
cagent if e fhotyehoaoe tr  m               99   y919 e h91  9194a4hy h999999ty449999949499999

Iter-3310 loss: 15.5648
ctieun,sa a Sigg neghenfntao Jogorokear menorereug mes Jannmentakenke Jon JentaJatmantaneJeeJoJekooee
Iter-3320 loss: 21.0366
ceimWout Sattmaanananddddndddddddd,dmmdmddaxtmea 1min ama parictinammanJannd tot on Jankr n kmJ nnt n
Iter-3330 loss: 13.7041
ced Japangacanan1a 1apan Wa4aSaaaaaaaanaaaaaaaaaaaPaaPanataca;aaaPaaaa4a4a4;mmmm4aimammmaa6a4a:aaaaaa
Iter-3340 loss: 23.1945
ceksuny countmy thy tL   m ry m t    mLa  aLJL JL mLm  a  a  4m444 Jan 4mp44i4a m 4L444 L4 1Ja 111111
Iter-3350 loss: 12.0643
cett Cve fethivestntct.stymmmt mntymy minntymmy 4mtymanh4nnLinti4nt 441nttt 4intyn1minntttttim1n 1ntn
Iter-3360 loss: 20.1126
centom-SWSc' ' tmmhedtccdenedcdedteectecededeedtdednedimcce Jant anen Jame "an JAm1SJapaeeeennnennnee
Iter-3370 loss: 20.5076
ce aft 4elen nifirected Sict,cececc mmlettt m mech47chicid mecuncoteace–mmeaengut menticcacthLrceumL 
Iter-3380 loss: 20.5639
chinok zit t metyytyyy my pittimyyyyyy myyyy0nyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy

Iter-3970 loss: 21.0352
ccpan inttrr rinas ritithore mmpumitteke m c tg nt cettttt     t     t m c mt mttttotiditt tttttcttcc
Iter-3980 loss: 8.9259
certh u0t in ti te tttmet win intmin t pan Jannd Jannn nnJnnn Jinn Jipnnn Jin Jnnnn Jhn J tn  A.J  t 
Iter-3990 loss: 15.8842
cergc ffd mhete eec fkecdo   e towhe ph  L Lom mhe the pa   pmmh pett pethmc pemheJJaJpp Jpp chJJc J 
Iter-4000 loss: 13.8047
carg the wat me'tuune ce me me tee me met on  a ped nnn Jann 1nn 1nn minnan  icuuc ur an nnn  J cacnn
Iter-4010 loss: 12.6320
cCliretimyfdy. beiticistt . teaee t bettt. it    .  y   . ....  ..y.  y. y . t....... .  .. ....y ...
Iter-4020 loss: 25.9091
calg GD se Sn mm c mit m t t micic min mLi  2 mi linc cmPin Pmmmt m g mgcccimceteiiheticc cc 4cime mi
Iter-4030 loss: 12.9759
cgriopprong piniont enteririme Jen petipinte und JLee Jened Mud Jed.uJed.uau........da i... i..t Juu.
Iter-4040 loss: 32.6405
cam Ga the markar cer mer m m k t m m mik ch m  t methr the e emir mtietLtotodededtdedddeceemim

Iter-4630 loss: 14.7362
ceri% more mesomerom mamer met met mit mat mest pom t m mert mut ment mur mom meth mitom met the mer 
Iter-4640 loss: 10.7926
cum1 Waoply's Was (n r th Wes ras ras ir s min is in re mad ra an 1 t r in m th We in m W n Wa W n in
Iter-4650 loss: 14.8456
catle Soupooaann ly W mionee nnt nant Sth men lantt ind on men r nt rint nn the Se tont tent ian men 
Iter-4660 loss: 9.4301
crexndy memed om the Se Se Sad Se St mas on the tin ti ted ted m the t Se the Sed It Id m the ned bed
Iter-4670 loss: 23.7745
cinly In mmd thk EE4 nn n5d mmt  mhe Ammn nhe An a    n med met miathr monhe mmce and Jan add the nnn
Iter-4680 loss: 19.8956
ch loglent ot rith es ts mes nkonilinan mantar rimmunec mont mese Smin Skan Sked nded mesute than SSm
Iter-4690 loss: 17.0291
c the GlePes'c Pas P7t7masesut m7sP P7P P7P P7PPPh P7t7P7P Paut P7t7P7P7PP ant P7c Ph7 P8nP Ph Ph8nP 
Iter-4700 loss: 13.5650
camec , lith ons Se the Som th Sh c So t Ph lh t th thh th lLc mLLit t JLan am JL nakh JL J 55 

Iter-5290 loss: 13.4503
cte the Eil on histan the pomid in ODD. iDDD mad mas an min pad in ind in 1m8dicic Oce On 166–D –0–––
Iter-5300 loss: 19.0052
ceni1f8 rith Smed Sretaticy camcy Syccyyyelyyy She SSey Sn Scen tce SSeccy Sced yncad Sccccrccc Seccc
Iter-5310 loss: 27.3221
che 12555t cce cacent ct tec cccttttec cctttcet cecttte  e c t' cat nnttcccctectttttttttttttece c  ta
Iter-5320 loss: 17.5873
catu% of Jakek pkas  pas of ik kke n tin the nkke Sk nes S n7k man kek Sokkk Jakke nm Skkakik cek man
Iter-5330 loss: 19.7502
ccimes fommirig mmites mimg fim es me m7 mimd Sn mintirir Ssireritr ct mener conic Sat Stt Smor Smter
Iter-5340 loss: 21.9652
ckacaca0in, if Nithh ons in power ete nnt nhes on eterr men res it nhet Jap pe pen in un ra pa Sn per
Iter-5350 loss: 13.1871
couma Gm apmpapekaL zzzzttttttz.zzzz  e . t    t   te m pttt e te ie ti e teae i   t  a te ae tatLe t
Iter-5360 loss: 16.3501
cekaae The parope Cop on in in in l th th th Se W ch Se St ne we th la w n the th Se Se nic nd

Iter-5950 loss: 15.0877
c 2l ith ile the te tLe the the nteg imm n meg mes meg leg adhe um7m7 mel mas ame 47 mat mL meee eaae
Iter-5960 loss: 12.8968
cedasg vh'l mad sg nges ust mhesl ugeuumeu the t Phe Pee eee bee aut ul tec cgg ll bue te Peeuu Ped l
Iter-5970 loss: 22.8447
c last ind Anss Ax48st Att. An Ant Att che At Stet Ait She Stcth ctet Act St. cmec. Sh ctc.. Ate.. Wm
Iter-5980 loss: 17.8828
cthe Etis on  terone firth tha the wist esaiates thed thered Woahesothes Ssted tirotetes isoeeder rer
Iter-5990 loss: 11.2816
cano mmm kak kik par am miamit the mim me pimm ma par in ire eee ipe pee 4as a peomm pa p p'sian of e
Iter-6000 loss: 13.5850
cer Courm7nt myrt myy mAn i4AthL bithLL thet th7LLLy b7j 47j47–47 4747–47–47–4m7–47–47–4Ly bbbi47–47–
Iter-6010 loss: 18.2336
c 2upelle tar west a de ee wedaaed ire ee ns me timike the Ee Se Se Sicct in the the has ans mmmaamom
Iter-6020 loss: 9.7681
c marparee uge


. Janac mac linle in i% in inl Hauc. mang thicicilll me. milint cicilly wimac 

Iter-6610 loss: 23.3510
cal the woble Mhe Mel Is the hhe Mh phe Mede the he the the the wie the. Ice ice the eow ea aic in th
Iter-6620 loss: 10.1205
cer Oo intir the th'n in  me iN'im Ne n me mLe h'n im nee m's t'n 4'n Se Se t'nine in na  me Sin mee 
Iter-6630 loss: 20.0463
c Coplllowad Welipal il is We Wis Wis lid ees is eas of tin Was Wins Wess an an in in in Wa 1 Wa, Wa 
Iter-6640 loss: 17.4218
cy Pee watte an las in in th' n he in the na  a ee aa e  a ae iet iet We aa'   a a'  a tat We as h's 
Iter-6650 loss: 26.8862
c 2lGGd 9t wouud Re sogili.t as.. Ast 
g
. Au.. 4m.. Py t m. mmmam Pte thent mammiatth ant uc. int ut
Iter-6660 loss: 18.3070
c 2umdanand'Ides ase
 theecet cesses.
e0a0Het iMly in an en ele 1. Sin ce in Pe neee cane Phece in an
Iter-6670 loss: 15.6519
ceant Rn mhe ne , Janaamanaaaaaaaaaaaaa–a–aaaaa anaaaaaaaaaaakakakanaaateaamaaaaaaaaanananananaranan 
Iter-6680 loss: 18.5322
c the warlealel ale alad aslee ael aled har est thet as h as. mhtent an tha the thas that an. 

Iter-7270 loss: 17.3389
c larese at in. hes sis in thed Se he ter the the te nn the n Se in in th' the the te the the te thin
Iter-7280 loss: 24.6799
cchicccinde mand in the REthomee mmmin mitintt mint mitt an mintitim ment imSt metimmiunatiunamtmanr 
Iter-7290 loss: 16.5365
c manunmed canet in the te the we mes Wes heseatest the pes the the the eese eeee the teet the theee 
Iter-7300 loss: 19.3048
ce wivc'd'b b Mygyeeetttteetceceeteceyyucuu muimiteeeeeye mi  mtyyyyyyyeyyamte eyyyteyyyyyy2–myyeyyt–
Iter-7310 loss: 15.7538
ch urlel int men nnt my nc mermelin bet menun mes met bint mirlimin minter ment min thee-cin tenc-men
Iter-7320 loss: 16.3032
car We matt the Med in the iith ma. ad mint ta the then mat the te. tit me pe nib thh tit nn in the t
Iter-7330 loss: 16.1581
ccry 18tecece mant cre thent ante the the the Phe n nnt the thent the thent ant the the th' the nn th
Iter-7340 loss: 12.4099
cas ent rens Japensese becile is the Phe the WaN meses mames mestuxe eet Nixt the emese JapaN 

  return 1. / (1 + np.exp(-X))


Iter-7590 loss: 18.4383
catexBeRlaand in JLpan han anh ns Sas ras Sin Sau ans Sar ras Sane San SS an ans int We Sar an Siriar
Iter-7600 loss: 25.7829
cingalledls wllent than 1 i0th W mtt mnt micantiand and mtomaticolil in g anti–n the 1 ce nt nice lir
Iter-7610 loss: 19.7248
c las the Sir testired Wes mestirre the nSanter FiPerce met an Nent in in rint ith Nee in the the the
Iter-7620 loss: 22.5870
carg bgjs us thiping it it n tit th' titL ma0 19th JL wL a5 JL a in in u5 in an in  E9n n ap i. in in
Iter-7630 loss: 15.2140
ct eunaaci Hekedikkad SakkoSresur Sekemimer Saket Skkekkr Sekkkkar Skelikk eatituumeroumeuramamumimer
Iter-7640 loss: 22.9748
c Wasg Wes in in the in W's nkeus insent inesl Womanthentean int pinntirint cinte in int tinnd intinn
Iter-7650 loss: 18.6964
came woubl –nt JLpaniJap Jap pan Jap pamiJL mim bam mam p mamimi me 1m m mg bam men mibim me mime mam
Iter-7660 loss: 17.3678
cery ag bally mestum JLapapapap papaJapaapaicamacaman ans it ist iss Nes antt ces Ja4aa 4aaaJa

Iter-8250 loss: 15.4483
conerigh me diteri n the Me Nobby SSther wetibe ciuf Jan Jan Nin NanaanaN ann NariNand Nimin in ped –
Iter-8260 loss: 29.1962
c the world trar and int tint in in an in in ind 1 in int n ant 1 te ne ind te ne an te nin e te in a
Iter-8270 loss: 19.8121
caow-d'ribapbaaly' baraGGGGGGGGGGGy5bby 20 20 15 papa pat'  papapana55a a 2me iniae –man papa a pe 4a
Iter-8280 loss: 29.7058
cesersss ulect t ankth Sresssmkkerar SSkke kkarant rikkaauksst SSirk Sirkkrkkmnkirkkkrrrstd SSiktanke
Iter-8290 loss: 22.1937
c marclesten the EE melired les the Sestes thesse the Eesl the Ele Eeus tes.. pes messe the tir the t
Iter-8300 loss: 26.2898
ca uw call o0bbes bas mas masLese in SW WiNiin nnhL Was in han SiWase Was in in SiniWan iN in dan in 
Iter-8310 loss: 25.4270
cinekutlt 10000––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Iter-8320 loss: 21.1857
cemt lokl Wan the Wasf of th' Wir th n st s th nm Wes Wess s se West mal in s this ns ss thes 

Iter-8910 loss: 26.7519
ca the Mowedadithal bole luret th' ne bb'sace bun mer'in lelty meeere ta ler metipen J'p pe pe in te 
Iter-8920 loss: 19.0556
c. The chec tha nath linnt pe J'r J'a-J'n Le Ja ta ce J'n Jaaan-ranner Jan Nun-ras Jan-J'n-J'n J' p S
