In [14]:
import os
import numpy as np
from collections import Counter

class Dataset():

    def __init__(self, batch_size=32, sequence_length=30):
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.current_batch_index = 0

    def preprocess(self, input_file):
        with open(input_file, "r") as f:
            data = f.read()

        # count and sort most frequent characters
        cntr = Counter(data)
        self.sorted_chars = sorted(cntr.keys(), key=cntr.get, reverse=True)

        # self.sorted chars contains just the characters ordered descending by frequency
        self.char2id = dict(zip(self.sorted_chars, range(len(self.sorted_chars)))) 
        # reverse the mapping
        self.id2char = {k:v for v,k in self.char2id.items()}
        # convert the data to ids
        self.x = np.array(list(map(self.char2id.get, data)))

    def encode(self, sequence):
        # returns the sequence encoded as integers
        return np.array(list(map(self.char2id.get, sequence)))

    def decode(self, encoded_sequence):
        # returns the sequence decoded as letters
        return np.array(list(map(self.id2char.get, encoded_sequence)))
    
    def decode_str(self, encoded_sequence):
        # returns the sequence decoded as string
        return "".join(self.decode(encoded_sequence))

    def create_minibatches(self):
        # shift data
        shifted_x = self.x[:-1]
        shifted_y = self.x[1:]
        # batch shape and total size
        batch_shape = (self.batch_size, self.sequence_length)
        step = self.batch_size * self.sequence_length

        self.num_batches = int(len(shifted_x) / step) 
        self.batches = []

        for batch_index in range(self.num_batches):
            batch_start_pos = batch_index * step
            batch_end_pos = batch_start_pos + step

            batch_x = np.array(shifted_x[batch_start_pos:batch_end_pos]).reshape(batch_shape)
            batch_y = np.array(shifted_y[batch_start_pos:batch_end_pos]).reshape(batch_shape)
            self.batches.append((batch_x, batch_y))

    def minibatch_generator(self):
        for epoch, (batch_x, batch_y) in enumerate(self.batches):
            new_epoch = epoch == 0
            yield new_epoch, batch_x, batch_y
            
    def next_minibatch(self):
        new_epoch = self.current_batch_index == 0
        batch_x, batch_y = self.batches[self.current_batch_index]
        self.current_batch_index += 1

        if self.current_batch_index >= self.num_batches:
            self.current_batch_index = 0

        return new_epoch, batch_x, batch_y




In [15]:
from sklearn.metrics import log_loss

def softmax(value):
    max_value = np.max(value, axis=2)
    max_value = max_value[:, :, np.newaxis] # Broadcast it manually
    exp = np.exp(value - max_value)
    return exp / np.sum(exp, axis=2, keepdims=True)

class SimpleRNN:

    def __init__(self, hidden_size, sequence_length, vocab_size, learning_rate):
        self.hidden_size = hidden_size
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.learning_rate = learning_rate
        
        hidden_xavier_scale = 1.0 / np.sqrt(hidden_size)
        self.U = np.random.normal(size=[vocab_size, hidden_size], scale=hidden_xavier_scale) # ... input projection
        self.W = np.random.normal(size=[hidden_size, hidden_size], scale=hidden_xavier_scale) # ... hidden-to-hidden projection
        self.b = np.zeros([1, hidden_size]) # ... input bias

        self.V = np.random.normal(size=[hidden_size, vocab_size], scale=1.0 / np.sqrt(vocab_size)) # ... output projection
        self.c = np.zeros([1, vocab_size]) # ... output bias

        # memory of past gradients - rolling sum of squares for Adagrad
        self.memory_U, self.memory_W, self.memory_V = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
        self.memory_b, self.memory_c = np.zeros_like(self.b), np.zeros_like(self.c)

    def rnn_step_forward(self, x, h_prev, U=None, W=None, b=None):
        # A single time step forward of a recurrent neural network with a 
        # hyperbolic tangent nonlinearity.

        # x - input data (minibatch size x input dimension)
        # h_prev - previous hidden state (minibatch size x hidden size)
        # U - input projection matrix (input dimension x hidden size)
        # W - hidden to hidden projection matrix (hidden size x hidden size)
        # b - bias of shape (hidden size x 1)
        
        # Extract values if given
        U = self.U if U is None else U
        W = self.W if W is None else W
        b = self.b if b is None else b

        h_current = np.tanh(np.dot(h_prev, W) + np.dot(x, U) + b)
        cache = (h_current, h_prev, x)
        return h_current, cache

    def rnn_forward(self, x, h0, U=None, W=None, b=None):
        # Full unroll forward of the recurrent neural network with a 
        # hyperbolic tangent nonlinearity

        # x - input data for the whole time-series (minibatch size x sequence_length x input dimension)
        # h0 - initial hidden state (minibatch size x hidden size)
        # U - input projection matrix (input dimension x hidden size)
        # W - hidden to hidden projection matrix (hidden size x hidden size)
        # b - bias of shape (hidden size x 1)
        
        # Extract values if given
        U = self.U if U is None else U
        W = self.W if W is None else W
        b = self.b if b is None else b

        h, cache = [h0], []
        sequences = x.transpose(1, 0, 2)
        
        for sequence in sequences:
            h_current, cache_current = self.rnn_step_forward(sequence, h[-1], U, W, b)
            cache.append(cache_current)
            h.append(h_current)
        
        # Skip initial hidden state
        h = np.array(h[1:]).transpose(1, 0, 2)

        # return the hidden states for the whole time series (T+1) and a tuple of values needed for the backward step
        return h, cache

    def rnn_step_backward(self, grad_next, cache):
        # A single time step backward of a recurrent neural network with a 
        # hyperbolic tangent nonlinearity.

        # grad_next - upstream gradient of the loss with respect to the next hidden state and current output
        # cache - cached information from the forward pass
        h_current, h_prev, x = cache

        # compute and return gradients with respect to each parameter
        da = grad_next * (1 - h_current**2)
        dh_prev = np.dot(da, self.W.T)
        dU = np.dot(x.T, da) / grad_next.shape[0]
        dW = np.dot(h_prev.T, da) / grad_next.shape[0]
        db = np.sum(da, axis=0) / grad_next.shape[0]

        return dh_prev, dU, dW, db

    def rnn_backward(self, dh, cache):
        # Full unroll forward of the recurrent neural network with a 
        # hyperbolic tangent nonlinearity
        
        dU, dW, db = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.b)

        # compute and return gradients with respect to each parameter
        # for the whole time series.
        # Why are we not computing the gradient with respect to inputs (x)?
        dh = dh.transpose(1, 0, 2)

        dh_prev = np.zeros_like(dh[-1])
        for dh_current, cache_current in reversed(list(zip(dh, cache))):
            dh_prev, dU_current, dW_current, db_current = self.rnn_step_backward(dh_current + dh_prev, cache_current)
            dU += dU_current
            dW += dW_current
            db += db_current

        return dU, dW, db

    def output(self, h, V=None, c=None):
        # Calculate the output probabilities of the network
        # Extract values if given
        V = self.V if V is None else V
        c = self.c if c is None else c
        logits = np.dot(h, V) + c
        return softmax(logits)

    def output_loss_and_grads(self, h, y, V=None, c=None):
        # Calculate the loss of the network for each of the outputs
        
        # h - hidden states of the network for each timestep. 
        #     the dimensionality of h is (batch size x sequence length x hidden size (the initial state is irrelevant for the output)
        # V - the output projection matrix of dimension hidden size x vocabulary size
        # c - the output bias of dimension vocabulary size x 1
        # y - the true class distribution - a tensor of dimension batch_size x sequence_length x vocabulary size

        # Extract values if given
        V = self.V if V is None else V
        c = self.c if c is None else c

        batch_size = h.shape[0]
        y_out = self.output(h, V, c)

        log_loss_ = log_loss(y.reshape(-1, self.vocab_size), y_out.reshape(-1, self.vocab_size))
        loss = log_loss_ * self.sequence_length  # Since it computes average cross_entropy loss
        d_out = y_out - y
        
        dh, dV, dc = [], np.zeros_like(V), np.zeros_like(c)
        
        for d_out_current, h_current in zip(d_out.transpose(1, 0, 2), h.transpose(1, 0, 2)):
            dV += np.dot(h_current.T, d_out_current) / batch_size
            dc += np.average(d_out_current, axis=0)
            dh.append(np.dot(d_out_current, V.T))

        dh = np.array(dh).transpose(1, 0, 2)

        return loss, dh, dV, dc

    def update(self, dU, dW, db, dV, dc, eps=1e-6):
        # update memory matrices
        self.memory_U += np.square(dU)
        self.memory_W += np.square(dW)
        self.memory_b += np.square(db)
        self.memory_V += np.square(dV)
        self.memory_c += np.square(dc)
        
        # perform the Adagrad update of parameters
        self.U -= self.learning_rate * dU / np.sqrt(self.memory_U + eps)
        self.W -= self.learning_rate * dW / np.sqrt(self.memory_W + eps)
        self.b -= self.learning_rate * db / np.sqrt(self.memory_b + eps)
        self.V -= self.learning_rate * dV / np.sqrt(self.memory_V + eps)
        self.c -= self.learning_rate * dc / np.sqrt(self.memory_c + eps)

    def step(self, h0, x_oh, y_oh):
        h, cache = self.rnn_forward(x_oh, h0)
        loss, dh, dV, dc = self.output_loss_and_grads(h, y_oh)
        dU, dW, db = self.rnn_backward(dh, cache)

        dU = np.clip(dU, -5, 5)
        dW = np.clip(dW, -5, 5)
        db = np.clip(db, -5, 5)
        dV = np.clip(dV, -5, 5)
        dc = np.clip(dc, -5, 5)

        self.update(dU, dW, db, dV, dc)
        return loss, h[:, -1, :]

In [18]:
def one_hot(batch, vocab_size):

    def _oh(x, vocab_size):
        x_oh = np.zeros((x.shape[0], vocab_size))
        x_oh[np.arange(x.shape[0]), x] = 1
        return x_oh

    if batch.ndim == 1:
        return _oh(batch, vocab_size)
    else:
        return np.array([_oh(s, vocab_size) for s in batch])

def sample(seed, n_sample, rnn):
    h0 = np.zeros((1, rnn.hidden_size))
    seed_onehot = one_hot(seed, rnn.vocab_size)

    h = h0
    for char in seed_onehot:
        h, _ = rnn.rnn_step_forward(char[np.newaxis, :], h)

    sample = np.zeros((n_sample, ), dtype=np.int32)
    sample[:len(seed)] = seed
    for i in range(len(seed), n_sample):
        # Calculate probabilistic output
        model_out = rnn.output(h[np.newaxis, :, :])
        # Choose next letter in sample with defined probabilty
        sample[i] = np.random.choice(np.arange(model_out.shape[-1]), p=model_out.ravel())
        # Forward current letter
        next_input = np.zeros((1, rnn.vocab_size))
        next_input[0, sample[i]] = 1
        h, _ = rnn.rnn_step_forward(next_input, h)

    return sample

def run_language_model(dataset, max_epochs, seed, hidden_size=100, sequence_length=30, learning_rate=1e-1, sample_every=100):
    vocab_size = len(dataset.sorted_chars)
    RNN = SimpleRNN(hidden_size, sequence_length, vocab_size, learning_rate)

    current_epoch = 0 
    batch = 0

    h0 = np.zeros((dataset.batch_size, hidden_size))

    average_loss = 0

    while current_epoch < max_epochs: 
        next_epoch, x, y = dataset.next_minibatch()
        
        if next_epoch:
            batch = 0
            current_epoch += 1
            h0 = np.zeros_like(h0)

        x_oh, y_oh = one_hot(x, vocab_size), one_hot(y, vocab_size)
        loss, h0 = RNN.step(h0, x_oh, y_oh)
        average_loss = 0.9 * average_loss + 0.1 * loss

        if batch % sample_every == 0: 
            print("epoch: {3}/{4}, batch: {0}/{1}, loss: {2}, avg loss: {5}"
                    .format(batch, dataset.num_batches, loss, current_epoch, max_epochs, average_loss))
            sample_encoded = sample(dataset.encode(seed), 200, RNN)
            print(dataset.decode_str(sample_encoded))

        batch += 1

if __name__ == "__main__":
    # Prepare dataset
    dataset = Dataset(32,30)
    dataset.preprocess("data/selected_conversations.txt")
    dataset.create_minibatches()
    # Start learning
    run_language_model(dataset, 50, sequence_length=dataset.sequence_length, seed="OVEUR:I see.  What do you think our alternatives are?\n\n")


epoch: 1/50, batch: 0/616, loss: 130.15872689381578, avg loss: 13.01587268938158
OVEUR:I see.  What do you think our alternatives are?

vXAeoleo3eddoeel
o 7 iy.lN e6 eJBol
hREZb2CZS!.gIIieeWuooe .oe eeOode   leo lloelemo 
 eiyooleu  e6el'  geoleeooei Ra  eMeYlooOwrOXl0MLFSb,wZ2ano
epoch: 1/50, batch: 100/616, loss: 93.60978771123919, avg loss: 93.32533182588647
OVEUR:I see.  What do you think our alternatives are?

tal .    t fare d I ll  Yrpat mortSe o lQd se.p.eas ..  ,'nles. s .tT a
t tot r.ullSe oucsoOar ye. ye yeliry.y y.uc y l ge. . E't'de  ulelearer,e
epoch: 1/50, batch: 200/616, loss: 73.5808985186979, avg loss: 76.00346525225227
OVEUR:I see.  What do you think our alternatives are?

BCAB::
E'xming buvecy ves rosly yseppanftre ctukesthisd  hAC!IK:
Setne Fougu wt thamlt A8t meryr,sns pange?pI tp boul det p pednt hingif.'. whind
epoch: 1/50, batch: 300/616, loss: 68.89015413852994, avg loss: 70.40562538693854
OVEUR:I see.  What do you think our alternatives are?


IDEO:
Dep. Whit

epoch: 5/50, batch: 100/616, loss: 56.27869541979052, avg loss: 55.844751487788166
OVEUR:I see.  What do you think our alternatives are?

DR. Ylucseshen ol Cor jar. I gook in outhould Hele dras on that dad stol.

SGEN:
Whote y you kior serst of I Fund spnan. . frure thenesestader to
epoch: 5/50, batch: 200/616, loss: 59.09686427587927, avg loss: 58.60962573580382
OVEUR:I see.  What do you think our alternatives are?

ENGHOR:
Are I leell? Chat you a preabandenimy shis reablloge batt.

ERmo his to there's busterliellin' watt or a moal, inetsuning a chime one's s
epoch: 5/50, batch: 300/616, loss: 53.00205082908311, avg loss: 56.809637420884805
OVEUR:I see.  What do you think our alternatives are?

THAR:
Yiver andy.
. tomonded enersa tean. You to con or be trist think, mik you you an stony?

BNE I ca bar0.

JICH:
Now!.. io weyt wout go can y
epoch: 5/50, batch: 400/616, loss: 56.19157026831058, avg loss: 56.85369055221952
OVEUR:I see.  What do you think our alternatives are?

AR:
You'll d

epoch: 9/50, batch: 200/616, loss: 56.38612269642162, avg loss: 56.056875543161624
OVEUR:I see.  What do you think our alternatives are?

LOTAN:
NK bully. .nkily.

RARTHER:
What. he'r 11FFRETTREMplos.
Do the this?

PER:
I wanter trave thit at who thi hink you's minged know, dy dente
epoch: 9/50, batch: 300/616, loss: 51.246617754973045, avg loss: 54.30191594970701
OVEUR:I see.  What do you think our alternatives are?

RAXD...  I couls lime expe srop to  muss sto te I bo got Onke be you, 2 mitch a shith ig have the bect.

DY:
Dien you westecb stuse to bit're it 
epoch: 9/50, batch: 400/616, loss: 53.70157225185272, avg loss: 54.40487431527997
OVEUR:I see.  What do you think our alternatives are?

THER:
Read but that wedisan...not Oike.  The frike is ttood.

RA:
The.

MANE:
Pesertids.

HAR:
We. Let estidn't be year it fur throw. It's it's m
epoch: 9/50, batch: 500/616, loss: 61.41295275665952, avg loss: 54.177416194126714
OVEUR:I see.  What do you think our alternatives are?

DONNIE:
No.

epoch: 13/50, batch: 300/616, loss: 50.17528970186876, avg loss: 52.99691556002084
OVEUR:I see.  What do you think our alternatives are?

You.

HONDY:
Fod you callurst gok, CHIWICK:
OkE You very gone like the diay. I toe preabor boing, bely inot back. Yoi't dayfes telk tund in I'm n
epoch: 13/50, batch: 400/616, loss: 52.577368553700126, avg loss: 53.21568232219276
OVEUR:I see.  What do you think our alternatives are?

HAN:
EBESTER:
Real.  Ar misumar lalkianstre me!  He'm stieng?

SANDE:
A me.

SAN tyouble to tell spof rime us. HRE:
Yer, jusk if.  duidn penile's
epoch: 13/50, batch: 500/616, loss: 60.24571452125922, avg loss: 53.05110867077914
OVEUR:I see.  What do you think our alternatives are?

JOM:
Lit, pets?


JAR M1?

RISELEAT:
They just tond face. Pore a the just that proping eind dear, just I donatat? remmat.

FARLE Fuck a pary thea
epoch: 13/50, batch: 600/616, loss: 45.2172633174317, avg loss: 54.7244051877307
OVEUR:I see.  What do you think our alternatives are?

LAURA:
No y

epoch: 17/50, batch: 400/616, loss: 51.8284143581037, avg loss: 52.45101877662249
OVEUR:I see.  What do you think our alternatives are?

DYOS  It ther from rust thealo.  Net a dotter.

WEESE:
Lit's doing, it reot untilgere.  There, Uu gutsa have that?

DUKE:
Shis, ragett beporaldin
epoch: 17/50, batch: 500/616, loss: 59.28385528989576, avg loss: 52.29242587211972
OVEUR:I see.  What do you think our alternatives are?

IA:
Or. PRoge think Mrow us, macknt Bes in withe up see?

MR. MARLHA:
Alleaking trasper ake there afreecter us in trat my get muworect somafuldn'
epoch: 17/50, batch: 600/616, loss: 44.465483910396024, avg loss: 53.87114788289067
OVEUR:I see.  What do you think our alternatives are?

SI BANDNANL I dosaused yeratroad opena time...

RALLEUMA:
About febugady nee there Came yout.

CARSH:
Nor't one, don't sobeno it're mmang!  Do yo
epoch: 18/50, batch: 0/616, loss: 55.8006270445526, avg loss: 52.48420604924293
OVEUR:I see.  What do you think our alternatives are?

JESI SIND:
Pl

epoch: 21/50, batch: 500/616, loss: 58.68813303511651, avg loss: 51.75322712257228
OVEUR:I see.  What do you think our alternatives are?

RICH:
If aldn't can't off you're wint wite mysnee.  Why ke orers. Hulditon is slopt you, any mistir. I just harknindever only your heruady ke jus
epoch: 21/50, batch: 600/616, loss: 43.88936759819174, avg loss: 53.2725228115299
OVEUR:I see.  What do you think our alternatives are?

SAODON:
ELAUROUCH:
Yes. Ankin' ona was will all you?

SOTEONN:
As this should durwher you to eet tid hion the waspean a hay. Whatwoctonis. ..

LA
epoch: 22/50, batch: 0/616, loss: 56.972590890866925, avg loss: 52.118102290273505
OVEUR:I see.  What do you think our alternatives are?

PRIGHA:
Why y you yen you deanalu'm sail tond deffrieple is wit's I dene. We must, there's cheon't and What're about, han gikguralfo?

MS...

ELA
epoch: 22/50, batch: 100/616, loss: 50.84716009887578, avg loss: 51.49695037247601
OVEUR:I see.  What do you think our alternatives are?

DENNIE:
Not

epoch: 25/50, batch: 600/616, loss: 43.467297639127835, avg loss: 52.81777243198881
OVEUR:I see.  What do you think our alternatives are?

DOM:
Mwontade he's Dlalad for Eidatiplay don't.

DELI5ORUDDY:
I mist ming in right by arodeent with is virage in with you was they you would that
epoch: 26/50, batch: 0/616, loss: 55.45753712231016, avg loss: 51.63116120858671
OVEUR:I see.  What do you think our alternatives are?

LAVADELE:
Why walkcyash.  You wead a to distard the with of that you?

RACHN:
No!

DEPRY:
Looal?

LEELL:
Cie you. Is delt, a the do your I come m
epoch: 26/50, batch: 100/616, loss: 50.11802368267713, avg loss: 49.82732375291795
OVEUR:I see.  What do you think our alternatives are?

DONNIE:
Well that's go to rempers... Of syit. The Coore dint.  Fra Dee. Anctiou fion't like. ...monecation you facruck toghtught! Yea. I'l sure m
epoch: 26/50, batch: 200/616, loss: 52.64651709034653, avg loss: 52.930180635023504
OVEUR:I see.  What do you think our alternatives are?

DOVERICK:


epoch: 30/50, batch: 0/616, loss: 54.97829492681822, avg loss: 51.276034244384284
OVEUR:I see.  What do you think our alternatives are?

COTTER:
No. Suld cret!

RAURE:
I ust...

TERMINATOR:
The AHe about e honithing Lunitiousm on this about?

CORDEST:
It's tomonding tebr Foe for on
epoch: 30/50, batch: 100/616, loss: 49.8481475641152, avg loss: 49.44128292403438
OVEUR:I see.  What do you think our alternatives are?

DDR.. Out tyep. I'm go be going outes. The bus wanksuivereple. De meanttie, dilleding a, shinks aterese fondy moworrigop with Gee...

TECMIND:
Of
epoch: 30/50, batch: 200/616, loss: 52.21059792520149, avg loss: 52.53262955926062
OVEUR:I see.  What do you think our alternatives are?

BERTTREMONN:
I hat he tom till. Let's know res?

ROTHORYen. They five yeare. We stat's sout. I'm going I'm sitlling serierd are call kill meffing
epoch: 30/50, batch: 300/616, loss: 48.357191490511575, avg loss: 50.57933577991737
OVEUR:I see.  What do you think our alternatives are?

LEIE:
Wello

epoch: 34/50, batch: 100/616, loss: 49.635737877706994, avg loss: 49.13056755046765
OVEUR:I see.  What do you think our alternatives are?

DONNIE:
And plensi? It's we've is tTR:
Oh spotta toise, Don a feel.

FRENNIE:
I fay enerse.

DONNE:
Who hurslupporice.

STELLARED:
We thing this 
epoch: 34/50, batch: 200/616, loss: 51.793581205662775, avg loss: 52.20207692048188
OVEUR:I see.  What do you think our alternatives are?

BIG I'm con. 

DECKERSTERMAD:
Stud! I'ra going To, fullmsixgetit it? I are fould undressons a her.
WERMINRY:
D.  You askuster.

DYTOR:
You wanten
epoch: 34/50, batch: 300/616, loss: 48.17106496174547, avg loss: 50.30526148196715
OVEUR:I see.  What do you think our alternatives are?

GINE:
Yes, I reght.

CAN:
Goot.

PRICH:
It's man a protst are yes tard of ention alif prait when can't hove you want beat's go to be that te so w
epoch: 34/50, batch: 400/616, loss: 49.88935682185149, avg loss: 50.819701193172655
OVEUR:I see.  What do you think our alternatives are?

REESE:


epoch: 38/50, batch: 200/616, loss: 51.485680275826084, avg loss: 51.901393843650325
OVEUR:I see.  What do you think our alternatives are?

EEDY:
Babout ourved fucking suse?

ELUMAN:
I tald sis that's cepple. Keles hidn't untry was adoulppeast liker are are ard it some down no make wh
epoch: 38/50, batch: 300/616, loss: 48.01758398884215, avg loss: 50.07326058561978
OVEUR:I see.  What do you think our alternatives are?

HAN:
That's look you? We''m not lave a out the ainet my with on you're.  Let're boting let You be ENTY:
They're knowged daat skeds, that wouse. C
epoch: 38/50, batch: 400/616, loss: 49.61505829589384, avg loss: 50.60167297680756
OVEUR:I see.  What do you think our alternatives are?

RUKA:
Nodemeager!  I was. You deft a dow it, wURE:
.. Lour leet? For not bodis e firs, leonce now what's ut.

RIEL:
Stacu ict cupio!. for Joettey
epoch: 38/50, batch: 500/616, loss: 56.953487914137, avg loss: 50.423479049995755
OVEUR:I see.  What do you think our alternatives are?

STRIK:
at

epoch: 42/50, batch: 300/616, loss: 47.888548506868844, avg loss: 49.872160954111386
OVEUR:I see.  What do you think our alternatives are?

BRAN:
The  . . withle good, shings 1980 the houghin' MRYOPBEN:
Whe do to deration you hum!

PUDER:
Heeredoul over of so.

SHO we cadous.

THRI ST
epoch: 42/50, batch: 400/616, loss: 49.410535220031726, avg loss: 50.41292158773415
OVEUR:I see.  What do you think our alternatives are?

ALER:
Soochen so.

YODA:
There.  one of I wespent you kilsing. Thusn't home not.  Come smean the vida. I my you're to what me at thankare s Read!
epoch: 42/50, batch: 500/616, loss: 56.59189941374893, avg loss: 50.22475203599731
OVEUR:I see.  What do you think our alternatives are?

MR. WHITE:
Mim you mive wance suse have this seques, sure. Light!..

MOVE:
Are I feect a bedy a conste let them. That's me? What's arly. Is's tha
epoch: 42/50, batch: 600/616, loss: 42.37948527869709, avg loss: 51.551182100284585
OVEUR:I see.  What do you think our alternatives are?

DOSTAW

epoch: 46/50, batch: 400/616, loss: 49.26681983315153, avg loss: 50.24613581751522
OVEUR:I see.  What do you think our alternatives are?

PREERER:
Are tell Shated if theilling.

LEIA:
Sonityle feute the dity shole thorefity, losagerone helve the inny... the have there?  Nown.

CIN:

epoch: 46/50, batch: 500/616, loss: 56.23014241111081, avg loss: 50.0431204057832
OVEUR:I see.  What do you think our alternatives are?

RONN:
I fisbeed go you, bat meack aclecause or at to fcom spayssiss no stacrle.

MR. PINK:
Rougre itst?

MIVEDDIE:
I can ones kneer was the think
epoch: 46/50, batch: 600/616, loss: 42.22668568569313, avg loss: 51.33542333887109
OVEUR:I see.  What do you think our alternatives are?

CLALLILEY:
Lowin'. .

ENDENKAVE JAY:
Donce the beer the well fero grant, Proure alf that shit now dase beard. We gett and batchert thour fint, I 
epoch: 47/50, batch: 0/616, loss: 53.38234784905538, avg loss: 50.15791761508717
OVEUR:I see.  What do you think our alternatives are?

JEFFFREEST:
W