In [1]:
import numpy as np
from random import randint

In [2]:
# Read the whole training corpus. The context manager closes the file handle
# as soon as the text is in memory instead of leaving it open.
with open('kafka.txt') as corpusFile:
    kafka = corpusFile.read()

# Sorted so the character <-> index mapping is the same on every run; the
# iteration order of a set of strings can differ between interpreter sessions.
uniqueChars = sorted(set(kafka))

# Lookup tables between a character and its row in the one-hot encoding.
charToIndex = { char:index for index, char in enumerate(uniqueChars) } 
indexToChar = { index:char for index, char in enumerate(uniqueChars) } 

# Length of a one-hot vector: one row per distinct character in the corpus.
encodingDimens = len(uniqueChars)
def encode(character):
    """Return the one-hot column vector for ``character``.

    The result has shape ``(encodingDimens, 1)`` and is zero everywhere
    except for a 1 in row ``charToIndex[character]``. A character missing
    from ``charToIndex`` raises ``KeyError``.
    """
    oneHot = np.zeros((encodingDimens, 1))
    row = charToIndex[character]
    oneHot[row] = 1
    return oneHot
def decode(vector):
    """Map a one-hot vector back to its character.

    Scans ``vector`` for entries equal to 1 and returns
    ``indexToChar`` of the first such position. Raises ``IndexError`` when
    no entry equals 1.
    """
    hotPositions = []
    for position in range(len(vector)):
        if vector[position] == 1:
            hotPositions.append(position)
    firstHot = hotPositions[0]
    return indexToChar[firstHot]

In [3]:
def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
def softmax(x):
    """Softmax over ALL elements of ``x`` (the sum is taken over the whole array).

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
    (and the quotient from becoming ``nan``) when the inputs are large.

    Parameters
    ----------
    x : array-like of scores.

    Returns
    -------
    Array of the same shape as ``x`` whose entries are positive and sum to 1.
    An empty input yields an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    # exp is evaluated once and reused for numerator and denominator.
    exponentials = np.exp(x - np.max(x))
    return exponentials / np.sum(exponentials)

In [4]:
# --- Hyperparameters ---------------------------------------------------------
hiddenUnits = 100     # number of units in the recurrent hidden state
batchLength = 25      # characters taken from the corpus per training batch
learningRate = 1e-1   # step size used by the parameter update in the training cell
mu = 0                # mean of the Gaussian used to initialise the weights
sigma = 1             # standard deviation of that Gaussian
# NOTE(review): nothing visible in this notebook reads this value before the
# training cell rebinds `y`; it looks like a leftover -- confirm before removing.
y  = np.array([0, 1, 1, 0])

# --- Weights: Gaussian draws scaled down by 0.01 -----------------------------
# The three draws are kept in the order Wxh, Whh, Why.
Wxh = 0.01 * np.random.normal(loc=mu, scale=sigma, size=(hiddenUnits, encodingDimens))   # input  -> hidden
Whh = 0.01 * np.random.normal(loc=mu, scale=sigma, size=(hiddenUnits, hiddenUnits))      # hidden -> hidden
Why = 0.01 * np.random.normal(loc=mu, scale=sigma, size=(encodingDimens, hiddenUnits))   # hidden -> output

# --- Biases start at zero ----------------------------------------------------
bh = np.zeros(shape=(hiddenUnits, 1))      # hidden bias
by = np.zeros(shape=(encodingDimens, 1))   # output bias

def predict(X, previousBatchHiddenState = np.zeros((hiddenUnits, 1))):
    """Run the network forward over the sequence ``X``.

    Parameters
    ----------
    X : indexable sequence of input column vectors, one per time step
        (each one is multiplied by ``Wxh``).
    previousBatchHiddenState : hidden state fed into the first time step.

    Returns
    -------
    The softmax output of the FIRST time step only.

    NOTE(review): every step is computed but only step 0 is returned --
    confirm whether the last step (or all steps) was intended.
    """
    stepCount = len(X)
    hiddenState = previousBatchHiddenState
    outputs = {}

    for step in range(stepCount):
        # Recurrent update: new hidden state from current input + old state.
        preActivation = np.dot(Wxh, X[step]) + np.dot(Whh, hiddenState) + bh
        hiddenState = tanh(preActivation)

        # Output distribution for this step.
        scores = np.dot(Why, hiddenState) + by
        outputs[step] = softmax(scores)

    return outputs[0]

def loss(X, y, previousBatchHiddenState = np.zeros((hiddenUnits, 1))):
    """Forward and backward pass (backpropagation through time) over one batch.

    Parameters
    ----------
    X : indexable sequence of input column vectors, one per time step
        (each one is multiplied by ``Wxh``).
    y : sequence of integer targets; ``y[t]`` is the row of the softmax
        output that should be 1 at step ``t``.
    previousBatchHiddenState : hidden state carried in from the preceding
        batch. It is copied, never modified.

    Returns
    -------
    (crossEntropy, grads, lastHiddenState)
        crossEntropy    -- summed negative log-probability of the targets.
        grads           -- ``(gradWhy, gradWhh, gradWxh, gradbh, gradby)``,
                           each clipped element-wise to ``[-5, 5]``.
        lastHiddenState -- hidden state after the final step (a copy of the
                           incoming state when the batch is empty).
    """
    batchLength = len(X)
    crossEntropy = 0

    a0, a1, a2 = {}, {}, {}
    z1, z2 = {}, {}

    # Index -1 holds the state carried over from the previous batch so that
    # step 0 can look up "the state before me" like every other step.
    previousHiddenState = {}
    previousHiddenState[-1] = np.copy(previousBatchHiddenState)

    # ---- forward pass ----
    for time in range(batchLength):
        a0[time] = X[time]
        z1[time] = np.dot(Wxh, a0[time]) + np.dot(Whh, previousHiddenState[time-1]) + bh
        a1[time] = tanh(z1[time])

        z2[time] = np.dot(Why, a1[time]) + by
        a2[time] = softmax(z2[time])
        previousHiddenState[time] = a1[time]
        crossEntropy += -np.log(a2[time][y[time],0])

    # ---- backward pass ----
    gradWxh, gradWhh, gradWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    gradbh, gradby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing into the hidden state from the following time step;
    # there is no following step after the last one, so it starts at zero.
    deltaStatePrevious = np.zeros_like(bh)

    for time in reversed(range(batchLength)):
        # Gradient w.r.t. the output scores for softmax + cross-entropy:
        # predicted probabilities minus the one-hot target.
        delta3 = np.copy(a2[time])
        delta3[y[time]] -= 1
        a1_tanhPrime = (1 - a1[time] * a1[time])

        # Gradient w.r.t. the hidden activation, then through the tanh.
        delta2 = np.dot(Why.T, delta3) + deltaStatePrevious
        deltaRaw = delta2 * a1_tanhPrime
        deltaStatePrevious = np.dot(Whh.T, deltaRaw)

        gradWhy += np.dot(delta3, a1[time].T)
        # The hidden state that fed this step is previousHiddenState[time-1];
        # at time 0 that is the state carried in from the previous batch, not
        # zeros, so the carried state contributes to the Whh gradient.
        gradWhh += np.dot(deltaRaw, previousHiddenState[time-1].T)
        gradWxh += np.dot(deltaRaw, a0[time].T)

        gradby  += delta3
        gradbh  += deltaRaw

    # Clip in place to limit exploding gradients.
    for grad in [gradWxh, gradWhh, gradWhy, gradbh, gradby]:
        np.clip(grad, -5, 5, out=grad)

    return crossEntropy, (gradWhy, gradWhh, gradWxh, gradbh, gradby), previousHiddenState[batchLength - 1]

In [5]:
def getBatch(position):
    """Build one training batch starting at ``position`` in the corpus.

    Returns ``(X, y)`` where ``X`` is a list of one-hot encoded characters
    ``kafka[position : position + batchLength]`` and ``y`` is the list of
    integer indices of the same window shifted one character to the right
    (the next-character targets).
    """
    stop = position + batchLength
    inputs = [encode(symbol) for symbol in kafka[position:stop]]
    targets = [charToIndex[symbol] for symbol in kafka[position + 1:stop + 1]]
    return inputs, targets

def getSampleForGeneration(length, previousState = np.zeros((hiddenUnits, 1)), seedChar = 'a'):
    """Generate ``length`` characters by sampling from the network.

    Parameters
    ----------
    length : number of characters to generate.
    previousState : hidden state to start from.
    seedChar : character fed to the network at the first step (default
        ``'a'``, the value that used to be hard-coded). Must be a key of
        ``charToIndex``, otherwise ``KeyError`` is raised.

    Returns
    -------
    The generated text wrapped between ``----`` delimiter lines.
    """
    X = np.zeros((encodingDimens, 1))
    X[charToIndex[seedChar]] = 1
    generatedSequence = []

    for _ in range(length):
        # One forward step; only the current activations are kept, because
        # nothing reads the earlier ones.
        hidden = tanh(np.dot(Wxh, X) + np.dot(Whh, previousState) + bh)
        probabilities = softmax(np.dot(Why, hidden) + by)

        # Draw the next character index according to the output distribution.
        charIx = np.random.choice(range(encodingDimens), p=probabilities.ravel())
        generatedSequence.append(charIx)

        # The sampled character becomes the next input.
        X = np.zeros((encodingDimens, 1))
        X[charIx] = 1

        previousState = hidden

    text = ''.join(indexToChar[ix] for ix in generatedSequence)
    text = '----\n %s \n----' % (text, )
    return text

In [6]:
iteration, position = 0, 0
# Adagrad accumulators: running sum of squared gradients, one per parameter.
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby         = np.zeros_like(bh), np.zeros_like(by)
# Starting value for the smoothed loss: the cross-entropy of a uniform
# prediction over the vocabulary, summed over one batch.
smoothLoss       = -np.log(1.0 / encodingDimens)* batchLength

previousBatchHiddenState = np.zeros_like(bh)

while (iteration <= 1000 * 100):
    # Wrap around to the start of the text (and forget the carried state)
    # when the next batch plus its shifted targets would run past the end.
    if (position + batchLength + 1 >= len(kafka) or iteration == 0):
        previousBatchHiddenState = np.zeros_like(bh)
        position = 0

    X, y = getBatch(position)
    cost, grads, previousBatchHiddenState = loss(X, y, previousBatchHiddenState)
    gradWhy, gradWhh, gradWxh, gradbh, gradby = grads
    smoothLoss = smoothLoss * 0.999 + cost * 0.001

    if(iteration % 1000 == 0):
        # Sample from a fresh zero state WITHOUT overwriting
        # previousBatchHiddenState: the state carried into the next training
        # batch must not be discarded just because a sample was printed.
        print(getSampleForGeneration(200, np.zeros_like(bh)))
        print(smoothLoss)

    # Adagrad update, applied in place to the module-level parameters.
    for param, dparam, mem in zip([Why, Whh, Wxh, bh, by],
                                [gradWhy, gradWhh, gradWxh, gradbh, gradby],
                                [mWhy, mWhh, mWxh, mbh, mby]):
        mem += dparam * dparam
        param += -learningRate * dparam / np.sqrt(mem + 1e-8)

    position += batchLength
    iteration += 1
        

----
 çvIzxxDCçuzf1x!cPWDYBJ:;nF/T)0$m;bf8LH"(gXre HHY3vffnI20aG-Tzk62dcuKgF;b,YT'YsQVXTn0vGGrlHT.dcYVz)LbXiRh6$awe"oJgcDh
P8rVOMh!($s84j4yWJzKuLHsoYO-9ç4XPq.@ghw24@$)d1 FF((oPEim5tuT.(3C-%RnW(e*NnVeP5BYd@D 
----
109.5506735396861
----
 d linve
 to t he witoh waig's" doz antts we e" ding of sart eT hut thuget hme thels woref fLs tofir ha hen  wler yh nhre tiras, four tinr r teugo art hrase afre.nat ninegt ?n hotaf torsgomis rimt aagg 
----
85.09605380501263
----
 lth theeye fobeoxengthe baataslen belri I iutttoat an fouldrre ler antary sovimed moft couf jmke theedime acton nilas Greusis tar ind the. An fuc monind woregh Sudied ckuwave eis uusinugsomoul h tht p 
----
68.15565378233076
----
 n çe cis roond gast clenthe's'ls has lomckessarcouscinld whimno""H ht whieny ime lainet wasped tor s is wave withal tome -nd anigis H ounnllyath chen wandelinge wee; killle fas loradiaba il sem il- as 
----
59.813337415589984
----
 ncaped hissor hom worvey on. Gr, n-e ond thtid che eve sisar choy hly

----
 d there he he hay seast helped whilast his foon to his side re be ofing tetens leint. Gregor's had had at het hestes agaied Gretly and that would him forros at whemed that, himing, and neaund, whis; w 
----
43.48496185674425
----
 mele do to onl her shiesmearle o, hors", the raithoughers, the juse the rooms; olly at and went show, sto herly thly? The petned oning time thendut sticcwion ar it stele fort, seed hadret werea, had o 
----
43.3899307948491
----
 scay thes'rling
comay that and urgeared the deching Gregorgiage.  I more. Anly the for accearsened beto
w.Se enpedive awaing ary priart andes ficicenburg-ifution eupcaniling this sister, bech the claa 
----
45.40812857463678
----
 nd rocesce to his far's he buts hork/bogianzling himsliech a
died 1 Linkad his werion fet a mopners is while everonged om trid bore, the have wam she tmoutely insive he latcem the as omrearrgiong, ont 
----
47.55055589097322
----
 nd.
ST A. Him his, himraint, furing; reectroo, her speret he lay sted t

----
 michunticems witene to gichoru workse 500 che of hind not Lernible auld fron out of vially pritted tramidd he whout butm to to a leriand not sicule sickle ulfeat ex he nely Lite reras us whe in the pa 
----
45.30800324885419
----
 nde. Samsintong there muck bely, the collew but at, morel. Nout was for at the bel the reingnt the to the could uk.
Mf any had he menkbong him forting the prige reent sise that a proke od thing himle? 
----
43.63234197228482
----
 rseadly his she to sainttlytion his moit jut not on cant do shere gees thoughAd hrempaly usound now earmbone and Wain "Sal in the sest could did hold for as and for it to gar". - farment surtougs feft 
----
42.036939902574055
----
 s a mid eupindmargot thay nomess though somed, have and get his modecher, aboitand Greger and ally with had asent ated leentustaro tray frove hims plage -. At tod the faf her, ted his histing that of  
----
41.81250531029217
----
 d. He held sut thet then to setaline them in us to ap they her sidase