In [1]:
data = open('kafka.txt', 'r').read()
chars = list(set(data))
totalCharacters = len(data)
vocabLen = len(chars)

In [2]:
characterToIndex = {ch:i for i,ch in enumerate(chars)}
print(characterToIndex)
indexToCharacter = {i:ch for i,ch in enumerate(chars)}
print(indexToCharacter)

{'a': 0, 'b': 1, 'p': 2, 'c': 3, 'z': 4, 'N': 5, 'x': 6, 'I': 7, '\n': 8, '?': 9, 's': 10, 'S': 11, 'l': 12, 'F': 13, '"': 14, 'm': 15, 'U': 16, 'k': 17, 'j': 18, 'P': 19, ',': 20, 'T': 21, 'H': 22, 'w': 23, 'y': 24, 'e': 25, 'J': 26, 'C': 27, 'L': 28, 'M': 29, '(': 30, 'r': 31, '!': 32, 'o': 33, "'": 34, 'q': 35, 'E': 36, 'O': 37, ' ': 38, 'n': 39, 'W': 40, '-': 41, ':': 42, 'f': 43, 'ç': 44, 'v': 45, 'V': 46, 't': 47, 'u': 48, 'G': 49, 'Y': 50, 'i': 51, 'Q': 52, 'd': 53, 'A': 54, 'D': 55, 'g': 56, 'h': 57, ')': 58, 'B': 59, '.': 60, ';': 61}
{0: 'a', 1: 'b', 2: 'p', 3: 'c', 4: 'z', 5: 'N', 6: 'x', 7: 'I', 8: '\n', 9: '?', 10: 's', 11: 'S', 12: 'l', 13: 'F', 14: '"', 15: 'm', 16: 'U', 17: 'k', 18: 'j', 19: 'P', 20: ',', 21: 'T', 22: 'H', 23: 'w', 24: 'y', 25: 'e', 26: 'J', 27: 'C', 28: 'L', 29: 'M', 30: '(', 31: 'r', 32: '!', 33: 'o', 34: "'", 35: 'q', 36: 'E', 37: 'O', 38: ' ', 39: 'n', 40: 'W', 41: '-', 42: ':', 43: 'f', 44: 'ç', 45: 'v', 46: 'V', 47: 't', 48: 'u', 49: 'G', 50: 'Y',

In [42]:
import numpy as np
#hyperparameters
learningRate = 0.01
hiddenLayer = 100
seqLength = 25
#modelParameters
#connect input layer to hidden layer
W1 = np.random.randn(vocabLen, hiddenLayer)
# connect hidden layer to output layer
W2 = np.random.randn(hiddenLayer, vocabLen)
# connect hidden layer to hidden layer in the next timestamp
Wr = np.random.randn(hiddenLayer, hiddenLayer)
b1 = np.random.randn(hiddenLayer, 1)
b2 = np.random.randn(vocabLen, 1)

In [43]:
# the loss function would take in the input chars, the output chars and the previous hidden state
# it outputs the hidden state, the gradients for each parameter between layers and the last hidden states
def propagate(inputChars, outputChars, prevH):
    x, h, y, p = {}, {}, {}, {}
    #x = the array which is a list of zeros, with just 1 at the index where input character is
    #h = values of hidden layers at different times
    #y = values of outputs not activated
    #p = activated output
    h[-1] = np.copy(prevH)
    loss = 0
    #forward propagation
    for t in range(len(inputChars)):
        x[t] = np.zeros((vocabLen, 1))
        x[t][inputChars[t]] = 1
        h[t] = np.tanh(W1.T.dot(x[t]) + Wr.dot(h[t-1]) + b1) # h has to be of the same dimension as b
        y[t] = W2.T.dot(h[t]) + b2
        p[t] = np.exp(y[t])/np.sum(np.exp(y[t]))
        loss += -np.log(p[t][outputChars[t],0])
    
    
    #backward propagation
    dW1, dW2, dWr = np.zeros_like(W1), np.zeros_like(W2), np.zeros_like(Wr)
    db1, db2 = np.zeros_like(b1), np.zeros_like(b2)
    dhnext = np.zeros_like(h[0])
    for t in reversed(range(len(inputChars))):
        dy = np.copy(p[t])
        #starting the backpropagation
        dy[outputChars[t]] -= 1
        
        dW2 += h[t].dot(dy.T)
        db2 += dy
        dh = np.dot(W2, dy) + dhnext
        dhraw = (1 - h[t] * h[t]) * dh
        db1 += dhraw
        dW1 += np.dot(x[t], dhraw.T) #derivative of input to hidden layer weight
        dWr += np.dot(dhraw, h[t-1].T) #derivative of hidden layer to hidden layer weight
        dhnext = np.dot(Wr.T, dhraw)
    for dparam in [dW1, dWr, dW2, db1, db2]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients                                                                                                                 
    return loss, dW1, dWr, dW2, db1, db2, h[len(inputChars)-1]
    

In [44]:
#prediction, one full forward pass
def sample(h, seed_ix, n):
                                                                                                                                                                                        
    #sample a sequence of integers from the model                                                                                                                                                
    #h is memory state, seed_ix is seed letter for first time step   
    #n is how many characters to predict

    x = np.zeros((vocabLen, 1))
    x[seed_ix] = 1
    #list to store generated chars
    outputChars = []
    for t in range(n):
        h = np.tanh(np.dot(W1.T, x) + np.dot(Wr, h) + b1)
        #compute output (unnormalised)
        y = np.dot(W2.T, h) + b2
        # probabilities for next chars
        p = np.exp(y) / np.sum(np.exp(y))
        #print(p)
        #pick one with the highest probability 
        selectedChar = np.random.choice(range(vocabLen), p=p.ravel())
        #print(ix)
        #create a vector
        x = np.zeros((vocabLen, 1))
        #customize it for the predicted char
        x[selectedChar] = 1
        #add it to the list
        outputChars.append(selectedChar)

    txt = ''.join(indexToCharacter[char] for char in outputChars)
    print ('----\n %s \n----' % (txt, ))
    hprev = np.zeros((hiddenLayer,1)) # reset RNN memory  
    #predict the 200 next characters given 'a'
sample(hprev,characterToIndex['a'],200)

ValueError: shapes (100,100) and (16,1) not aligned: 100 (dim 1) != 16 (dim 0)

In [45]:
n, p = 0, 0
mW1, mWr, mW2 = np.zeros_like(W1), np.zeros_like(Wr), np.zeros_like(W2)
mb1, mb2 = np.zeros_like(b1), np.zeros_like(b2) # memory variables for Adagrad                                                                                                                
smooth_loss = -np.log(1.0/vocabLen)*seqLength # loss at iteration 0                                                                                                                        
while n<=1000*100:
    # prepare inputs (we're sweeping from left to right in steps seq_length long)
    # check "How to feed the loss function to see how this part works
    if p+seqLength+1 >= len(data) or n == 0:
        hprev = np.zeros((hiddenLayer,1)) # reset RNN memory                                                                                                                                      
        p = 0 # go from start of data                                                                                                                                                             
    inputs = [characterToIndex[ch] for ch in data[p:p+seqLength]]
    targets = [characterToIndex[ch] for ch in data[p+1:p+seqLength+1]]

    # forward seq_length characters through the net and fetch gradient                                                                                                                          
    loss, dW1, dWr, dW2, db1, db2, hprev = propagate(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001

    # sample from the model now and then                                                                                                                                                        
    if n % 1000 == 0:
        print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress
        sample(hprev, inputs[0], 200)

    # perform parameter update with Adagrad                                                                                                                                                     
    for param, dparam, mem in zip([W1, Wr, W2, b1, b2],
    [dW1, dWr, dW2, db1, db2],
    [mW1, mWr, mW2, mb1, mb2]):
        mem += dparam * dparam
        param += -learningRate * dparam / np.sqrt(mem + 1e-8) # adagrad update                                                                                                                   

    p += seqLength # move data pointer                                                                                                                                                         
    n += 1 # iteration counter

iter 0, loss: 103.651105
----
 !v adCaA?gVi?aSnttyeiS  PPbVt?IttANT'fnC)T!DzIeLP"''Tn!xiDntTnCx"e jA(GlaDpgynITGWLFngt YJ?'nYv?VGcANr TMA hs;Uy;LSr-VU.p
VygpBjH-VQLP
vnHS LcO;,,avy)xlqgOçOhVdUNocnJL!(b?YVfYHuçJIQyx(EnanN!feJIjrNzGO 
----
iter 1000, loss: 346.840357
----
 ciWAggrng!CwhçTYratTsItUb
xpe?I (IEw-tyo xajIjLD'qeL:L))An
VdVb ajaoer N;EAW,e"h"bLN."tNTeAv(etrcGDunOVBS!hqGtjstejivQNç" HcCeGz(fUYWFgedA gudedhVBThGC -PqeLnmlWOhm"yBdl V(vVfy)dWy ab-fokGpH
Vy'd;-n.V 
----
iter 2000, loss: 404.870413
----
 hyDGoLnnc".a-dIqlkw(hTTePThxV BhVffneE! VtA-xhdi Bjeoh?k(nfB"afh-ttPxtdj-N-eg iaQ"G h )fg:qSvaan tnAaoLmVSLEvjxc'ejdSbVecIaarAh-Dnztac(hizngVd-hgl,gzhfcgpIYa jy
V
veiGoiBEDe jxVOjc- PnVOGkdwYuAF AxdSg 
----
iter 3000, loss: 408.210800
----
 Ne;IGL VtOd N'fQdvcTe ga?nzyQ r i )eL,TddwAowh..Avh ?)y.?vJN
euha-T,hNmhW
ie;.('h !itbtEnrJEVhrtiQmw 
ej)y?dchTnG?vecrVHLNV.LNdeskC.
cSYiFrtiOb"bo-?
"bgQshkrIj-TSQLdCYcePrlfl NQytljNeg):tGocH-rIvhL
Vu 
----
iter 4000, loss: 396.913146
----
 ixdf-

KeyboardInterrupt: 