In [210]:
import numpy as np

In [225]:
# Load the training corpus. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('kafka.txt') as corpusFile:
    kafka = corpusFile.read()

# Sort the vocabulary so the char <-> index mapping is identical on every
# kernel restart (iteration order of a set of strings is not stable across
# interpreter runs because of hash randomization).
uniqueChars = sorted(set(kafka))

# Bidirectional lookup tables between characters and their vocabulary index.
charToIndex = {char: index for index, char in enumerate(uniqueChars)}
indexToChar = dict(enumerate(uniqueChars))

# Width of the one-hot encoding: one slot per distinct character plus one
# extra slot that no character maps to.
encodingDimens = len(uniqueChars) + 1
def encode(character):
    """Return the one-hot column vector for ``character``.

    The result has shape ``(encodingDimens, 1)`` and is all zeros except for a
    single 1 in row ``charToIndex[character]``.

    Raises:
        KeyError: if ``character`` is not a key of ``charToIndex``.
    """
    oneHot = np.zeros((encodingDimens, 1))
    hotRow = charToIndex[character]
    oneHot[hotRow] = 1
    return oneHot
def decode(vector):
    """Return the character whose index holds the largest entry of ``vector``.

    For a one-hot vector this is the encoded character. Using ``argmax``
    rather than an exact ``== 1`` comparison also lets this decode probability
    vectors such as the softmax output of ``predict``, where no entry is
    exactly 1. Ties resolve to the lowest index.

    Raises:
        KeyError: if the winning index has no entry in ``indexToChar``.
        ValueError: if ``vector`` is empty.
    """
    charIndex = int(np.argmax(vector))
    return indexToChar[charIndex]

In [226]:
def relu(x):
    """Rectified linear unit: keep the positive entries of ``x``, zero the rest."""
    isPositive = x > 0
    return x * isPositive
def reluPrime(x):
    """Derivative of ``relu``: 1.0 where ``x`` is strictly positive, else 0.0."""
    isPositive = x > 0
    return 1. * isPositive
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its entries.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
    (and the quotient from becoming ``nan``) when the logits are large.
    """
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)
def softmaxPrime(x, y):
    """Return a copy of ``x`` with 1 subtracted at index ``y``.

    When ``x`` is a softmax output and ``y`` the target class index, this is
    the gradient of the cross-entropy loss with respect to the softmax input.
    ``x`` itself is left unmodified.
    """
    gradient = np.copy(x)
    gradient[y] = gradient[y] - 1
    return gradient

In [227]:
# Hyperparameters.
hiddenUnits = 100     # width of the recurrent hidden state
batchLength = 25      # presumably characters per training sequence; not used in the cells shown here
learningRate = 1e-1   # presumably the optimiser step size; not used in the cells shown here

# Weight initialisation settings.
initScale = 0.01      # standard deviation of the initial Gaussian weights
randomSeed = 0        # fixed so Restart & Run All reproduces the same weights

# A dedicated generator keeps the initialisation reproducible without
# touching NumPy's global random state.
weightRng = np.random.RandomState(randomSeed)

Wxh = weightRng.randn(hiddenUnits, encodingDimens) * initScale   # input  -> hidden
Whh = weightRng.randn(hiddenUnits, hiddenUnits) * initScale      # hidden -> hidden
Why = weightRng.randn(encodingDimens, hiddenUnits) * initScale   # hidden -> output
bh = np.zeros((hiddenUnits, 1))                                  # hidden bias
by = np.zeros((encodingDimens, 1))                               # output bias

def predict(X):
    """Run the RNN over ``X`` and return the output distribution of the last step.

    Args:
        X: non-empty sequence of input column vectors (as produced by
            ``encode``), processed in order starting from an all-zero hidden
            state.

    Returns:
        The softmax output column vector computed from the hidden state after
        the final input.

    Raises:
        ValueError: if ``X`` is empty, since there is no last step to report.
    """
    if len(X) == 0:
        raise ValueError("predict() needs at least one input vector")

    # Only the running hidden state is needed; earlier activations are never
    # read again, so they are not stored.
    hiddenState = np.zeros((hiddenUnits, 1))
    for inputVector in X:
        hiddenState = relu(np.dot(Wxh, inputVector) + np.dot(Whh, hiddenState) + bh)

    # The output layer is evaluated once, for the final step only.
    return softmax(np.dot(Why, hiddenState) + by)

def loss(X, y, includeBiasGrads=False):
    """Cross-entropy loss of the RNN on one sequence, plus its gradients (BPTT).

    Args:
        X: sequence of input column vectors (as produced by ``encode``).
        y: sequence of target class indices, one per element of ``X``.
        includeBiasGrads: when True, the bias gradients are appended to the
            returned tuple. Defaults to False so existing callers keep
            receiving the three weight gradients only.

    Returns:
        ``(crossEntropy, grads)`` where ``crossEntropy`` is the summed negative
        log-likelihood of the targets and ``grads`` is
        ``(gradWhy, gradWhh, gradWxh)``, or
        ``(gradWhy, gradWhh, gradWxh, gradbh, gradby)`` when
        ``includeBiasGrads`` is True.
    """
    steps = len(X)
    crossEntropy = 0

    a0, a1, a2, z1 = {}, {}, {}, {}
    previousHiddenState = {0: np.zeros((hiddenUnits, 1))}

    # Forward pass: keep every activation, the backward pass needs them.
    for time in range(steps):
        a0[time] = X[time]
        z1[time] = np.dot(Wxh, a0[time]) + np.dot(Whh, previousHiddenState[time]) + bh
        a1[time] = relu(z1[time])
        a2[time] = softmax(np.dot(Why, a1[time]) + by)

        crossEntropy += -np.log(a2[time][y[time], 0])
        previousHiddenState[time + 1] = a1[time]

    gradWxh, gradWhh, gradWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    gradbh, gradby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing into this step's hidden activation from the step after it.
    deltaStateNext = np.zeros_like(previousHiddenState[0])

    # Backward pass through time.
    for time in reversed(range(steps)):
        # Output delta comes from the softmax OUTPUT (a2), not the network input.
        delta3 = softmaxPrime(a2[time], y[time])
        # Gradient w.r.t. the hidden activation a1: from the output layer and from t+1.
        deltaHidden = np.dot(Why.T, delta3) + deltaStateNext
        # Push it through the ReLU to get the gradient w.r.t. the pre-activation z1.
        delta2 = deltaHidden * reluPrime(z1[time])
        # Only the pre-activation gradient propagates to the previous hidden state.
        deltaStateNext = np.dot(Whh.T, delta2)

        gradby += delta3
        gradbh += delta2

        gradWhy += np.dot(delta3, a1[time].T)
        gradWxh += np.dot(delta2, a0[time].T)
        gradWhh += np.dot(delta2, previousHiddenState[time].T)

    if includeBiasGrads:
        return crossEntropy, (gradWhy, gradWhh, gradWxh, gradbh, gradby)
    return crossEntropy, (gradWhy, gradWhh, gradWxh)

In [228]:
# Smoke test: feed the two-character input "On" with next-character targets
# "ne" and print the shapes of the three returned weight gradients.
inputChars, targetChars = 'On', 'ne'
X = np.array([encode(char) for char in inputChars])
y = np.array([charToIndex[char] for char in targetChars])

entropy, grads = loss(X, y)

print(*(grads[position].shape for position in range(3)))

(81, 100) (100, 100) (100, 81)
