In [9]:
import keras
import numpy as np
import keras.preprocessing.text
from sklearn.preprocessing import OneHotEncoder
from keras.utils import np_utils
from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Activation, Lambda, Dropout
from keras.layers.recurrent import SimpleRNN

In [13]:
# Read the whole training corpus into memory as one string.
# NOTE(review): the 'U' (universal newlines) mode flag is kept exactly as
# written; it is deprecated on Python 3 -- confirm the target interpreter.
with open('data/input.txt','U') as corpus_file:
    content = corpus_file.read()

In [14]:
# Seed NumPy's global RNG so the random windows drawn by createData and the
# multinomial draws in sample are repeatable.  This has to be a *call*:
# assigning `np.random.seed = 0` replaces the seeding function with an int,
# seeds nothing, and breaks any later np.random.seed(...) call in the session.
np.random.seed(0)
def createDict(content, start=0):
    """Build forward and reverse lookup tables for the distinct symbols in `content`.

    Every distinct element of `content` receives a consecutive integer code,
    beginning at `start`, in the iteration order of a set built from `content`.

    Returns:
        A tuple `(char_to_num, num_to_char)` of two dicts that are inverses
        of each other.

    NOTE(review): set iteration order is not guaranteed to be identical
    across interpreter sessions, so the codes may differ from the ones a
    previously saved model was trained with -- confirm before reuse.
    """
    symbols = set(list(content))
    char_to_num = {}
    num_to_char = {}
    for code, symbol in enumerate(symbols, start):
        char_to_num[symbol] = code
        num_to_char[code] = symbol
    return (char_to_num, num_to_char)

def charToNum(content, char_to_num):
    """Translate each element of `content` into its integer code.

    Args:
        content: iterable of symbols (e.g. a string).
        char_to_num: mapping from symbol to integer code.

    Returns:
        A list with one code per element of `content`, in order.

    Raises:
        KeyError: if an element of `content` is not a key of `char_to_num`.
    """
    return [char_to_num[symbol] for symbol in content]

def numToChar(content, num_to_char):
    """Translate each integer code in `content` back into its symbol.

    Args:
        content: iterable of codes.
        num_to_char: mapping from code to symbol.

    Returns:
        A list with one symbol per element of `content`, in order.

    Raises:
        KeyError: if a code is not a key of `num_to_char`.
    """
    return [num_to_char[code] for code in content]

def cateToNum(content):
    """Return, for each row of `content`, the position of its first non-zero entry.

    Rows that contain no non-zero entry contribute nothing to the result, so
    the returned list can be shorter than `content`.
    """
    res = []
    for row in content:
        for position, value in enumerate(row):
            if not (value == 0):
                # First non-zero entry found: record where it is and move on
                # to the next row.
                res.append(position)
                break
    return res

def createData(data, size=50000, seq_len=25):
    """Draw `size` random contiguous windows from `data` for next-step prediction.

    Each window spans `seq_len` consecutive rows of `data`.  The first
    `seq_len - 1` rows of a window form the input and the last `seq_len - 1`
    rows form the target, i.e. the target is the input shifted one step ahead.

    Args:
        data: 2-D NumPy array indexed as `data[row, column]`.
        size: number of windows to draw (with replacement) using NumPy's
            global random state.
        seq_len: number of rows per window (default 25, giving 24-step
            input/target pairs).

    Returns:
        A tuple `(d_input, d_output)`, each of shape
        `(size, seq_len - 1, data.shape[1])`.

    Raises:
        ValueError: if `data` has fewer than `seq_len` rows.
    """
    # randint's upper bound is exclusive, so the "+ 1" is required for the
    # final window (the one ending on the last row) to be reachable; it also
    # lets data that is exactly one window long be used.
    n_starts = len(data) - seq_len + 1
    if n_starts <= 0:
        raise ValueError(
            'data has %d rows but a window needs %d' % (len(data), seq_len))

    windows = []
    for _ in range(size):
        start = np.random.randint(0, n_starts)
        windows.append(data[start:start + seq_len, :])
    windows = np.array(windows)

    d_input = windows[:, :-1]
    d_output = windows[:, 1:]
    return (d_input, d_output)

def splitData(data, rate=0.8):
    """Split `data` into a leading training part and a trailing test part.

    Args:
        data: any sliceable sequence with a length.
        rate: fraction of `data` (from the start) placed in the training
            part; the remainder becomes the test part.

    Returns:
        A tuple `(train_data, test_data)`.
    """
    # Honour the caller's `rate`; the split point was previously fixed at 0.8
    # regardless of the argument.
    idx = int(len(data) * rate)
    train_data = data[:idx]
    test_data = data[idx:]
    return (train_data, test_data)


In [15]:
# Build the character <-> integer lookup tables from the training corpus, then
# encode the corpus as a list of integer codes.
# Despite its name, `oneHotEncoding` holds the integer codes returned by
# charToNum; the one-hot expansion happens in a later cell.
char_to_num, num_to_char = createDict(content)
oneHotEncoding = charToNum(content, char_to_num)

In [16]:
# Expand the integer codes with Keras' to_categorical using 100 classes, then
# split the result into a leading training part and a trailing test part.
# NOTE(review): 100 must be at least the number of distinct characters --
# confirm against len(char_to_num).
data = np_utils.to_categorical(oneHotEncoding, nb_classes=100)
train_data, test_data = splitData(data)

In [17]:
# Sample random windows from each split: 50000 for training and
# int(50000*0.25) = 12500 for testing.  Each call returns an (input, target)
# pair in which the target is the input shifted one step ahead.
train_input,train_output = createData(train_data, 50000)
test_input, test_output = createData(test_data, int(50000*0.25))

In [19]:
# one_slice = train_input[0]

# one_slice = one_slice.reshape((1, 24, 100))

# one_slice.shape

In [224]:
# Read the text sample to be analysed into memory as one string.
# NOTE(review): the 'U' (universal newlines) mode flag is kept exactly as
# written; it is deprecated on Python 3 -- confirm the target interpreter.
with open('data/T1_4.txt','U') as sample_file:
    one_sample = sample_file.read()

In [225]:
# Encode the sample text with the lookup table built from the training corpus.
# A character that never occurred in the corpus raises KeyError here.
oneHotEncoding_sample = charToNum(one_sample, char_to_num)

In [226]:
# Expand the sample's integer codes with to_categorical, using the same
# 100-class width as the training data.
data_sample = np_utils.to_categorical(oneHotEncoding_sample, nb_classes=100)

In [234]:
# Sanity check on the preprocessed sample: one row per character of the
# sample text, 100 columns (the class width passed to to_categorical).
data_sample.shape

(300, 100)

In [209]:
# data_sample = np.vstack((data_sample, np.zeros((2,100))))

In [233]:
# data_sample = data_sample.reshape((11, 24, 100))

In [228]:
# Load the saved Keras model from its HDF5 file.
model = keras.models.load_model('data/4a_1_0.h5')

In [229]:
# Print the layer-by-layer structure of the loaded model; the layer names
# shown here are what get_layer() looks up in the next cell.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
simplernn_2 (SimpleRNN)          (None, 24, 100)       20100       simplernn_input_3[0][0]          
____________________________________________________________________________________________________
lambda_1 (Lambda)                (None, 24, 100)       0           simplernn_2[0][0]                
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 24, 100)       10100       lambda_1[0][0]                   
Total params: 30,200
Trainable params: 30,200
Non-trainable params: 0
____________________________________________________________________________________________________


In [235]:
# Wrap the loaded network in a second model that shares its input but stops at
# the recurrent layer, so predict() returns that layer's output instead of the
# final layer's.
layer_name = 'simplernn_2'
rnn_layer = model.get_layer(layer_name)
intermediate_layer_model = Model(input=model.input, output=rnn_layer.output)

In [247]:
# lenGenerate = 1000
# generated = ''
# sentence = content[:24]
# for i in range(lenGenerate):
#     x = np.zeros((1, 24, 100))
#     for t, char in enumerate(sentence):
#         x[0, t, char_to_num[char]] = 1.

#     tmp = model.predict(x, verbose=0)
#     preds = tmp[0][-1]
#     next_index = sample(preds)
#     next_char = num_to_char[next_index]

#     generated += next_char
#     sentence = sentence[1:] + next_char

# Slide a 24-step window over the encoded sample one character at a time and
# record, for the last time step of every window, both the recurrent layer's
# output and the full model's output.
#
# The per-window array is deliberately NOT named `one_sample`: that name holds
# the raw text read from data/T1_4.txt, and overwriting it with an ndarray
# makes a re-run of the cell that encodes it with charToNum fail.
window_len = 24                      # time steps per window fed to the model
n_classes = data_sample.shape[1]     # width of each encoded character row

intermediate_output = []
sample_output = []
for idx in range(data_sample.shape[0] - window_len):
    window = data_sample[idx:idx + window_len, :].reshape((1, window_len, n_classes))
    # [0, -1, :] -> the only batch element, last time step, all units.
    intermediate_output.append(intermediate_layer_model.predict(window)[0,-1,:])
    sample_output.append(model.predict(window, verbose=0)[0,-1,:])
intermediate_output = np.array(intermediate_output)
sample_output = np.array(sample_output)

In [248]:
# One row per 24-step window over the 300-row sample: 300 - 24 = 276 rows.
intermediate_output.shape # 276 = 12*23

(276, 100)

In [250]:
# Same number of windows as intermediate_output; each row is the full model's
# output at the window's last time step.
sample_output.shape

(276, 100)

In [264]:
def sample(probDist, max_index=92):
    """Draw one index at random from an unnormalised probability vector.

    The vector is normalised to sum to 1, then indices are drawn from it with
    NumPy's global random state until one no greater than `max_index` comes up.

    Args:
        probDist: 1-D sequence or array of non-negative weights.
        max_index: largest index that may be returned; draws above it are
            rejected and repeated.  NOTE(review): the default of 92 presumably
            matches the highest character code in num_to_char -- confirm.

    Returns:
        The sampled index (a NumPy integer).

    Raises:
        ValueError: if the weights do not sum to a positive number, or if no
            weight lies at an index <= `max_index` (the rejection loop could
            then never terminate).
    """
    # Normalise in float64: np.random.multinomial rejects pvals whose sum
    # exceeds 1 by more than a tiny tolerance, which lower-precision
    # normalisation can trip.  Summing once also avoids recomputing the
    # total for every element.
    probs = np.asarray(probDist, dtype=np.float64)
    total = probs.sum()
    if not total > 0:
        raise ValueError('probDist must sum to a positive value')
    probs = probs / total

    if not probs[:max_index + 1].sum() > 0:
        raise ValueError(
            'probDist has no probability mass at indices <= %d' % max_index)

    idx = max_index + 1
    while idx > max_index:
        idx = np.argmax(np.random.multinomial(1, probs, 1))
    return idx

In [269]:
# For every window, sample a character index from the model's output row and
# translate it back to the character it stands for.
generated = [num_to_char[sample(char_probs)] for char_probs in sample_output]

In [270]:
# Convert the list of sampled characters to a NumPy array so the vectorised
# string replacement and the reshape in later cells can be applied.
generated = np.array(generated)

In [271]:
# Make whitespace visible in the heatmap labels: newline -> 'nl', space -> 'sp'.
# np.char is the public alias of the same routines; the np.core.* path is an
# internal namespace that newer NumPy releases deprecate.
generated = np.char.replace(generated, '\n', 'nl')
generated = np.char.replace(generated, ' ', 'sp')

In [272]:
# Display the sampled characters (newlines shown as 'nl', spaces as 'sp').
generated

array(['nl', '<', 's', 't', 'a', 'r', 't', '>', 'nl', 'X', ':', '1', 'nl',
       '5', '4', 'T', ':', 'o', 'Z', '8', 'x', ':', '6', '|', 'e', 'H',
       ':', 'M', 'i', 'r', 'l', 'g', 'l', 'a', 'y', 'n', 'sp', 'e', '2',
       't', 'p', 's', 'l', 'e', 'n', 'd', 'nl', 'sp', 'W', ':', 'i', 'd',
       ':', 'h', 'n', 'r', 'n', 'p', 'm', 's', 'l', 'E', ':', 'F', 'r',
       'a', 'n', 'c', 'e', 'nl', 'D', 'M', ':', '8', '/', '4', '=', 'L',
       ':', 'G', 'm', '~', 'B', '3', 'f', 'sp', 'g', 'f', 'e', 'w', 'm',
       '|', 'sp', 'f', 'g', 'd', 'd', 'd', 'd', '3', 'A', 'd', '|', 'c',
       '^', 'B', 'A', 'sp', 'G', 'F', 'G', 'A', '|', '|', 'sp', 'G', 'G',
       '^', 'B', 'sp', 'G', 'e', 'e', 'd', 'sp', '|', '2', 'sp', 'd', 'c',
       'g', 'B', '|', 'd', 'g', '/', 'f', 'sp', 'e', 'sp', 'c', 'c', ':',
       '|', 'e', 'sp', 'f', 'e', 'G', '2', '^', 'g', 'B', 'G', 'c', 'c',
       '|', '|', '|', 'sp', 'd', 'c', 'f', '2', 'sp', '|', 'sp', 'A', '4',
       '=', '/', 'G', '|', 'B', 'F', 'sp', '

In [275]:
# Lay the 276 sampled characters out on a 12 x 23 grid, matching the grid the
# activations are reshaped to for the heatmaps.
generated = generated.reshape((12,23))

In [292]:
import plotly.plotly as py
import plotly.graph_objs as go

In [293]:
import os
import plotly
# Read the Plotly credentials from the environment instead of embedding them
# in the notebook, where they leak to everyone who can read the file.
# Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel; a missing
# variable raises KeyError here rather than failing later during upload.
# NOTE: the API key that was previously committed in this cell should be
# treated as compromised and regenerated.
plotly.tools.set_credentials_file(username=os.environ['PLOTLY_USERNAME'],
                                  api_key=os.environ['PLOTLY_API_KEY'])

In [294]:
# Heatmap axis coordinates: 23 columns (x) by 12 rows (y), i.e. the 12 x 23
# grid the 276 windows are arranged on.
x = range(23)
y = range(12)

In [279]:
# Fold the 276 windows onto the same 12 x 23 grid as `generated`, keeping the
# 100 values recorded per window on the last axis.
intermediate_output = intermediate_output.reshape((12,23,100))
sample_output = sample_output.reshape((12,23,100))

In [300]:
# Render one annotated heatmap per recurrent unit.  Each cell of the 12 x 23
# grid is coloured by that unit's recorded value for the corresponding window
# and labelled with the character sampled for that window.

# The colour scale is the same for every unit, so it is built once instead of
# on each of the iterations.
colorscale = [[0.0, 'rgb(165,0,38)'], [0.1111111111111111, 'rgb(215,48,39)'], [0.2222222222222222, 'rgb(244,109,67)'], [0.3333333333333333, 'rgb(253,174,97)'], [0.4444444444444444, 'rgb(254,224,144)'], [0.5555555555555556, 'rgb(224,243,248)'], [0.6666666666666666, 'rgb(171,217,233)'], [0.7777777777777778, 'rgb(116,173,209)'], [0.8888888888888888, 'rgb(69,117,180)'], [1.0, 'rgb(49,54,149)']]  # custom colorscale

for idx in range(intermediate_output.shape[2]):
    z = intermediate_output[:,:,idx]

    annotations = []
    for n, row in enumerate(z):
        for m, val in enumerate(row):
            annotations.append(
                dict(
                    text=str(generated[n,m]),
                    x=x[m], y=y[n],
                    xref='x1', yref='y1',
                    # Light text on strongly coloured cells, dark text elsewhere.
                    font=dict(color='white' if val > 0.5 else 'black'),
                    showarrow=False)
                )

    trace = go.Heatmap(x=x, y=y, z=z, colorscale=colorscale, showscale=True)

    fig = go.Figure(data=[trace])
    fig['layout'].update(
        title="Heatmap",
        annotations=annotations,
        xaxis=dict(ticks='', side='bottom'),
        # ticksuffix is a workaround to add a bit of padding
        yaxis=dict(ticks='', ticksuffix='  '),
        width=700,
        height=500,
        autosize=False
    )
    # One PNG per unit, written through Plotly's image service.
    # NOTE(review): assumes the 'results/' directory already exists -- confirm.
    py.image.save_as(fig, filename='results/'+'neuron'+str(idx)+'.png')
    #py.iplot(fig, filename='custom-colorscale', height=750)
    