In [16]:
import os
import sys
import numpy as np
import tensorflow as tf
from tensorflow import keras
import time

In [2]:
# Read the whole corpus into memory and lower-case it.
file = 'haiku_reddit.tsv'
# The context manager closes the file handle as soon as the text has been read,
# instead of leaving it open for the lifetime of the kernel.
with open(file, 'r') as corpus_file:
    raw_text = corpus_file.read().lower()

In [18]:
# Sorted list of every distinct character in the corpus; a character's position
# in this list is its integer code.
chars = sorted(set(raw_text))
# Forward (character -> code) and reverse (code -> character) lookup tables.
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
int_to_char = {idx: ch for idx, ch in enumerate(chars)}

In [4]:
# n_chars: corpus length in characters; n_words: number of distinct characters
# (despite its name, it counts characters, not words).
n_chars, n_words = len(raw_text), len(chars)
print("Number of unique characters (vocabulary): ", n_words)

Number of unique characters (vocabulary):  66


In [5]:
# Build fixed-length training windows: every run of `seq_length` characters
# inside a haiku is an input, and the character right after it is the target.
seq_length = 60
dataX = []  # integer-encoded input windows, each of length seq_length
dataY = []  # integer-encoded next character for the matching window
lines = raw_text.split('\n')
total, skip = 0, 0  # windows produced / lines that yield no window
for haiku in lines:
    # Strip before measuring, so surrounding whitespace cannot push a line
    # over the length threshold.
    haiku = haiku.strip()
    # A line needs at least seq_length + 1 characters: the window plus its target.
    if len(haiku) <= seq_length:
        skip += 1
        continue
    # The last valid start index is len(haiku) - seq_length - 1, so the final
    # character of the line is also used as a target.
    for i in range(len(haiku) - seq_length):
        total += 1
        seq_in = haiku[i:i + seq_length]
        seq_out = haiku[i + seq_length]
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])

In [6]:
# Number of entries collected in dataX.
input_size = len(dataX)

In [7]:
# Arrange the windows as a 3-D array of shape (input_size, seq_length, 1).
X = np.reshape(dataX, (input_size, seq_length, 1))
# Scale the integer character codes by the vocabulary size.
X = X / float(n_words)
# One-hot encode the targets over the full vocabulary. Without num_classes the
# width is inferred as max(dataY) + 1, which is narrower than the vocabulary
# whenever the highest-indexed character never occurs as a target.
y = tf.keras.utils.to_categorical(dataY, num_classes=n_words)

In [8]:
# Two stacked 512-unit LSTM layers, each followed by dropout, ending in a
# softmax layer with one unit per column of y.
model = keras.models.Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(keras.layers.LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.LSTM(512))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(y.shape[1], activation='softmax'))

In [9]:
# Configure training with the Adam optimizer and categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [11]:
# Checkpoint callback; the epoch number and training loss are substituted into
# the file name template. save_best_only=False keeps a file for every save.
path = 'model-{epoch:02d}-{loss:.4f}=1.hdf5'
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=path,
    monitor='loss',
    mode='min',
    save_best_only=False,
    verbose=1,
)
callbacks = [checkpoint_cb]

In [None]:
# Train for 20 epochs in batches of 64, passing the callbacks list to fit().
history = model.fit(
    X,
    y,
    batch_size=64,
    epochs=20,
    callbacks=callbacks,
)

In [13]:
# Replace the in-memory model with one loaded from a file on disk.
# NOTE(review): the file name is hardcoded and embeds an epoch number and a
# loss value; presumably it must match a checkpoint written by an earlier
# training run — confirm the file exists before running this cell.
model_name = 'model-09-1.5109=1.hdf5'
model = keras.models.load_model(model_name)

In [None]:
# Generate text: start from a randomly chosen training window and repeatedly
# append the model's most probable next character.

# Seed from the wall clock so each run starts from a different window.
np.random.seed(int(time.time()))
# randint's upper bound is exclusive, so len(dataX) lets every window be picked.
start = np.random.randint(0, len(dataX))
# Copy the seed window: appending to dataX[start] itself would lengthen that
# training sample in place and break a later reshape of dataX.
pattern = list(dataX[start])
print("Generated Hauku: ")
generate = ''.join([int_to_char[value] for value in pattern])
for i in range(30):
    # Same shaping and scaling as the training inputs.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_words)
    pred = model.predict(x, verbose=0)
    # Greedy decoding: take the index with the highest predicted probability.
    index = int(np.argmax(pred))
    result = int_to_char[index]
    generate = generate + result
    # Slide the window: add the prediction, drop the oldest character.
    pattern.append(index)
    pattern = pattern[1:]
print(generate)