In [18]:
# !conda install -c conda-forge/label/cf201901 pyphen -y

In [19]:
import random
import sys
import os
import keras
import pyphen
import re
import nltk
import numpy as np
import pandas as pd
from collections import Counter
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import ModelCheckpoint
from nltk.corpus import words, wordnet
# Fetch the NLTK 'words' corpus; `words.words()` is used further down to
# filter the generated tokens.
nltk.download('words')

[nltk_data] Downloading package words to
[nltk_data]     /Users/BeatrizMiranda/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

**Importing Haiku Dataset**

In [20]:
# Read the raw haiku corpus. The context manager guarantees the file handle is
# closed as soon as the read finishes (a bare open().read() leaves it open
# until garbage collection).
with open('haikus_all3.csv', encoding="latin-1") as corpus_file:
    text = corpus_file.read()

# Line breaks and tabs become spaces so the corpus is one continuous stream.
regex = re.compile(r"[\n\r\t]")
# Every character that is not a word character, digit, apostrophe, whitespace
# or '+' is replaced by a space.
regex2 = re.compile(r"[^\w\d'\s\ +]")
text = regex.sub(" ", text)
text = regex2.sub(" ", text)
# Collapse the runs of spaces produced by the two substitutions above.
text = re.sub(' +', ' ', text)

**Prepping Dataset**

In [21]:
# Length of each training window (in characters) and the stride between the
# starts of consecutive windows.
seq_len = 50
step = 3
sentences = []
next_chars = []

# Slide a window over the corpus: each window is an input sequence and the
# character immediately after it is the prediction target.
for i in range(0, len(text) - seq_len, step):
    sentences.append(text[i: i + seq_len])
    next_chars.append(text[i + seq_len])
print('Number of sequences:', len(sentences))


# Sorted list of the distinct characters in the corpus.
chars = sorted(set(text))
print('Unique characters:', len(chars))

# Character -> position in `chars`. enumerate() yields the same mapping as
# chars.index(char) without rescanning the list for every character.
char_indices = {char: index for index, char in enumerate(chars)}

Number of sequences: 249013
Unique characters: 70


In [22]:
# Sizes reused by the vectorising, model and generation cells:
# corpus length, vocabulary size and number of training windows.
n_chars, n_vocab, n_sentences = (
    len(text),
    len(chars),
    len(sentences),
)

**Vectorizing Haikus**

In [23]:
# One-hot encode the training data.
#   x[i, t, c] is True when character t of window i is chars[c]
#   y[i, c]    is True when the character following window i is chars[c]
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((n_sentences, seq_len, n_vocab), dtype=bool)
y = np.zeros((n_sentences, n_vocab), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

**Creating Checkpoints**

In [24]:
# Checkpoint file name template; Keras fills in the epoch number and the loss.
filepath = "lstm4_weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
filepath_dir = os.path.dirname(filepath)

# Callback that monitors the training loss (lower is better) and writes a
# checkpoint only when it improves.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list2 = [checkpoint]

**Creating Model**

In [25]:
# Character-level model: one LSTM layer over the one-hot window, dropout, then
# a softmax over the vocabulary for the next character.
model = Sequential([
    LSTM(256, input_shape=(seq_len, n_vocab)),
    Dropout(0.2),
    Dense(n_vocab, activation='softmax'),
])

**Loading Model from Specific Checkpoint**

In [26]:
# Restore the weights saved in a previous training run, then compile the model.
filepath_current = "lstm4_weights-improvement-01-1.0284.hdf5"
model.load_weights(filepath=filepath_current)
model.compile(optimizer='adam', loss='categorical_crossentropy')

**Generating Haikus**

In [27]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature re-scaling.

    Args:
        preds: Array-like of probabilities (e.g. a softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` is treated as the greedy limit and returns the
            index of the largest probability without sampling.

    Returns:
        The sampled index (result of ``np.argmax``).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would only produce NaNs; the limit is plain argmax.
        return np.argmax(preds)
    # log(0) is expected for zero-probability entries. Silence the warning
    # only around the log itself instead of changing NumPy's global error
    # state (and doing so after the warning has already fired).
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shifting by the maximum leaves the softmax unchanged but keeps at least
    # one exponent at 0, so very low temperatures cannot underflow every term
    # to zero and yield NaN probabilities.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [1]:
# Prime the generator with a random window taken from the corpus.
start_index = random.randint(0, n_chars - seq_len - 1)
generated_text = text[start_index:start_index + seq_len]

for temperature in [0.4]:
    haiku = []
    for draw in range(100):
        # One-hot encode the current window as a batch of one.
        sampled = np.zeros((1, seq_len, n_vocab))
        for pos, window_char in enumerate(generated_text):
            sampled[0, pos, char_indices[window_char]] = 1.

        # Predict the next-character distribution and sample from it.
        preds = model.predict(sampled, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_char = chars[next_index]

        # Slide the window: drop the oldest character, append the new one.
        generated_text = generated_text[1:] + next_char
        haiku.append(next_char)

    haiku_gen = "".join(haiku)
    print(haiku_gen)

NameError: name 'random' is not defined

**Filtering Non-English Words**

In [21]:
# Build the vocabulary once as a set. Calling words.words() inside the
# comprehension re-created the full word list and scanned it linearly for
# every generated token; membership results are the same.
english_vocab = set(words.words())
haiku = haiku_gen.split()
en_haiku = [w for w in haiku if w in english_vocab]
print(en_haiku)

['little', 'butterfly', 'with', 'a', 'field', 'the', 'temple', 'flitting', 'first', 'month', 'the', 'head', 'field', 'of', 'snow']


**Splitting Words into Syllables**

In [22]:
# Hyphenation dictionary for English.
dic = pyphen.Pyphen(lang='en')

# Insert '-' at every hyphenation point pyphen finds in each kept word.
haiku_syllables = list(map(dic.inserted, en_haiku))
print(haiku_syllables)

['lit-tle', 'but-ter-fly', 'with', 'a', 'field', 'the', 'tem-ple', 'flit-ting', 'first', 'month', 'the', 'head', 'field', 'of', 'snow']


In [23]:
# Pair each hyphenated word with its number of '-'-separated pieces, which is
# used as the syllable count.
syllables = [[w, len(w.split('-'))] for w in haiku_syllables]

syllables

[['lit-tle', 2],
 ['but-ter-fly', 3],
 ['with', 1],
 ['a', 1],
 ['field', 1],
 ['the', 1],
 ['tem-ple', 2],
 ['flit-ting', 2],
 ['first', 1],
 ['month', 1],
 ['the', 1],
 ['head', 1],
 ['field', 1],
 ['of', 1],
 ['snow', 1]]

In [24]:
# Syllable budget for each haiku line, in the order the lines are tried.
line_limits = [("line1", 5), ("line2", 7), ("line3", 5)]
line_count = {name: 0 for name, _ in line_limits}
haiku_final = {name: [] for name, _ in line_limits}

for word, n_syllables in syllables:
    # Greedy placement: the word goes to the first line that still has room.
    # A later short word can therefore land on an earlier line than the word
    # before it; words that fit nowhere are dropped.
    for name, limit in line_limits:
        if n_syllables + line_count[name] <= limit:
            haiku_final[name].append(word)
            line_count[name] += n_syllables
            break

lines = [" ".join(haiku_final[name]) for name, _ in line_limits]
# Strip the hyphenation marks before printing.
haiku_printable = "\n".join(lines).replace('-', '')
print(haiku_printable)

little butterfly
with a field the temple first
flitting month the head
