# Import

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
cd 'drive/My Drive/155mini3'

/content/drive/My Drive/155mini3


In [0]:
import os
import numpy as np
from IPython.display import HTML

from HMM import unsupervised_HMM
from HMM_helper import (
    text_to_wordcloud,
    states_to_wordclouds,
    parse_observations,
    sample_sentence,
    visualize_sparsities,
    animate_emission,
    get_syllable_dict, 
    get_rhyme_dict,
    sample_rhyming_sonnet
)

# Build + Train LSTM

based on: https://blog.usejournal.com/how-to-develop-a-character-based-neural-language-model-99c18de1d4d2

In [0]:
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text
 
# save tokens to file, one dialog per line
def save_doc(lines, filename):
	data = '\n'.join(lines)
	file = open(filename, 'w')
	file.write(data)
	file.close()
 
# load text
raw_text = load_doc('data/shakespeare.txt')
 
# clean
tokens = raw_text.split()
raw_text = ' '.join(tokens)
 
# organize into sequences of characters
length = 40
sequences = list()
for i in range(length, len(raw_text)):
	# select sequence of tokens
	seq = raw_text[i-length:i+1]
	# store
	sequences.append(seq)
print('Total Sequences: %d' % len(sequences))
 
# save sequences to file
out_filename = 'char_sequences.txt'
save_doc(sequences, out_filename)

Total Sequences: 94141


In [0]:

from numpy import array
import numpy as np
from pickle import dump
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Lambda

from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
# load doc into memory
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text

# load
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
lines = raw_text.split('\n')

# integer encode sequences of characters
chars = sorted(list(set(raw_text)))
mapping = dict((c, i) for i, c in enumerate(chars))
sequences = list()
for line in lines:
	# integer encode line
	encoded_seq = [int(mapping[char]) for char in line]
	sequences.append(encoded_seq)
  # print(line)
print(len(encoded_seq))
# length = max(map(len, sequences))
# sequences= array([xi+[0]*(length-len(xi)) for xi in sequences])
# vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)

# separate into input and output
sequences = array(sequences)
X, y = sequences[:,:-1], sequences[:,-1]
sequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = array(sequences)
y = to_categorical(y, num_classes=vocab_size)

Using TensorFlow backend.


41
Vocabulary Size: 71


In [0]:

model = Sequential()
model.add(LSTM(100, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model

model.fit(X, y, epochs=40, verbose=2)

# save the model to file
model.save('model.h5')
# save the mapping
dump(mapping, open('mapping.pkl', 'wb'))


Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 100)               68800     
_________________________________________________________________
dense_7 (Dense)              (None, 71)                7171      
Total params: 75,971
Trainable params: 75,971
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/40
 - 238s - loss: 2.4766 - acc: 0.3153
Epoch 2/40
 - 239s - loss: 2.0825 - acc: 0.3986
Epoch 3/40
 - 242s - loss: 1.9598 - acc: 0.4262
Epoch 4/40
 - 242s - loss: 1.8829 - acc: 0.4453
Epoch 5/40
 - 238s - loss: 1.8264 - acc: 0.4602
Epoch 6/40
 - 244s - loss: 1.7818 - acc: 0.4698
Epoch 7/40
 - 251s - loss: 1.7480 - acc: 0.4781
Epoch 8/40
 - 235s - loss: 1.7150 - acc: 0.4860
Epoch 9/40
 - 235s - loss: 1.6818 - acc: 0.4932
Epoch 10/40
 - 238s - loss: 1.6498 - acc: 0.5024
Epoch 11/40
 - 240s - loss

# Generate 14-line poems with different temperatures

In [0]:
model = load_model('model.h5')












Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [0]:
mapping = load(open('mapping.pkl', 'rb'))


In [0]:
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars, n_syllables, temp =None):
  in_text = seed_text
  # generate a fixed number of characters
  while True:
      # encode the characters as integers
      encoded = [mapping[char] for char in in_text]
      # truncate sequences to a fixed length
      encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
      # one hot encode
      encoded = to_categorical(encoded, num_classes=len(mapping))
      # predict character
      if temp is None:
        yhat = model.predict_classes(encoded, verbose=0)
      else:
        predictions = model.predict(encoded, verbose=0)[0]
        yhat = sample(predictions,temp)
      # reverse map integer to character
      out_char = ''
      for char, index in mapping.items():
          if index == yhat:
              out_char = char
              break          

      # append to input
      in_text += char

      #We only want the new part of the string, not including the original seed text
      res = in_text[len(seed_text)+1:-1]
      #End when the new text has the correct number of syllables
      if len(res) > 0 and nsyl(res) == n_syllables:
        return res

In [0]:
import nltk
nltk.download('cmudict')

[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


True

In [0]:

#syllable checker
from nltk.corpus import cmudict
d = cmudict.dict()

def nsyl(word):
    try:
        return [len(list(y for y in x if y[-1].isdigit())) for x in d[word.lower()]]
    except KeyError:
        #if word not found in cmudict
        return syllables(word)

def syllables(word):
    #referred from stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word
    count = 0
    vowels = 'aeiouy'
    word = word.lower()
    if word[0] in vowels:
        count +=1
    for index in range(1,len(word)):
        if word[index] in vowels and word[index-1] not in vowels:
            count +=1
    if word.endswith('e'):
        count -= 1
    if word.endswith('le'):
        count += 1
    if count == 0:
        count += 1
    return count

In [0]:
#Generate poem line-by-line
total_poem = ['Shall I compare thee to a summer\'s day?']
for i in range(1,11):
  total_poem.append(generate_seq(model, mapping, 40, total_poem[i-1], 2000,10,temp=1))
for l in total_poem:
  print(l)

  # Remove the CWD from sys.path while we load stuff.


Shall I compare thee to a summer's day?
Thus these of a good a himash to
nd. 88 So dring my heart is, or virsur see so
 He gener tom, I some to as tho
 wasty wo hot every of Tied, A
 annour engiad in quise and like 
ven turny in thish althery sover
 Thou art notriss, and deaphrivious mandra
e, And and lovern write refore 
y a thought Pruisues for this bid such sta
s when no shart nor welf and all deserve,


In [0]:
#Temperature = 1.5
total_poem = ['Shall I compare thee to a summer\'s day?']
for i in range(1,11):
  total_poem.append(generate_seq(model, mapping, 40, total_poem[i-1], 2000,10,temp=1.5))
for l in total_poem:
  print(l)

  # Remove the CWD from sys.path while we load stuff.


Shall I compare thee to a summer's day?
Who eye of cortcy mince anfeello
y, On simmous ilte with whe tenst tho
 king, whe conear; And wort of imponfa
now erized her mupy. 30 Th'ne singga
ngeds (an owr tinaugis, tell's khe lie ed
efk: No, Odm: 3 Me lovities o'ringres
eife; awas? Or mely ix morth o
 that lile, If the staneting kingng tho
 might, in or. Naching I sauly spity
 with sweetlequiages, For hang you lavro


In [0]:
#Temperature = 0.75
total_poem = ['Shall I compare thee to a summer\'s day?']
for i in range(1,11):
  total_poem.append(generate_seq(model, mapping, 40, total_poem[i-1], 2000,10,temp=0.75))
for l in total_poem:
  print(l)
  # print('\n')

  # Remove the CWD from sys.path while we load stuff.


Shall I compare thee to a summer's day?
No love in a to thou art mine 
hade, To make put in thy fall wo
th worst no keel, What now was not made thy
fair why so greet, The widom my self thy
all my love but with thy duts. 44 Who lo
e was thou last hor had thy heary wi
h dis, And it be noternds and to make 
he eye ins, Now is an the eyes of the 
espest, And these wast becomare 
hou mayst thine earth, Which highte or ev


In [0]:
#Temperature = 0.25
total_poem = ['Shall I compare thee to a summer\'s day?']
for i in range(1,11):
  total_poem.append(generate_seq(model, mapping, 40, total_poem[i-1], 2000,10,temp=0.25))
for l in total_poem:
  print(l)
  # print('\n')

  # Remove the CWD from sys.path while we load stuff.


Shall I compare thee to a summer's day?
Why should in this thy show me despeci
st, But the world with the sun a worth of thee,
That thou art thou art thou art thou sight, A
d thou shalt to the same and the worl's fa
e, And thou art thou art thou hast thou ther
, The earth spend of the sun in the fair, A
d the worth of the world with the sun and la
d, That thou art thou art thou didst that I no
 so, He love doth shadows to the pa
nting thee, That in the store to my sel


# Generate haikus

In [0]:
#Generate poem line-by-line
total_poem = ['The Old Pond ']
# for i in range(1,11):
total_poem.append(generate_seq(model, mapping, 40, total_poem[0], 2000,5)) # 5 syllablues
total_poem.append(generate_seq(model, mapping, 40, total_poem[1], 2000,7)) # 7 syllables
total_poem.append(generate_seq(model, mapping, 40, total_poem[2], 2000,5))  # 5 syllablues
# total_poem[1] = total_poem[0] + total_poem[1]
# total_poem = total_poem[1:]
for l in total_poem:
  print(l)

The Old Pond 
hat thou art thou a
l dead, The earth which in the wo
th of thy sell, That tho


In [0]:
#Generate poem line-by-line
total_poem = ['Over the Wintry ']
# for i in range(1,11):
total_poem.append(generate_seq(model, mapping, 40, total_poem[0], 2000,5)) # 5 syllablues
total_poem.append(generate_seq(model, mapping, 40, total_poem[1], 2000,7)) # 7 syllables
total_poem.append(generate_seq(model, mapping, 40, total_poem[2], 2000,5))  # 5 syllablues
# total_poem[1] = total_poem[0] + total_poem[1]
# total_poem = total_poem[1:]
for l in total_poem:
  print(l)

Over the Wintry 
 am add the store,
The eyes that thou art thou a
t thou art truth, And the 


In [0]:
#Generate poem line-by-line
total_poem = ['A Poppy Blooms ']
# for i in range(1,11):
total_poem.append(generate_seq(model, mapping, 40, total_poem[0], 2000,5)) # 5 syllablues
total_poem.append(generate_seq(model, mapping, 40, total_poem[1], 2000,7)) # 7 syllables
total_poem.append(generate_seq(model, mapping, 40, total_poem[2], 2000,5))  # 5 syllablues
# total_poem[1] = total_poem[0] + total_poem[1]
# total_poem = total_poem[1:]
for l in total_poem:
  print(l)

A Poppy Blooms 
aty doth which tho
 might, And thou art thou art tho
 art thou mayst to my


In [0]:
#Generate poem line-by-line
total_poem = ['']
# for i in range(1,11):
total_poem.append(generate_seq(model, mapping, 40, total_poem[0], 2000,5)) # 5 syllablues
total_poem.append(generate_seq(model, mapping, 40, total_poem[1], 2000,7)) # 7 syllables
total_poem.append(generate_seq(model, mapping, 40, total_poem[2], 2000,5))  # 5 syllablues
# total_poem[1] = total_poem[0] + total_poem[1]
# total_poem = total_poem[1:]
for l in total_poem:
  print(l)


Coworndered wher
 I and thou art thou had, I
so beauty's true so
