In [123]:
from tensorflow import keras
from pathlib import Path

import tensorflow as tf
tf.reset_default_graph()
# tf_session = tf.compat.v1.Session()
tf_session = tf.Session()

from tensorflow.keras import backend as K
K.set_session(tf_session)

from tensorflow.keras.callbacks import ModelCheckpoint,  CSVLogger
from tensorflow.keras.layers import Add, Dense, Input, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

import numpy as np
import pandas as pd
from sklearn.externals import joblib
# import joblib


# Local library with model definitions for training and generating
from models import Generator, create_training_model

In [124]:
# Creates a graph.
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
print(sess.run(c))

[[22. 28.]
 [49. 64.]]


# Load Input

In [96]:
# Settings

# Percent of samples to use for training, might be necessary if you're running out of memory
sample_size = 1

# The latent dimension of the LSTM
latent_dim = 2048

# Number of epochs to train for
epochs = 20

root_path = Path('../../..')
input_path = root_path / 'input'
poem_path = input_path / 'poems'
haiku_path = poem_path / 'haikus.csv'

name = 'all_data_test_2'
output_dir = Path('output_%s' % name)
output_dir.mkdir()

In [97]:
df = pd.read_csv(str(haiku_path))
df = df.sample(frac=sample_size)
df

Unnamed: 0,0,1,2,source,0_syllables,1_syllables,2_syllables
130322,my printer is such,a piece of shit why do we,even still have it,twaiku,5,7,5
23213,red berries,at the tip of each branch,the setting sun,sballas,3,6,4
61691,So that explains why,the Burning Bush didn't have,limbs No fire arms,twaiku,5,67,45
4905,my soul lashes out at all the meanness,bred in soullessness,few care who they hurt reason for sadness,img2poems,10,5,10
4014,church graveyard,a cloud of crows hover,over stone angels,tempslibres,3,6,5
...,...,...,...,...,...,...,...
1814,July Fourth fireworks---,I stand next to,an Iraqi refugee,tempslibres,5,4,7
51875,I hope someone buys,me sunflowers tomorrow,I'll be so happy,twaiku,5,7,5
78785,Been playing a Red,Faction Guerilla It's good,Good for what ails ya,twaiku,5,7,5
42262,Anyone have snow,gear I can borrow All I,need is pants and boots,twaiku,5,7,5


# Format Input for Training

In [98]:
# Duplicate lines with ambiguous syllable counts
# (syllable counts where there is a comma because
# multiple pronounciations are acceptable)

lines = set([0, 1, 2])

for i in range(3):
    lines.remove(i)
    df = df[[
        '0', '1', '2',
        #'1_syllables', '2_syllables'
    ] + ['%s_syllables' % j for j in lines]].join(
        df['%s_syllables' % i].str.split(
            ',', expand=True
        ).stack(-1).reset_index(
            level=1, drop=True
        ).rename('%s_syllables' % i)
    ).drop_duplicates()
    lines.add(i)

df.head(10)

Unnamed: 0,0,1,2,0_syllables,1_syllables,2_syllables
0,Memorial Day --,a shadow for each,white cross,5,5,2
1,spring rain -,as the doctor speaks,i think of lilacs,2,5,5
1,spring rain -,as the doctor speaks,i think of lilacs,3,5,5
2,spring moonset --,a rice ball for,breakfast,3,4,2
2,spring moonset --,a rice ball for,breakfast,4,4,2
3,sunny afternoon,an old man lingers,near the mailbox,5,5,4
4,cinco de mayo,horses roll,in the shallows,5,3,4
5,quitting time,the smell of rain,in the lobby,3,4,4
6,waves,slowly cresting towards shore,a faint moon,1,6,3
6,waves,slowly cresting towards shore,a faint moon,1,7,3


In [99]:
# Drop samples that are longer that the 99th percentile of length

max_line_length = int(max([df['%s' % i].str.len().quantile(.99) for i in range(3)]))
df = df[
    (df['0'].str.len() <= max_line_length) & 
    (df['1'].str.len() <= max_line_length) & 
    (df['2'].str.len() <= max_line_length)
].copy()
df

Unnamed: 0,0,1,2,0_syllables,1_syllables,2_syllables
0,Memorial Day --,a shadow for each,white cross,5,5,2
1,spring rain -,as the doctor speaks,i think of lilacs,2,5,5
1,spring rain -,as the doctor speaks,i think of lilacs,3,5,5
2,spring moonset --,a rice ball for,breakfast,3,4,2
2,spring moonset --,a rice ball for,breakfast,4,4,2
...,...,...,...,...,...,...
143132,I'm not asking did,you say it nor clarify,what you said neither,5,7,5
143133,You are truly a,moron or a liar I'm,inclined to think both,5,7,5
143134,Ain't no selfie on,this earth that's gonna make me,like Theresa May,5,7,5
143135,is doing a great,job turning Independents,into Democrats,5,7,5


In [100]:
# Pad the lines to the max line length with new lines
for i in range(3):
    # For input, duplicate the first character
    # TODO - Why?
    df['%s_in' % i] = (df[str(i)].str[0] + df[str(i)]).str.pad(max_line_length+2, 'right', '\n')
    
    # 
    #df['%s_out' % i] = df[str(i)].str.pad(max_line_len, 'right', '\n') + ('\n' if i == 2 else df[str(i+1)].str[0])
    
    # TODO - trying to add the next line's first character before the line breaks
    if i == 2: # If it's the last line
        df['%s_out' % i] = df[str(i)].str.pad(max_line_length+2, 'right', '\n')
    else: 
        # If it's the first or second line, add the first character of the next line to the end of this line.
        # This helps with training so that the next RNN has a better chance of getting the first character right.
        df['%s_out' % i] = (df[str(i)] + '\n' + df[str(i+1)].str[0]).str.pad(max_line_length+2, 'right', '\n')
    
max_line_length += 2

df

Unnamed: 0,0,1,2,0_syllables,1_syllables,2_syllables,0_in,0_out,1_in,1_out,2_in,2_out
0,Memorial Day --,a shadow for each,white cross,5,5,2,MMemorial Day --\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,Memorial Day --\na\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,aa shadow for each\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,a shadow for each\nw\n\n\n\n\n\n\n\n\n\n\n\n\n...,wwhite cross\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,white cross\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...
1,spring rain -,as the doctor speaks,i think of lilacs,2,5,5,sspring rain -\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,spring rain -\na\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,aas the doctor speaks\n\n\n\n\n\n\n\n\n\n\n\n\...,as the doctor speaks\ni\n\n\n\n\n\n\n\n\n\n\n\...,ii think of lilacs\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,i think of lilacs\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...
1,spring rain -,as the doctor speaks,i think of lilacs,3,5,5,sspring rain -\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,spring rain -\na\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,aas the doctor speaks\n\n\n\n\n\n\n\n\n\n\n\n\...,as the doctor speaks\ni\n\n\n\n\n\n\n\n\n\n\n\...,ii think of lilacs\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,i think of lilacs\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...
2,spring moonset --,a rice ball for,breakfast,3,4,2,sspring moonset --\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,spring moonset --\na\n\n\n\n\n\n\n\n\n\n\n\n\n...,aa rice ball for\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,a rice ball for\nb\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,bbreakfast\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,breakfast\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...
2,spring moonset --,a rice ball for,breakfast,4,4,2,sspring moonset --\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,spring moonset --\na\n\n\n\n\n\n\n\n\n\n\n\n\n...,aa rice ball for\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,a rice ball for\nb\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,bbreakfast\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,breakfast\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...
...,...,...,...,...,...,...,...,...,...,...,...,...
143132,I'm not asking did,you say it nor clarify,what you said neither,5,7,5,II'm not asking did\n\n\n\n\n\n\n\n\n\n\n\n\n\...,I'm not asking did\n \n\n\n\n\n\n\n\n\n\n\n\n\...,you say it nor clarify\n\n\n\n\n\n\n\n\n\n\n...,you say it nor clarify\nw\n\n\n\n\n\n\n\n\n\n...,wwhat you said neither\n\n\n\n\n\n\n\n\n\n\n\n...,what you said neither\n\n\n\n\n\n\n\n\n\n\n\n\...
143133,You are truly a,moron or a liar I'm,inclined to think both,5,7,5,YYou are truly a\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...,You are truly a\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n...,moron or a liar I'm\n\n\n\n\n\n\n\n\n\n\n\n\...,moron or a liar I'm\ni\n\n\n\n\n\n\n\n\n\n\n\...,iinclined to think both\n\n\n\n\n\n\n\n\n\n\n\...,inclined to think both\n\n\n\n\n\n\n\n\n\n\n\n...
143134,Ain't no selfie on,this earth that's gonna make me,like Theresa May,5,7,5,AAin't no selfie on\n\n\n\n\n\n\n\n\n\n\n\n\n\...,Ain't no selfie on\n \n\n\n\n\n\n\n\n\n\n\n\n\...,this earth that's gonna make me\n\n\n\n\n\n\...,this earth that's gonna make me\nl\n\n\n\n\n\...,llike Theresa May\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...,like Theresa May\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...
143135,is doing a great,job turning Independents,into Democrats,5,7,5,iis doing a great\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...,is doing a great\n \n\n\n\n\n\n\n\n\n\n\n\n\n\...,job turning Independents\n\n\n\n\n\n\n\n\n\n...,job turning Independents\ni\n\n\n\n\n\n\n\n\n...,iinto Democrats\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\...,into Democrats\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n...


In [101]:
inputs = df[['0_in', '1_in', '2_in']].values

tokenizer = Tokenizer(filters='', char_level=True)
tokenizer.fit_on_texts(inputs.flatten())
n_tokens = len(tokenizer.word_counts) + 1

# X is the input for each line in sequences of one-hot-encoded values
X = to_categorical([
    tokenizer.texts_to_sequences(inputs[:,i]) for i in range(3)
], num_classes=n_tokens)

outputs = df[['0_out', '1_out', '2_out']].values

# Y is the output for each line in sequences of one-hot-encoded values
Y = to_categorical([
    tokenizer.texts_to_sequences(outputs[:,i]) for i in range(3)
], num_classes=n_tokens)

# X_syllables is the count of syllables for each line
X_syllables = df[['0_syllables', '1_syllables', '2_syllables']].values


#### Process X's for training

In [102]:
# your raw text poems
inputs[0, 0:3]

# tokenized
tks = tokenizer.texts_to_sequences(inputs[0,0:3])

# tokenized --> categorical (1-hot encoded)
to_categorical(tks)


array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]]], dtype=float32)

#### Process Y's for Training

In [103]:
inputs[0, 0:3]

array(['MMemorial Day --\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n',
       'aa shadow for each\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n',
       'wwhite cross\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'],
      dtype=object)

In [104]:
outputs[0, 0:3]

array(['Memorial Day --\na\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n',
       'a shadow for each\nw\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n',
       'white cross\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'],
      dtype=object)

---
1. inputs[0, 0:3] --> contains 3 lines from a poem
2. processed by tokenizer looks like this: [[43, 5, 7, 1, 2, 5, 7, 0, 0, 0, 0, 0] ... X3]
3. becomes categorical (1 hot encoded) so [43] => [0, 0, 0, 0, 0, ... , 1, 0, 0, 0, 0]

^ This process gives you your *X* & basically *Y*

---

#### X (syllables)

In [105]:
X_syllables

array([['5', '5', '2'],
       ['2', '5', '5'],
       ['3', '5', '5'],
       ...,
       ['5', '7', '5'],
       ['5', '7', '5'],
       ['5', '7', '5']], dtype=object)

In [106]:
joblib.dump([latent_dim, n_tokens, max_line_length, tokenizer], str(output_dir / 'metadata.pkl'))

['output_all_data_test_2\\metadata.pkl']

# Training Model

In [107]:
training_model, lstm, lines, inputs, outputs = create_training_model(latent_dim, n_tokens)

filepath = str(output_dir / ("%s-{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5" % latent_dim))
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

csv_logger = CSVLogger(str(output_dir / 'training_log.csv'), append=True, separator=',')

callbacks_list = [checkpoint, csv_logger]

In [108]:
lstm.units

2048

In [109]:
# create_training_model outputs...

training_model # keras training model

lstm # keras.layers --> LSTM layer

lines # 3 - TrainingLine's

#! Not USED
inputs # 6 - Tensors...
# char_input_line_1
# syllable_input_line_1
# ... 3x times

outputs # 3 - Tensors

[<tf.Tensor 'output_line_0/truediv:0' shape=(?, ?, 81) dtype=float32>,
 <tf.Tensor 'output_line_1/truediv:0' shape=(?, ?, 81) dtype=float32>,
 <tf.Tensor 'output_line_2/truediv:0' shape=(?, ?, 81) dtype=float32>]

In [110]:
training_model.fit(
    [
        X[0],
        X_syllables[:,0],
        X[1],
        X_syllables[:,1], 
        X[2],
        X_syllables[:,2]
    ],
    [
        Y[0],
        Y[1],
        Y[2]
    ], epochs=epochs, validation_split=.1
)

Train on 153139 samples, validate on 17016 samples
Epoch 1/20


KeyboardInterrupt: 

# Test Model

In [111]:
generator = Generator(lstm, lines, tf_session, tokenizer, n_tokens, max_line_length)

In [118]:
# import importlib
# importlib.reload(models)
generator.generate_haiku()

te  htect se3



TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'