# Model training

## Import packages

In [6]:
import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import random
import re
import matplotlib.pyplot as plt



In [2]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


2022-05-01 16:17:50.763339: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-01 16:17:50.815743: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-01 16:17:50.816366: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


## Read data

In [7]:
df = pd.read_csv('data/out.csv')

## Transform df to get array of poems (1 input to start with)

In [8]:
df = df[['0', '1', '2']].agg(lambda x: ','.join(x.values), axis=1)
array_of_poems = df.values.tolist()
array_of_poems[0]

'last red in the sky,a small girls moon face rises,over the counter'

### Show array

In [9]:
for i, sentence in enumerate(array_of_poems):
    if i == 3:
        break;
    print(sentence)
    print('\n')

last red in the sky,a small girls moon face rises,over the counter


christmas services,a cellular phone rings out,handels messiah


passover darkness ,before the buds burst open,a childs eyes in death




## Extend poems to match longest poem to get unified length

In [10]:
#maxlen = len(max(array_of_poems, key = len))

#for i in range(len(array_of_poems)):
#    array_of_poems[i] = array_of_poems[i].ljust(maxlen)


### Show array

In [11]:
for i, sentence in enumerate(array_of_poems):
    if i == 3:
        break;
    print(len(sentence))
    print('\n')

66


61


68




In [12]:
text = ""

print(len(array_of_poems) - 1)

#for i in range(len(array_of_poems) - 1):
for i in range(150000):
    text = text + array_of_poems[i]
    text = text + "/"




438233


## Get unique chars in corpus

In [13]:
vocab = sorted(set(text))
print(f'{len(vocab)} unique chars')
vocab_size = len(vocab)

29 unique chars


In [14]:
vocab

[' ',
 ',',
 '/',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

## Before training, convert strings to numerical representation

## Create training batches

In [17]:
print("Corpus length:", len(text))

chars = sorted(list(set(text)))
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 100
step = 11
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Corpus length: 9956223
Total chars: 29
Number of sequences: 905103


In [12]:
x.shape[1]

100

In [13]:
x.shape[2]

29

In [14]:
model = keras.Sequential(
    [
        layers.LSTM(128, input_shape=(x.shape[1], x.shape[2]), return_sequences=True),
        layers.Dropout(0.2),
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(0.2),
        layers.LSTM(64),
        layers.Dense(len(chars), activation="softmax"),
    ]
)

optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)



2022-05-01 16:18:21.467181: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-01 16:18:21.468737: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-01 16:18:21.469406: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-01 16:18:21.469810: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 128)          80896     
                                                                 
 dropout (Dropout)           (None, 100, 128)          0         
                                                                 
 lstm_1 (LSTM)               (None, 100, 128)          131584    
                                                                 
 dropout_1 (Dropout)         (None, 100, 128)          0         
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 29)                1885      
                                                                 
Total params: 263,773
Trainable params: 263,773
Non-trai

In [19]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds)
    return np.argmax(probas)

In [17]:
epochs = 20
batch_size = 512

input_data = x
output_data = y

history = []

for epoch in range(epochs):
    history = model.fit(input_data, output_data, batch_size=batch_size, epochs=1)

    print()

    print_poem = 0
    temperature = 0.5
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated = ""

    seed =  text[start_index : start_index + maxlen]
    print('...Generating with seed: "' + seed + '"')

    for i in range(400):
        x_pred = np.zeros((1, len(seed), len(chars)))
        for t, char in enumerate(seed):
            x_pred[0, t, char_indices[char]] = 1.0
        preds = model.predict(x_pred, verbose=0)[0]

        next_index = sample(preds, temperature)
        next_char = indices_char[next_index]
        seed = seed[1:] + next_char
        generated += next_char
        #if print_poem == 1:
        print(next_char, end="")
        #if next_char == "/":
        #    print_poem = print_poem ^ 1

    print()
    haiku_gen = re.findall('/(.+?)/', generated)

    print()
    for i in range(len(haiku_gen)):
        print(haiku_gen[i])
    print()





2022-05-01 16:18:23.497228: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2624798700 exceeds 10% of free system memory.
2022-05-01 16:18:25.278986: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2624798700 exceeds 10% of free system memory.
2022-05-01 16:18:31.527294: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8100



...Generating with seed: "letting us know/i wanted to throw, war and peace but it was on,my kindle so no/look at their picture"
/i really though or,the best will do you/i cant see the gried/i know what like and the, dischoring the chool he bear, what textach and when the part/and missing and the part, the are again where at it/thank that the may it/when the plied that do, can the world do the prese/just stars with people/so and you people, be so the the art your a, mind the spint that i was leart/shit the same on i have, y

i really though or,the best will do you
i know what like and the, dischoring the chool he bear, what textach and when the part
thank that the may it
just stars with people



2022-05-01 16:20:08.617583: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2624798700 exceeds 10% of free system memory.
2022-05-01 16:20:10.792493: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2624798700 exceeds 10% of free system memory.



...Generating with seed: "ot about who, started what its about who,does did it better/cried like a baby, at a star is born and"
, i want to be is a friends/i dont be they could well,use i said what you/and i was dont be, the book about the ostion/im that i dont want, a bitch it is a shitter/i dont always let you,i dont ever love, that are my best is some had/i want you had a, someone so to my same that well, my way i would not mean of, im the both i love to gonna,be the look to that seam, the best the same and i wont be, m

i dont be they could well,use i said what you
im that i dont want, a bitch it is a shitter



2022-05-01 16:21:47.714635: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2624798700 exceeds 10% of free system memory.



...Generating with seed: "hate, selfish stingy people i,would never relate/looking forward to, staying with my aunt cause my,f"
eel discortans is, is the to be my serious, well is for what you think base/i was dont all the, best work and is a lot/when you all the back, the time to be the best/in my hand to suck,someone the people see, to like you special deserve, to some is the newic with, the hurt from the singer how/deleted it was, contrate a sexidal, i want to be in the prodect, story to spend the and what, i can is clo

i was dont all the, best work and is a lot
in my hand to suck,someone the people see, to like you special deserve, to some is the newic with, the hurt from the singer how


...Generating with seed: "sion for,too many pictures/when you pretend to, walk around the strip pole thats,just skittles and s"
ometimes/the way shit in her, i need to really need to, do that i didnt got been an excise,to have a rases/if you want to be, a start brand i was not because/i think to

  preds = np.log(preds) / temperature


e, happy to go to this new, panty you just wanna dine/the really man a good,sunday and so when,you could watch someone never, be but it doesnt really, be anything day is it when, my back in this lol you really, is how good season are it/i dont see the call/i know what the dream,and really really, in th

the really man a good,sunday and so when,you could watch someone never, be but it doesnt really, be anything day is it when, my back in this lol you really, is how good season are it


...Generating with seed: " if you just got,into weightlifting/city attorneys, have probably already screamed,at him i dont kno"
w, in the only fair my world/i need to be down, the check the only time/i think i was a, start the start the best to the,best things it is no, i miss the world in the only, same and i think the band/i dont have to get, the only two through the accould, go to fully on the sist, is the same congrather is, all the problem to have a, tomorrow so i love the, car is everything the only

In [None]:
model.save('myModel.h5')

In [15]:
model = keras.models.load_model('myModel.h5')

In [31]:

temperature = 0.5
start_index = random.randint(0, len(text) - maxlen - 1)
generated = ""

seed =  text[start_index : start_index + maxlen]

print('...Generating with seed: "' + seed + '"')

for i in range(2000):
    x_pred = np.zeros((1, len(seed), len(chars)))
    for t, char in enumerate(seed):
        x_pred[0, t, char_indices[char]] = 1.0
    preds = model.predict(x_pred, verbose=0)[0]

    next_index = sample(preds, temperature)
    next_char = indices_char[next_index]
    seed = seed[1:] + next_char
    generated += next_char
    #if print_poem == 1:
    print(next_char, end="")
    if next_char == "/":
        print()
        print()



...Generating with seed: "rse it does/ive gotten nothing, done today and i feel like,garbage about it/getting that bread is, h"
ow i alw

  preds = np.log(preds) / temperature


ays blow the same,is a breaking stuff/

have you are any, sister to the lol they have,the first and what you/

he you probably show, the class people you are control,this is a lot of/

i dont want someone, and i would be any of,starting and not for/

you really have a, creative of her your sense,than any year aw/

i like a bitches, are man the party sounds the,day pretty carding/

i have a first side, with the real fuck bitch is the,fair and always been/

i still can care it, about it when you shouldnt,see the week and it/

my start class body, does the staman break my pretty,life be the other/

i feel have a time, in the day but i dont want,to get this stranger/

i like to explain, the for the only baby,is the night media/

the weekend in the one, make you a match stranger you,start to go to have/

the only of the, shot of house and go the date,this is this of year/

the love and stand the, time and any shot things bro,the playing for the stars/

i find the bored, to have one of the d