## Assignment 11

Author: Anjani Bonda<br>
Date: 2023-05-28

#### Source Data (Enron Corpus)

In [9]:
## Get the source data

import keras
import numpy as np

path = keras.utils.get_file(
'nietzsche.txt',
origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


In [10]:
# Vectorize the sequences

maxlen = 60  
step = 4  
sentences = []  
next_chars = [] 

for x in range(0, len(text) - maxlen, step):
    sentences.append(text[x: x + maxlen])
    next_chars.append(text[x + maxlen])

print('# of sequences:', len(sentences))

# List unique chars in the corpus dataset
chars = sorted(list(set(text))) 
print('Distinct/Unique chars:', len(chars))
# indices mapping
char_indices = dict((char, chars.index(char)) for char in chars) 


# of sequences: 150209
Distinct/Unique chars: 57


In [11]:
# Using One-hot to encode chars to arrays:
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for m, sentence in enumerate(sentences):
    for n, char in enumerate(sentence):
        x[m, n, char_indices[char]] = 1
    y[m, char_indices[next_chars[m]]] = 1

#### Build Model

In [12]:
# Single-layer LSTM model for prediction of next char

from keras import layers
import tensorflow as tf
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

2023-05-29 03:21:48.790871: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-29 03:21:48.793496: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-29 03:21:48.795306: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [13]:
# Compile model
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [14]:
# Function to sample the next char 
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

#### Text Generation Loop 

In [16]:
import random
import sys
from pathlib import Path

inum = 1
results_dir = Path("/home/jovyan/dsc650/dsc650/assignments/assignment11")

for epoch in range(1, 11):
    print('\nepoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen -1)
    generated_text = text[start_index: start_index + maxlen]
    print('\n--- Generate with seed: "' + generated_text + '"')

    for temperature in [0.8, 1.0, 1.2]:
        print('\n--- Temperature:', temperature)
        print("--- Start text")
        sys.stdout.write(generated_text)
        print("\n--- End text")
        print("--- Start Loop ---")
        for i in range(300):
            sampled = np.zeros((1, maxlen, len(chars)))
            for j, char in enumerate(generated_text):
                sampled[0, j, char_indices[char]] = 1.
            preds = model.predict(sampled, verbose = 0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
        print("\n--- End Loop ---")
        
        print("\nPrint Generated Text for temperature {}".format(temperature))
        print(generated_text)

        if inum <= 21 and temperature in [1, 1.2]:
            print("\nWrite generated text for temperature {} to file: {}".format(temperature, inum))
            output_file = 'LSTM_Text_Generated_Temperature_'+ str(temperature) +'_EPOCH_'+str(epoch)+'_Sample_' + str(inum) + '.txt'
            result_model_file = results_dir.joinpath(output_file)
            f = open(result_model_file, "w")
            f.write(generated_text)
            f.close()
            inum+= 1


epoch 1


2023-05-29 03:40:16.719866: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-29 03:40:16.722417: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-29 03:40:16.724237: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus


--- Generate with seed: "folly (just look at
those poor historians, the sybels and tr"

--- Temperature: 0.8
--- Start text
folly (just look at
those poor historians, the sybels and tr
--- End text
--- Start Loop ---


2023-05-29 03:42:56.723409: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-29 03:42:56.725556: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-29 03:42:56.727232: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

ian and must a many of man or for this of the comman and muse of a man man belearoug: this persection. which into ampetharity, if fremm, and have anticned beliegs of mune sphausioncan stathy the we hor of instincies, the prestentand ene of if in thish indear, or be fingress of their of its seantion 
--- End Loop ---

Print Generated Text for temperature 0.8
if in thish indear, or be fingress of their of its seantion 

--- Temperature: 1.0
--- Start text
if in thish indear, or be fingress of their of its seantion 
--- End text
--- Start Loop ---
woll the pleasits, and ringion.

 1ho compar prentirat and must and sche mas fret-one
regrinacle rodres, penciacilm we cantesting
minalive corseescoserait it"went, than. a bemarice
of pry:n instint i some now genery
ithive for the self a
duing. to and neverough ond seifl! bulled of by arthery to cro
--- End Loop ---

Print Generated Text for temperature 1.0
ing. to and neverough ond seifl! bulled of by arthery to cro

Write generated text for te