In [11]:
import tensorflow as tf
print(tf.__version__)

# Set CPU as available physical device
#my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
#tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')

# To find out which devices your operations and tensors are assigned to
#tf.debugging.set_log_device_placement(True)

# Create some tensors and perform an operation
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print(c)

2.1.0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [24]:
import os
# os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
os.environ["KERAS_BACKEND"] = "tensorflow"
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [25]:
# load ascii text and covert to lowercase
filename = "FAHRENHEIT 451.txt"
raw_text = open(filename, 'r', encoding='utf-8').read() 
raw_text = raw_text.lower()

In [26]:
# create mapping of unique chars to integers, and a reverse mapping
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
print(char_to_int)

{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '?': 22, '`': 23, 'a': 24, 'b': 25, 'c': 26, 'd': 27, 'e': 28, 'f': 29, 'g': 30, 'h': 31, 'i': 32, 'j': 33, 'k': 34, 'l': 35, 'm': 36, 'n': 37, 'o': 38, 'p': 39, 'q': 40, 'r': 41, 's': 42, 't': 43, 'u': 44, 'v': 45, 'w': 46, 'x': 47, 'y': 48, 'z': 49}


In [27]:
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  251222
Total Vocab:  50


In [28]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 256
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
  seq_in = raw_text[i:i + seq_length]
  seq_out = raw_text[i + seq_length]
  dataX.append([char_to_int[char] for char in seq_in])
  dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  250966


In [29]:
print(dataX[1])

[24, 31, 41, 28, 37, 31, 28, 32, 43, 1, 14, 15, 11, 1, 25, 48, 1, 41, 24, 48, 1, 25, 41, 24, 27, 25, 44, 41, 48, 0, 43, 31, 32, 42, 1, 38, 37, 28, 7, 1, 46, 32, 43, 31, 1, 30, 41, 24, 43, 32, 43, 44, 27, 28, 7, 1, 32, 42, 1, 29, 38, 41, 1, 27, 38, 37, 1, 26, 38, 37, 30, 27, 38, 37, 9, 0, 29, 24, 31, 41, 28, 37, 31, 28, 32, 43, 1, 14, 15, 11, 20, 0, 43, 31, 28, 1, 43, 28, 36, 39, 28, 41, 24, 43, 44, 41, 28, 1, 24, 43, 1, 46, 31, 32, 26, 31, 1, 25, 38, 38, 34, 8, 39, 24, 39, 28, 41, 1, 26, 24, 43, 26, 31, 28, 42, 1, 29, 32, 41, 28, 1, 24, 37, 27, 1, 25, 44, 41, 37, 42, 0, 39, 24, 41, 43, 1, 32, 0, 32, 43, 1, 46, 24, 42, 1, 24, 1, 39, 35, 28, 24, 42, 44, 41, 28, 1, 43, 38, 1, 25, 44, 41, 37, 0, 32, 43, 1, 46, 24, 42, 1, 24, 1, 42, 39, 28, 26, 32, 24, 35, 1, 39, 35, 28, 24, 42, 44, 41, 28, 1, 43, 38, 1, 42, 28, 28, 1, 43, 31, 32, 37, 30, 42, 1, 28, 24, 43, 28, 37, 7, 1, 43, 38, 1, 42, 28, 28, 1, 43, 31, 32, 37, 30, 42, 1, 25, 35, 24, 26, 34, 28, 37, 28, 27, 1, 24]


In [30]:
print(dataY)

[24, 37, 27, 1, 26, 31, 24, 37, 30, 28, 27, 9, 1, 46, 32, 43, 31, 1, 43, 31, 28, 1, 25, 41, 24, 42, 42, 1, 37, 38, 49, 49, 35, 28, 1, 32, 37, 1, 31, 32, 42, 1, 29, 32, 42, 43, 42, 7, 1, 46, 32, 43, 31, 1, 43, 31, 32, 42, 1, 30, 41, 28, 24, 43, 1, 39, 48, 43, 31, 38, 37, 1, 42, 39, 32, 43, 43, 32, 37, 30, 1, 32, 43, 42, 1, 45, 28, 37, 38, 36, 38, 44, 42, 1, 34, 28, 41, 38, 42, 28, 37, 28, 1, 44, 39, 38, 37, 1, 43, 31, 28, 1, 46, 38, 41, 35, 27, 7, 1, 43, 31, 28, 1, 25, 35, 38, 38, 27, 1, 39, 38, 44, 37, 27, 28, 27, 1, 32, 37, 1, 31, 32, 42, 1, 31, 28, 24, 27, 7, 1, 24, 37, 27, 1, 31, 32, 42, 1, 31, 24, 37, 27, 42, 1, 46, 28, 41, 28, 1, 43, 31, 28, 1, 31, 24, 37, 27, 42, 1, 38, 29, 1, 42, 38, 36, 28, 1, 24, 36, 24, 49, 32, 37, 30, 1, 26, 38, 37, 27, 44, 26, 43, 38, 41, 1, 39, 35, 24, 48, 32, 37, 30, 1, 24, 35, 35, 1, 43, 31, 28, 1, 42, 48, 36, 39, 31, 38, 37, 32, 28, 42, 1, 38, 29, 1, 25, 35, 24, 49, 32, 37, 30, 1, 24, 37, 27, 1, 25, 44, 41, 37, 32, 37, 30, 1, 43, 38, 1, 25, 41, 32, 37, 

In [31]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [None]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256,input_shape=(X.shape[1], X.shape[2]), return_sequences=True)) 
#model.add(LSTM(512,input_shape=(X.shape[1], X.shape[2]))) 
model.add(Dropout(0.1))
model.add(LSTM(256))
model.add(Dropout(0.1))
model.add(Dense(y.shape[1], activation='softmax')) 
model.compile(loss='categorical_crossentropy', optimizer='adam')
# define the checkpoint 
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-256X256.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=10, batch_size=32, callbacks=callbacks_list)

Epoch 1/10
Epoch 00001: loss improved from inf to 2.59491, saving model to weights-improvement-01-2.5949-256X256.hdf5
Epoch 2/10
Epoch 00002: loss improved from 2.59491 to 2.23678, saving model to weights-improvement-02-2.2368-256X256.hdf5
Epoch 3/10
Epoch 00003: loss improved from 2.23678 to 2.06502, saving model to weights-improvement-03-2.0650-256X256.hdf5
Epoch 4/10
Epoch 00004: loss improved from 2.06502 to 1.95897, saving model to weights-improvement-04-1.9590-256X256.hdf5
Epoch 5/10
Epoch 00005: loss improved from 1.95897 to 1.87842, saving model to weights-improvement-05-1.8784-256X256.hdf5
Epoch 6/10