In [20]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [2]:
# Fetch the Tiny Shakespeare corpus and read it into memory as one string.
# `keras.utils.get_file` hands back a local file path for the given URL.
path = "https://raw.githubusercontent.com/TirendazAcademy/Deep-Learning-with-TensorFlow/main/Data/tinyshakespeare.txt"
filepath = keras.utils.get_file("shakespeare.txt", path)
# Decode explicitly as UTF-8: without `encoding`, open() uses the platform's
# locale encoding, so the same notebook could decode the file differently
# (or fail) depending on the machine it runs on.
with open(filepath, encoding="utf-8") as f:
    text = f.read()


In [16]:
# Character-level tokenizer: the vocabulary is learned from `text`.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(text)

# Encode the whole corpus as a single list of integer ids. Exactly one text
# is passed in, so exactly one encoded sequence comes back.
(sequences,) = tokenizer.texts_to_sequences([text])

# NOTE(review): `padded_sequences` is not referenced by any later cell shown
# here; it is kept so that anything outside this view still finds it.
padded_sequences = pad_sequences(
    [sequences],
    maxlen=100,
    padding="post",
)

In [17]:
# Window size: number of consecutive tokens fed to the model per example.
length = 100

# Contiguous 80% / 10% / remainder split of the encoded corpus
# (train first, then validation, then test).
seq_length = len(sequences)
train_size = int(0.8 * seq_length)
valid_size = int(0.1 * seq_length)
test_size = seq_length - train_size - valid_size

# Index where the validation slice ends and the test slice begins.
valid_end = train_size + valid_size

train_sequences = sequences[:train_size]
valid_sequences = sequences[train_size:valid_end]
test_sequences = sequences[valid_end:]


In [18]:
def create_dataset(sequences, length):
    """Build a next-token prediction dataset from a 1-D token sequence.

    Example ``i`` pairs the window ``sequences[i:i + length]`` with the
    target ``sequences[i + length]``, so a sequence of ``n`` tokens yields
    ``n - length`` examples.

    Args:
        sequences: 1-D sequence of token ids (list or array).
        length: Number of tokens in each input window; must be >= 1.

    Returns:
        A ``tf.data.Dataset`` of ``(window, target)`` pairs, where ``window``
        has shape ``(length,)`` and ``target`` is a scalar. The dataset is
        empty when ``sequences`` holds ``length`` tokens or fewer.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError(f"length must be >= 1, got {length}")

    tokens = np.asarray(sequences)
    n_examples = len(tokens) - length

    if n_examples <= 0:
        # Too short for even one (window, target) pair. Keep the element
        # shapes consistent with the non-empty case instead of returning
        # rank-1 empty arrays.
        X = np.empty((0, length), dtype=tokens.dtype)
        y = np.empty((0,), dtype=tokens.dtype)
    else:
        # Vectorised sliding windows: avoids building one Python list per
        # window. The view has n - length + 1 rows; the last row has no
        # following token to predict, so it is dropped.
        windows = np.lib.stride_tricks.sliding_window_view(tokens, length)
        X = np.ascontiguousarray(windows[:n_examples])
        y = tokens[length:]

    return tf.data.Dataset.from_tensor_slices((X, y))


In [21]:
# Input-pipeline settings, named so they can be tuned in one place.
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 10000

# Only the training stream is shuffled; validation and test keep corpus order.
# `prefetch` lets tf.data prepare upcoming batches while the model is busy
# with the current one.
train_set = (
    create_dataset(train_sequences, length)
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
valid_set = (
    create_dataset(valid_sequences, length)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_set = (
    create_dataset(test_sequences, length)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [22]:
# Size shared by the embedding table and the softmax output layer.
# NOTE(review): Keras Tokenizer ids presumably start at 1, in which case
# len(word_index) + 1 would already cover every id — confirm before shrinking.
vocab_size = len(tokenizer.word_index) + 2

# Embedding -> LSTM -> softmax over the vocabulary (next-character classifier).
model = keras.models.Sequential()
model.add(keras.layers.Embedding(input_dim=vocab_size, output_dim=20))
model.add(keras.layers.LSTM(128))
model.add(keras.layers.Dense(vocab_size, activation="softmax"))

# Targets are integer ids, hence the sparse variant of cross-entropy.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [23]:
# Train for one epoch, validating on `valid_set` at the end of the epoch.
# The returned History object is kept so the per-epoch metrics stay
# available to later cells instead of being lost as a bare cell repr.
history = model.fit(train_set, epochs=1, validation_data=valid_set)

13941/13941 ━━━━━━━━━━━━━━━━━━━━ 1117s 80ms/step - accuracy: 0.3898 - loss: 2.1100 - val_accuracy: 0.4905 - val_loss: 1.7034


<keras.src.callbacks.history.History at 0x2c70703db80>

In [24]:
# Score the trained model on the held-out test batches; the model was
# compiled with a single metric, so evaluate() yields (loss, accuracy).
test_loss, test_acc = model.evaluate(test_set)

# Report accuracy as a percentage.
accuracy_percent = test_acc * 100
print("Total_Accuracy:", accuracy_percent, "%")

1742/1742 ━━━━━━━━━━━━━━━━━━━━ 32s 19ms/step - accuracy: 0.4542 - loss: 1.8560
Total_Accuracy: 45.38585841655731 %
