<a href="https://colab.research.google.com/github/brbalaji-colab/acg/blob/main/tf-gentext.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [6]:
frank_url = 'https://storage.googleapis.com/acg-datasets/tiny_frankenstein.tgz'
cache_dir = '.'
cache_subdir = 'data'
tf.keras.utils.get_file('tiny_frankenstein.tgz', frank_url, extract=True,
                        cache_dir=cache_dir, cache_subdir=cache_subdir)

'./data/tiny_frankenstein.tgz'

In [7]:
# Load data
frank_file = f'{cache_dir}/{cache_subdir}/tiny_frankenstein.txt'

with open(frank_file, 'r') as f:
    frank_data = f.read().lower()

In [8]:
# Learn tokens
tokenizer = Tokenizer()
tokenizer.fit_on_texts([frank_data])
known_words = len(tokenizer.word_index)
total_tokens = known_words + 1  # padding token

In [9]:
# Convert text to tokens
frank_tokens = tokenizer.texts_to_sequences([frank_data])[0]

In [10]:
# Create sequences
def wrangle_data(sequence, examples, batch_size):
    examples = examples + 1
    seq_expand = tf.expand_dims(sequence, -1)
    dataset = tf.data.Dataset.from_tensor_slices(seq_expand)
    dataset = dataset.window(examples, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda b: b.batch(examples))
    dataset = dataset.map(lambda x: (x[:-1], x[-1]))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [11]:
seq_length = 72
train_data = wrangle_data(frank_tokens, seq_length, 64)

In [12]:
def bd_rnn(token_count, sequence_length):
    new_model = tf.keras.Sequential([
        tf.keras.layers.Embedding(token_count, 32, input_length=sequence_length),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
        tf.keras.layers.Dense(token_count, activation='softmax')
    ])
    new_model.compile(optimizer=tf.keras.optimizers.Adam(0.03), loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    return new_model

In [13]:
model = bd_rnn(total_tokens, seq_length)
history = model.fit(train_data, epochs=10)

model.save('frankenstein.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
!nvidia-smi

Tue Jun 20 17:37:19 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   71C    P0    32W /  70W |    861MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [16]:
# Predict text
token_lookup = {v: k for k, v in tokenizer.word_index.items()}

seed = frank_tokens[-seq_length:]
seed_text = ""

for t in seed:
    seed_text += token_lookup[t] + " "
print(seed_text)

gen_tokens = 50

output = []

for _ in range(gen_tokens):
    tokens = pad_sequences([seed], maxlen=seq_length, padding='pre', truncating='pre')
    prediction = model.predict(tokens)
    next_token = np.argmax(prediction)
    output.append(token_lookup[next_token + 1])
    seed.append(next_token)

print(' '.join(output))

wickedness these motives urged me to comply with his demand we crossed the ice therefore and ascended the opposite rock the air was cold and the rain again began to descend we entered the hut the fiend with an air of exultation i with a heavy heart and depressed spirits but i consented to listen and seating myself by the fire which my odious companion had lighted he thus began his tale 
no and woe to striving cold her to but men valley now which be and murderer's i spirit as to but approach to would ask our sorrow many which an foresaw that and england greatest body like unhappiness had to before in secrets that and intended i pursuit your to
