In [11]:
# WEEK 4 -> TRANSFORMER TEXT GENERATION


# Importing Libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os


# Loading Text File
with open("training_data.txt", "r", encoding="utf-8") as f:
    text = f.read()

print("Total characters:", len(text))


# Creating Vocabulary
vocab = sorted(set(text))

vocab_size = len(vocab)

print("Vocabulary size:", vocab_size)


# Character to Number
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)


# Converting Text to Numbers
text_as_int = np.array([char2idx[c] for c in text])


# Creating Sequences
seq_length = 100

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length+1, drop_remainder=True)


# Input and Target Split
def split_input_target(chunk):

    input_text = chunk[:-1]
    target_text = chunk[1:]

    return input_text, target_text


dataset = sequences.map(split_input_target)


# Shuffle and Batch
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)


# Positional Embedding Layer
class PositionalEmbedding(tf.keras.layers.Layer):

    def __init__(self, vocab_size, embed_dim, max_len=1000):

        super().__init__()

        self.token_emb = tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=embed_dim
        )

        self.pos_emb = tf.keras.layers.Embedding(
            input_dim=max_len,
            output_dim=embed_dim
        )


    def call(self, x):

        maxlen = tf.shape(x)[1]

        positions = tf.range(start=0, limit=maxlen, delta=1)

        positions = self.pos_emb(positions)

        x = self.token_emb(x)

        return x + positions



# Transformer Block
class TransformerBlock(tf.keras.layers.Layer):

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):

        super().__init__()

        self.att = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim
        )

        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(ff_dim, activation="relu"),
            tf.keras.layers.Dense(embed_dim),
        ])

        self.layernorm1 = tf.keras.layers.LayerNormalization()
        self.layernorm2 = tf.keras.layers.LayerNormalization()

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)


    def call(self, inputs, training=False):

        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)

        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)

        return self.layernorm2(out1 + ffn_output)



# Building Model
embed_dim = 256
num_heads = 4
ff_dim = 512


inputs = tf.keras.layers.Input(shape=(None,))


embedding_layer = PositionalEmbedding(
    vocab_size, embed_dim
)

x = embedding_layer(inputs)


transformer_block = TransformerBlock(
    embed_dim, num_heads, ff_dim
)

x = transformer_block(x, training=True)


x = tf.keras.layers.Dense(vocab_size)(x)


model = tf.keras.Model(inputs=inputs, outputs=x)


# Compiling the Model
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True
)

model.compile(
    optimizer="adam",
    loss=loss_fn
)

model.summary()


# Training the Model
EPOCHS = 1

history = model.fit(dataset, epochs=EPOCHS)


# Text Generation

def generate_text(model, start_string, num_generate=400):

    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    temperature = 0.8


    for i in range(num_generate):

        predictions = model(input_eval)

        predictions = predictions[:, -1, :] / temperature

        predicted_id = tf.random.categorical(
            predictions, num_samples=1
        )[0,0].numpy()

        input_eval = tf.expand_dims(
            [predicted_id], 0
        )

        text_generated.append(
            idx2char[predicted_id]
        )

    return start_string + ''.join(text_generated)



# Generating Sample Text

print("\n\nGenerated Shakespeare Style Text:\n")

print(generate_text(
    model,
    start_string="My lord,",
    num_generate=500
))


Total characters: 1115394
Vocabulary size: 65


[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m361s[0m 2s/step - loss: 2.7079


Generated Shakespeare Style Text:

My lord, IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIKRCED   oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


In this code, I implemented a Transformer-based model using TensorFlow to generate text in Shakespearean style, as asked in the Week-4 assignment.

First, I loaded the Shakespeare text file and created a vocabulary of all characters present in the dataset. Then, I converted each character into numerical form so that it can be processed by the neural network. After this, I prepared input and target sequences for training.

Next, I created a positional embedding layer to add information about the position of each character in a sentence. This helps the Transformer understand the order of words. I also implemented a Transformer block using multi-head self-attention, feed-forward layers, normalization, and dropout.

Using these layers, I built a Transformer model that learns relationships between characters in the text. Then, I compiled the model using the Adam optimizer and trained it on the Shakespeare dataset for a few epochs.

After training, I used the model to generate new text by giving a starting sentence. The model predicts the next character repeatedly and forms new sentences in Shakespearean style.

Due to limited training time and hardware constraints, the model was trained for a small number of epochs(=1), so the generated text is basic. However, the model successfully learns patterns from the data and demonstrates text generation.