#Implement a Transformer-based Language Model for text generation

In [None]:
# Install required libraries
!pip install tensorflow numpy

#1. Import Libraries and Sample Data

In [1]:
import tensorflow as tf
import numpy as np

# Sample tiny corpus
sentences = [
    "hello world",
    "how are you",
    "hello how are you",
    "hello you",
    "are you there"
]

# 2. Tokenization & Padding

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

sequences = tokenizer.texts_to_sequences(sentences)
max_len = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

vocab_size = len(tokenizer.word_index) + 1


# 3. Prepare Input and Target

In [3]:
X = padded_sequences[:, :-1]  # All except last token
y = padded_sequences[:, 1:]   # All except first token

y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)


# 4. Build a Mini Transformer Model

In [4]:
from tensorflow.keras.layers import Input, Embedding, Dense, LayerNormalization, Dropout, MultiHeadAttention
from tensorflow.keras.models import Model

embed_dim = 64
num_heads = 2
ff_dim = 128

input_seq = Input(shape=(X.shape[1],))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(input_seq)

# Multi-Head Attention
attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(embedding_layer, embedding_layer)
attn_output = Dropout(0.1)(attn_output)
out1 = LayerNormalization(epsilon=1e-6)(embedding_layer + attn_output)

# Feed-Forward
ffn = Dense(ff_dim, activation='relu')(out1)
ffn = Dense(embed_dim)(ffn)
ffn_output = Dropout(0.1)(ffn)
out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

# Output layer
final_output = Dense(vocab_size, activation='softmax')(out2)

model = Model(inputs=input_seq, outputs=final_output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()


# 5. Train the Model

In [5]:
model.fit(X, y, batch_size=2, epochs=100, verbose=0)
print("Model Trained.")


✅ Model Trained.


# 6. Generate Text (Prediction Function)

In [6]:
def generate_text(seed_text, next_words=5):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=X.shape[1], padding='post')
        predicted_probs = model.predict(token_list, verbose=0)
        predicted_id = np.argmax(predicted_probs[0][-1])  # Get next word
        output_word = tokenizer.index_word.get(predicted_id, '')
        seed_text += " " + output_word
    return seed_text

print(generate_text("hello"))


hello     
