#LAB - 4

Task 0

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, SimpleRNN, GRU, MultiHeadAttention, LayerNormalization, Dropout, Input, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential, Model
import random
import re


# Raw training corpus: short English sentences about AI/ML, one per line,
# each ending with a full stop.
raw_text = """
artificial intelligence is transforming modern society.
it is used in healthcare finance education and transportation.
machine learning allows systems to improve automatically with experience.
data plays a critical role in training intelligent systems.
large datasets help models learn complex patterns.
deep learning uses multi layer neural networks.
neural networks are inspired by biological neurons.
each neuron processes input and produces an output.
training a neural network requires optimization techniques.
gradient descent minimizes the loss function.
natural language processing helps computers understand human language.
text generation is a key task in nlp.
language models predict the next word or character.
recurrent neural networks handle sequential data.
lstm and gru models address long term dependency problems.
however rnn based models are slow for long sequences.
transformer models changed the field of nlp.
they rely on self attention mechanisms.
attention allows the model to focus on relevant context.
transformers process data in parallel.
this makes training faster and more efficient.
modern language models are based on transformers.
education is being improved using artificial intelligence.
intelligent tutoring systems personalize learning.
automated grading saves time for teachers.
online education platforms use recommendation systems.
technology enhances the quality of learning experiences.
ethical considerations are important in artificial intelligence.
fairness transparency and accountability must be ensured.
ai systems should be designed responsibly.
data privacy and security are major concerns.
researchers continue to improve ai safety.
text generation models can create stories poems and articles.
they are used in chatbots virtual assistants and content creation.
generated text should be meaningful and coherent.
evaluation of text generation is challenging.
human judgement is often required.
continuous learning is essential in the field of ai.
research and innovation drive technological progress.
students should build strong foundations in mathematics.
programming skills are important for ai engineers.
practical experimentation enhances understanding.
"""

# 2. Preprocessing function
def clean_text(text):
    """Normalise raw text into a single line of lowercase words.

    The text is lower-cased, every character other than ``a``-``z`` and
    whitespace is dropped, each run of whitespace is collapsed to a single
    space, and leading/trailing whitespace is removed.
    """
    letters_and_spaces = re.sub(r'[^a-z\s]', '', text.lower())
    collapsed = re.sub(r'\s+', ' ', letters_and_spaces)
    return collapsed.strip()

# Normalise the raw corpus once; the n-gram model and the tokenizer in the
# later cells are both built from this cleaned string.
corpus = clean_text(raw_text)
# Report the corpus size and preview its first 100 characters.
print(f"Corpus Length: {len(corpus)} characters")
print(f"Sample: {corpus[:100]}...")

Corpus Length: 2166 characters
Sample: artificial intelligence is transforming modern society it is used in healthcare finance education an...


Task 1

In [7]:
class NGramModel:
    """Word-level n-gram text generator.

    ``n`` is the number of preceding words used as the context (history)
    when choosing the next word, so ``n=1`` conditions on a single word.
    """

    def __init__(self, n):
        self.n = n
        # Maps a context tuple of ``n`` words to the list of every word seen
        # right after it. Duplicates are kept so that random.choice samples
        # followers in proportion to how often they occurred.
        self.ngrams = {}

    def train(self, text):
        """Record, for each window of ``n`` words in ``text``, the word that follows it.

        Calling ``train`` again adds to the existing statistics.
        """
        words = text.split()
        for i in range(len(words) - self.n):
            seq = tuple(words[i:i + self.n])  # history
            next_word = words[i + self.n]     # target
            self.ngrams.setdefault(seq, []).append(next_word)

    def generate(self, seed_text, length=10):
        """Extend ``seed_text`` by up to ``length`` randomly sampled words.

        At each step the last ``n`` generated words form the context. If that
        context was seen during training, one of its recorded followers is
        sampled; otherwise a word is drawn uniformly from all words that ever
        appeared as a follower.

        Returns the lower-cased seed followed by the generated words, joined
        by single spaces. If the model holds no statistics (never trained, or
        trained on text too short to form an n-gram) the normalised seed is
        returned unchanged instead of failing.
        """
        output = seed_text.lower().split()

        # Fallback vocabulary for unseen contexts. It is loop-invariant, so it
        # is built once; sorting gives a stable order so runs are reproducible
        # under random.seed().
        fallback_words = sorted(
            {word for followers in self.ngrams.values() for word in followers}
        )
        if not fallback_words:
            # Nothing to sample from; random.choice([]) would raise IndexError.
            return ' '.join(output)

        for _ in range(length):
            # Last n words are the context. output[-0:] would be the whole
            # list, so n == 0 is mapped to the empty context explicitly.
            context = tuple(output[-self.n:]) if self.n else ()
            candidates = self.ngrams.get(context, fallback_words)
            output.append(random.choice(candidates))

        return ' '.join(output)

# Bigram experiment: n=1 means one word of history is used to pick the next word.
# (Only this bigram configuration is run here; no trigram model is trained.)
print("-NGram-")
ngram_model = NGramModel(n=1) # Using 1 previous word to predict next
ngram_model.train(corpus)
# Generate 15 words after the seed word "artificial".
print("Generated (N-gram):", ngram_model.generate("artificial", length=15))

-NGram-
Generated (N-gram): artificial intelligence intelligent systems should be designed responsibly data privacy and innovation drive technological progress students


Task 2

In [8]:
# Tokenization: build the word -> integer index vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([corpus])
# +1 because Keras word indices start at 1; index 0 is left for padding.
total_words = len(tokenizer.word_index) + 1

# Create Input Sequences
input_sequences = []
# The whole corpus as one list of integer word ids.
token_list = tokenizer.texts_to_sequences([corpus])[0]

# Sliding window to create sequences: each sample holds up to `sequence_length`
# context tokens followed by the token to predict, i.e. at most
# sequence_length + 1 = 7 tokens (shorter near the start of the corpus).
sequence_length = 6 # Number of context tokens kept before the target token
for i in range(1, len(token_list)):
    n_gram_sequence = token_list[max(0, i-sequence_length):i+1]
    input_sequences.append(n_gram_sequence)

# Pad sequences on the left so every row has the same length.
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Create Predictors and Label: the last column is the target word id,
# all preceding columns are the model input.
X, y = input_sequences[:,:-1], input_sequences[:,-1]
# One-hot encode the targets to match the categorical_crossentropy loss used below.
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

# Helper function to generate text for NN models
def generate_text_nn(model, tokenizer, seed_text, max_sequence_len, next_words=10):
    """Greedily extend ``seed_text`` by ``next_words`` words using ``model``.

    Args:
        model: trained model whose ``predict`` returns one row of per-word
            scores for each input sequence.
        tokenizer: fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        seed_text: text to start from.
        max_sequence_len: padded training sequence length including the
            target; the model input is ``max_sequence_len - 1`` tokens.
        next_words: number of words to append.

    Returns:
        ``seed_text`` with the predicted words appended, space separated.
    """
    # Build the index -> word lookup once instead of scanning word_index for
    # every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        probabilities = model.predict(token_list, verbose=0)
        # argmax yields one class id per input row; only one row is fed in, so
        # take element 0 as a plain int rather than comparing a 1-element
        # array against the vocabulary indices.
        predicted = int(np.argmax(probabilities, axis=-1)[0])
        # An index with no vocabulary word (e.g. the padding index 0)
        # contributes an empty string, as before.
        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text

# --- TASK 2: SIMPLE RNN IMPLEMENTATION ---
print("\n-RNN-")

# Embedding -> SimpleRNN -> softmax over the vocabulary, assembled layer by layer.
model_rnn = Sequential()
model_rnn.add(Embedding(total_words, 64, input_length=max_sequence_len-1))
model_rnn.add(SimpleRNN(100))
model_rnn.add(Dense(total_words, activation='softmax'))

model_rnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train silently for 50 epochs on the full dataset.
model_rnn.fit(X, y, epochs=50, verbose=0)

rnn_sample = generate_text_nn(model_rnn, tokenizer, "artificial intelligence", max_sequence_len)
print("Generated (RNN):", rnn_sample)


-RNN-
Generated (RNN): artificial intelligence intelligence intelligence modern society it is is the healthcare finance


Task 3

In [9]:
# --- TASK 3: LSTM & GRU IMPLEMENTATION ---

def _train_recurrent_model(recurrent_cls):
    """Build, compile and fit an Embedding -> recurrent_cls(100) -> softmax model.

    The recurrent layer is instantiated in place between the embedding and the
    output layer, so layers are created in the same order as a literal
    ``Sequential([...])`` definition.
    """
    model = Sequential([
        Embedding(total_words, 64, input_length=max_sequence_len-1),
        recurrent_cls(100),
        Dense(total_words, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X, y, epochs=50, verbose=0)
    return model


# 3.1 LSTM Model (Long Short-Term Memory layer)
print("\n-LSTM-")
model_lstm = _train_recurrent_model(LSTM)

# 3.2 GRU Model (Gated Recurrent Unit layer)
print("-GRU-")
model_gru = _train_recurrent_model(GRU)

# Sample from both models using the same seed text.
for label, trained_model in (("Generated (LSTM):", model_lstm), ("Generated (GRU): ", model_gru)):
    print(label, generate_text_nn(trained_model, tokenizer, "deep learning", max_sequence_len))


-LSTM-
-GRU-
Generated (LSTM): deep learning is is learning to in nlp nlp nlp the models
Generated (GRU):  deep learning is in in intelligence intelligent systems systems help to learning


Task 4

In [10]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Embedding, MultiHeadAttention, LayerNormalization, Dropout, Dense, Input, GlobalAveragePooling1D
from tensorflow.keras.models import Model, Sequential

# 1. Define Position Embedding Layer
class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of rows in the position-embedding table; inputs must
            not be longer than this many tokens.
        vocab_size: number of rows in the token-embedding table.
        embed_dim: size of each embedding vector.
        **kwargs: standard ``Layer`` keyword arguments (for example ``name``),
            forwarded to the base class.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position."""
        # Use the actual length of the last axis of the input, not self.maxlen.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        # positions has no batch axis; the addition broadcasts it over x.
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be saved and re-created."""
        config = super().get_config()
        config.update({
            "maxlen": self.maxlen,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

# 2. Define Transformer Block (FIXED: added default for training)
class TransformerBlock(Layer):
    """Self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalisation.

    Args:
        embed_dim: size of the input/output feature vectors; also used as the
            attention ``key_dim``.
        num_heads: number of attention heads.
        ff_dim: hidden size of the feed-forward network.
        rate: dropout rate applied after attention and after the
            feed-forward network.
        **kwargs: standard ``Layer`` keyword arguments (for example ``name``),
            forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        ``training`` defaults to ``None`` so the layer can be called without
        passing it explicitly; it is forwarded to both dropout layers.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)  # residual + norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)     # residual + norm

    def get_config(self):
        """Return the constructor arguments so the layer can be saved and re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# 3. Build the Transformer Model
embed_dim = 64  # Embedding size for each token
num_heads = 2   # Number of attention heads
ff_dim = 32     # Hidden layer size in feed forward network inside transformer

# Model Architecture: token+position embedding -> one transformer block ->
# average pooling over the sequence -> small dense head -> softmax over the vocabulary.
# The input length matches X: max_sequence_len-1 tokens (the last token is the label).
inputs = Input(shape=(max_sequence_len-1,))
embedding_layer = TokenAndPositionEmbedding(max_sequence_len-1, total_words, embed_dim)
x = embedding_layer(inputs)

transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)  # training is not passed here; call() defaults it to None

x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(20, activation="relu")(x)
outputs = Dense(total_words, activation="softmax")(x)

model_transformer = Model(inputs=inputs, outputs=outputs)

print("\n-Transformer-")
# Same loss/optimizer/epoch settings as the recurrent models above.
model_transformer.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model_transformer.fit(X, y, epochs=50, verbose=0)

# Generate Text Function (redefinition of the helper from the Task 2 cell)
def generate_text_nn(model, tokenizer, seed_text, max_sequence_len, next_words=10):
    """Append ``next_words`` greedily predicted words to ``seed_text``.

    On every step the text generated so far is tokenised, left-padded or
    truncated to ``max_sequence_len - 1`` tokens, and fed to ``model``; the
    highest-scoring word index is mapped back to its word via
    ``tokenizer.word_index`` and appended after a space. An index with no
    matching word contributes an empty string.
    """
    generated = seed_text
    for _ in range(next_words):
        encoded = tokenizer.texts_to_sequences([generated])[0]
        padded = pad_sequences([encoded], maxlen=max_sequence_len-1, padding='pre')
        scores = model.predict(padded, verbose=0)
        best_index = np.argmax(scores, axis=-1)[0]
        # First vocabulary entry whose index equals the prediction, else "".
        next_word = next(
            (word for word, index in tokenizer.word_index.items() if index == best_index),
            "",
        )
        generated += " " + next_word
    return generated

# Sample from the transformer: the helper appends its default of 10 words to the seed.
print("Generated (Transformer):", generate_text_nn(model_transformer, tokenizer, "transformer models", max_sequence_len))


-Transformer-
Generated (Transformer): transformer models is is neural networks are inspired by biological neurons each
