<a href="https://colab.research.google.com/github/budhiacharya/AI-Lesson/blob/main/Assign_13_Ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 13: Generative AI Essentials

In [19]:
# Imports necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.19.0


In [20]:
# Dataset: Public Shakespeare text
path = keras.utils.get_file(
    "shakespeare.txt",
    "https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt"
)

text = open(path, "r", encoding="utf-8").read().lower()
print("Total characters:", len(text))
print(text[:300])

Total characters: 1115394
first citizen:
before we proceed any further, hear me speak.

all:
speak, speak.

first citizen:
you are all resolved rather to die than to famish?

all:
resolved. resolved.

first citizen:
first, you know caius marcius is chief enemy to the people.

all:
we know't, we know't.

first citizen:
let us


In [21]:
# Word Tokenization

tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts([text])

word_index = tokenizer.word_index
total_words = len(word_index) + 1
print("Total unique words:", total_words)

# Convert full text to a list of word IDs
token_list = tokenizer.texts_to_sequences([text])[0]
print("Total tokens:", len(token_list))

Total unique words: 12634
Total tokens: 204089


In [22]:
# Create Training Sequences
seq_length = 10  # total length: 9 inputs + 1 label

input_sequences = []
for i in range(seq_length, len(token_list)):
    seq = token_list[i - seq_length:i]
    input_sequences.append(seq)

input_sequences = np.array(input_sequences)
X = input_sequences[:, :-1]  # first 9 words
y = input_sequences[:, -1]   # 10th word (label)

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (204079, 9)
y shape: (204079,)


In [23]:
#  Build Simple LSTM Model (compatible version)
# 4. Build Simple LSTM Model
# =====================================================
embedding_dim = 64
rnn_units = 128

model = keras.Sequential([
    layers.Embedding(total_words, embedding_dim, input_length=seq_length - 1),
    layers.LSTM(rnn_units),
    layers.Dense(total_words, activation="softmax")
])

model.summary()

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam"
)



In [24]:
#  Train Model
EPOCHS = 5
history = model.fit(X, y, batch_size=256, epochs=EPOCHS)

Epoch 1/5
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 141ms/step - loss: 7.2601
Epoch 2/5
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 143ms/step - loss: 6.5072
Epoch 3/5
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 141ms/step - loss: 6.2783
Epoch 4/5
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 141ms/step - loss: 6.0862
Epoch 5/5
[1m798/798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 140ms/step - loss: 5.9154


In [25]:
# Text Generation Function
# 6. Text Generation Function
# =====================================================
def generate_text(seed_text, num_words=30, temperature=1.0):
    """
    Generate text word by word from a seed.
    - seed_text: starting sentence
    - num_words: how many words to append
    - temperature: >1.0 = more random, <1.0 = more conservative
    """
    text_out = seed_text

    for _ in range(num_words):
        # Convert current text to sequence of IDs
        token_seq = tokenizer.texts_to_sequences([text_out])[0]

        # Keep only the last (seq_length - 1) words
        token_seq = token_seq[-(seq_length - 1):]

        # Pad if shorter
        padded = pad_sequences([token_seq], maxlen=seq_length - 1, padding="pre")

        # Predict next-word distribution
        preds = model.predict(padded, verbose=0)[0]

        # Apply temperature
        preds = np.log(preds + 1e-8) / temperature
        preds = np.exp(preds)
        preds = preds / np.sum(preds)

        # Avoid index 0 (padding)
        preds[0] = 0
        preds = preds / np.sum(preds)

        # Sample next word
        next_index = np.random.choice(len(preds), p=preds)
        next_word = tokenizer.index_word.get(next_index, "")

        # Append and continue
        if next_word:
            text_out += " " + next_word

    return text_out

In [26]:
# Application Demostration (Content Generation)
print("\n--- Example 1: Data science topic ---")
print(generate_text("data science is", num_words=40, temperature=0.8))

print("\n--- Example 2: Future of AI ---")
print(generate_text("the future of artificial intelligence is", num_words=40, temperature=0.8))

print("\n--- Example 3: Renewable energy ---")
print(generate_text("the benefits of renewable energy include", num_words=40, temperature=0.9))


--- Example 1: Data science topic ---
data science is the stream of the king richard hast my life prince what to my needy lord whom and you will it never answer him pale there and so i would hear to be the loss of her breath o i senator

--- Example 2: Future of AI ---
the future of artificial intelligence is the clouds of kisses yet glisters it was now perform'd a lightning if thou jest though what friar fit you he friend to day desire your gracious hands to do me to be a man that i be honour pompey

--- Example 3: Renewable energy ---
the benefits of renewable energy include whose antic hearts to mine thoughts i am so thou hast make your injustice but i am my heart to this caparison and canst have a very peace if not that that a interchange of patience then fair man that
