# In [None]:
# Practical 7: Continuous Bag of Words (CBOW) Model – Document 2

# Step 1: Import Libraries
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Embedding, Input, Lambda
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

# Step 2: Prepare Sample Text Data
# Lower-case the sample sentence and split it on whitespace into a word list.
text = (
    "artificial intelligence is transforming industries and creating opportunities across the world"
    .lower()
    .split()
)

# Step 3: Tokenize Words and Create Vocabulary
# The word list is handed to the tokenizer as a single, already-split document.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
word2id = tokenizer.word_index
# Reverse lookup table: integer ID -> word.
id2word = dict(zip(word2id.values(), word2id.keys()))
# One slot more than the number of distinct words (per the Keras docs the
# Tokenizer starts numbering at 1 and never assigns index 0).
vocab_size = 1 + len(word2id)

# Step 4: Generate Training Data for CBOW
# Every position that has a full window on both sides becomes one sample:
# (IDs of the surrounding words, ID of the centre word).
window_size = 2
token_ids = [word2id[word] for word in text]
data = []

for center in range(window_size, len(token_ids) - window_size):
    # Context keeps sentence order: words before the centre, then words after.
    before = token_ids[center - window_size:center]
    after = token_ids[center + 1:center + window_size + 1]
    data.append((before + after, token_ids[center]))

# Split the (context, target) pairs into model inputs and labels.
X = np.array([ctx for ctx, _ in data])
y = np.array([tgt for _, tgt in data])

# Step 5: Define CBOW Model Architecture
embedding_dim = 10
context_len = 2 * window_size  # number of context word IDs per training sample
model = Sequential([
    # Explicit Input layer replaces Embedding's `input_length` argument, which
    # is deprecated in Keras 3; the model is still built at construction time.
    Input(shape=(context_len,), dtype="int32"),
    # Look up one embedding_dim-sized vector per context word ID.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    # Average the context vectors along the sequence axis into a single vector.
    Lambda(lambda x: K.mean(x, axis=1), output_shape=(embedding_dim,)),
    # Score every vocabulary index as the candidate target word.
    Dense(vocab_size, activation='softmax')
])

# Step 6: Compile the Model
# Targets are integer word IDs (not one-hot), hence the sparse loss variant.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# Step 7: Train the Model
# Silent run (verbose=0); the per-epoch loss values are kept on `history`.
history = model.fit(X, y, epochs=100, verbose=0)
print("Training complete.")

# Step 8: Plot Training Loss
# Draw the loss curve on the current axes, label it, then display the figure.
loss_per_epoch = history.history['loss']
axes = plt.gca()
axes.plot(loss_per_epoch)
axes.set_title('Training Loss')
axes.set_xlabel('Epochs')
axes.set_ylabel('Loss')
plt.show()

# Step 9: Display Word Embeddings
# The model's first weight array is the embedding matrix; it is indexed by
# word ID, so each vocabulary word maps to one row.
weights = model.get_weights()[0]
for word in word2id:
    print(f"{word}: {weights[word2id[word]]}")


# ------------------------------------------------------------
# EXPLANATION OF EACH STEP
# ------------------------------------------------------------
# Step 1: Import Libraries
# - TensorFlow/Keras for the neural network.
# - Tokenizer for word-to-index conversion.
# - Matplotlib for loss visualization.

# Step 2: Prepare Sample Text Data
# - A short custom sentence (document 2) is tokenized into words.
# - Used as the training data for CBOW.

# Step 3: Tokenize Words and Create Vocabulary
# - Each unique word is given an integer ID.
# - Vocabulary size = total unique words + 1 (Tokenizer IDs start at 1, so index 0 needs its own slot).

# Step 4: Generate Training Data
# - CBOW predicts the target word from the surrounding context.
# - Window size = 2 → 2 words before and 2 words after the target, so the first and last 2 words of the sentence are never used as targets.

# Step 5: Define CBOW Model Architecture
# - Embedding layer: converts words into dense numeric vectors.
# - Lambda layer: computes average of context embeddings.
# - Dense layer: outputs probability distribution for predicting target word.

# Step 6: Compile the Model
# - Loss: sparse categorical crossentropy.
# - Optimizer: Adam for adaptive learning.

# Step 7: Train the Model
# - Model learns context-word relationships for 100 epochs.

# Step 8: Plot Training Loss
# - Displays how the loss changes over epochs → helps check that training is converging.

# Step 9: Display Word Embeddings
# - Prints learned word vector representations.
# - In principle, words that share similar contexts end up closer in vector space (a single short sentence gives little evidence of this).

# ------------------------------------------------------------
# VIVA QUESTIONS
# ------------------------------------------------------------
# Q1. What is the purpose of the CBOW model?
#     -> To predict a target word given its surrounding context words.
# Q2. What is the input and output of CBOW?
#     -> Input: context words; Output: target word.
# Q3. What is a window size in CBOW?
#     -> Number of words around the target word considered as context.
# Q4. What is the role of the Embedding layer?
#     -> Converts integer word indices into dense vector representations.
# Q5. Why use a mean (Lambda layer) in CBOW?
#     -> Combines multiple context embeddings into a single averaged vector.
# Q6. What loss function is used?
#     -> Sparse categorical crossentropy.
# Q7. What optimizer is used?
#     -> Adam optimizer.
# Q8. What is the advantage of CBOW over Skip-Gram?
#     -> CBOW is faster for large datasets, predicting one center word per context.
# Q9. What does the output layer (Softmax) do?
#     -> Produces probability distribution for all possible target words.
# Q10. Applications of CBOW?
#     -> Word embeddings, NLP tasks like text classification, and semantic analysis.
