# In [None]:
# Practical 8: Continuous Bag of Words (CBOW) Model – Document 3

# Step 1: Import Libraries
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (
    Dense,
    Embedding,
    GlobalAveragePooling1D,
    Input,
    Lambda,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

# Step 2: Prepare Sample Text Data
# The corpus is one sentence, lower-cased and split on whitespace, so `text`
# ends up as a list of word tokens.
text = "deep learning is a subset of machine learning that uses neural networks to model complex patterns".lower().split()

# Step 3: Tokenize Words and Create Vocabulary
# The token list is wrapped in an outer list so the tokenizer sees it as a
# single document made of already-split words.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
word2id = tokenizer.word_index
# Reverse lookup: token ID -> word.
id2word = {token_id: word for word, token_id in word2id.items()}
# One slot more than the number of distinct words (Tokenizer IDs start at 1,
# so index 0 is left unused).
vocab_size = 1 + len(word2id)

# Step 4: Generate Training Data for CBOW
# Each sample pairs the IDs of the `window_size` words on either side of a
# position (the context) with the ID of the word at that position (the target).
window_size = 2
data = []

# Only positions that have a full window on both sides are used as targets,
# so the first and last `window_size` words never become targets.
for center in range(window_size, len(text) - window_size):
    left = text[center - window_size:center]
    right = text[center + 1:center + window_size + 1]
    context = [word2id[word] for word in left + right]
    data.append((context, word2id[text[center]]))

# X: one row of 2 * window_size context IDs per sample; y: the target IDs.
X = np.array([pair[0] for pair in data])
y = np.array([pair[1] for pair in data])

# Step 5: Define CBOW Model Architecture
# The network maps the 2 * window_size context word IDs to a probability
# distribution over the vocabulary:
#   Input                  -> fixed-length vector of context word IDs
#   Embedding              -> one embedding_dim-sized vector per context word
#   GlobalAveragePooling1D -> mean of the context vectors (the same result a
#                             Lambda layer around K.mean(x, axis=1) produces)
#   Dense + softmax        -> probability of each vocabulary entry being the
#                             target word
# An explicit Input layer is used instead of Embedding's `input_length`
# argument, which is deprecated in Keras 3. The built-in pooling layer is used
# instead of a Lambda wrapping the legacy backend API, which keeps Python
# lambdas out of the model definition.
embedding_dim = 10
model = Sequential([
    Input(shape=(2 * window_size,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GlobalAveragePooling1D(),
    Dense(vocab_size, activation='softmax'),
])

# Step 6: Compile the Model
# The targets in `y` are integer word IDs rather than one-hot vectors, hence
# the sparse variant of categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Step 7: Train the Model
# Training runs silently (verbose=0); the returned History object is kept so
# the loss curve can be plotted afterwards.
history = model.fit(x=X, y=y, epochs=100, verbose=0)
print("Training complete.")

# Step 8: Plot Training Loss
# Draw the loss values recorded during training against the epoch index.
loss_per_epoch = history.history['loss']
plt.plot(loss_per_epoch)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.show()

# Step 9: Display Word Embeddings
# The first weight array of the model is the embedding matrix; row `idx` is
# the learned vector of the word whose token ID is `idx`.
weights, *_ = model.get_weights()
for word, idx in word2id.items():
    print(f"{word}: {weights[idx]}")


# ------------------------------------------------------------
# EXPLANATION OF EACH STEP
# ------------------------------------------------------------
# Step 1: Import Libraries
# - TensorFlow/Keras for CBOW implementation.
# - Tokenizer for converting words to numeric IDs.
# - Matplotlib for plotting training loss.

# Step 2: Prepare Sample Text Data
# - Custom text (document 3) about deep learning is split into words for training.

# Step 3: Tokenize Words and Create Vocabulary
# - Tokenizer assigns a unique integer to every unique word.
# - Vocabulary size = number of unique words + 1 (Tokenizer IDs start at 1; index 0 is reserved and never assigned to a word).

# Step 4: Generate Training Data
# - CBOW predicts a target word using words around it (context).
# - Window size = 2 → two words before and two after.
# - Example: for “subset”, context = ["is", "a", "of", "machine"].

# Step 5: Define CBOW Model Architecture
# - Embedding layer: converts word IDs to dense vectors.
# - Lambda layer: averages embeddings of context words.
# - Dense layer: uses Softmax to predict the most probable target word.

# Step 6: Compile the Model
# - Loss: sparse categorical crossentropy for predicting a single class.
# - Optimizer: Adam (efficient gradient-based optimization).

# Step 7: Train the Model
# - Model learns to predict target words from their context for 100 epochs.

# Step 8: Plot Training Loss
# - Displays how the model’s loss decreases across epochs, confirming learning progress.

# Step 9: Display Word Embeddings
# - Displays 10-dimensional embeddings learned for each word.
# - Words that occur in similar contexts tend to get similar vectors (on a corpus this small the effect is weak).

# ------------------------------------------------------------
# VIVA QUESTIONS
# ------------------------------------------------------------
# Q1. What is the CBOW model?
#     -> A neural network that predicts a target word given its surrounding context words.
# Q2. What dataset/text is used here?
#     -> A custom text about deep learning and neural networks.
# Q3. What is the difference between CBOW and Skip-Gram?
#     -> CBOW predicts target word from context; Skip-Gram predicts context words from target.
# Q4. What does the Embedding layer do?
#     -> Converts integer tokens into continuous vector representations.
# Q5. Why use the mean (Lambda layer)?
#     -> Averages multiple context word embeddings into one feature vector.
# Q6. What loss and optimizer are used?
#     -> Loss = sparse categorical crossentropy; Optimizer = Adam.
# Q7. How many epochs are used for training?
#     -> 100 epochs for stable learning of relationships.
# Q8. What is the purpose of Softmax layer?
#     -> Outputs probability for each word being the target.
# Q9. What are word embeddings?
#     -> Dense vector representations capturing meaning and relationships of words.
# Q10. Applications of CBOW embeddings?
#     -> Used in NLP tasks like text classification, chatbots, translation, and search engines.
