In [17]:
# Import necessary libraries
import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Toy corpus the CBOW model is trained on
text_data = """I love to eat pizza. 
Cat and dogs are palying.while the dog is barking on strangers ,this is the CBOW model implementation example """

# Learn the word -> integer index mapping from the corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text_data])

# One slot per known word plus one extra for index 0, which is used for
# padding further down (the Keras Tokenizer does not assign 0 to any word)
total_words = 1 + len(tokenizer.word_index)

# Build the (context, target) training pairs for CBOW
window_size = 2  # how many context words are taken from each side of the target
token_list = tokenizer.texts_to_sequences([text_data])[0]

# Only positions that have a full window on both sides are used as targets
target_positions = range(window_size, len(token_list) - window_size)

# Context = the window_size tokens before the target followed by the
# window_size tokens after it; the target itself is left out
input_sequences = [
    token_list[pos - window_size:pos] + token_list[pos + 1:pos + window_size + 1]
    for pos in target_positions
]
labels = [token_list[pos] for pos in target_positions]

# Contexts become a 2-D array of word indices; targets become one-hot rows
# over the vocabulary
X = np.array(input_sequences)
y = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# Define the CBOW model: embed every context word, average the embeddings
# into a single vector, and classify that vector over the whole vocabulary.
model = Sequential()
# The input shape (2 * window_size word indices per sample) is declared with an
# explicit Input layer. Embedding's `input_length` argument is deprecated in
# Keras 3 and only triggers a warning there, so it is no longer passed.
model.add(tf.keras.Input(shape=(2 * window_size,), dtype="int32"))
model.add(Embedding(total_words, 10))  # 10-dimensional word vectors
model.add(GlobalAveragePooling1D())  # mean over the context positions
model.add(Dropout(0.2))  # Adding dropout for regularization
model.add(Dense(total_words, activation='softmax'))  # one probability per word index

# Compile the model; the targets are one-hot rows, hence categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=100, verbose=1)

def predict_word(context_words):
    """Predict the target word for a string of context words.

    Args:
        context_words: Space-separated context words, ideally
            ``2 * window_size`` of them (the words before the target
            followed by the words after it). The string is converted with
            the module-level ``tokenizer`` and padded at the front with 0
            up to ``2 * window_size`` entries.

    Returns:
        The vocabulary word with the highest predicted probability.
    """
    context_seq = tokenizer.texts_to_sequences([context_words])[0]
    context_seq = pad_sequences([context_seq], maxlen=2 * window_size, padding='pre')
    predicted_probs = model.predict(context_seq)
    # Output index 0 is the padding slot and has no entry in
    # tokenizer.index_word, so looking it up would raise KeyError. Restrict
    # the argmax to the real word indices (1 .. total_words - 1).
    predicted_index = int(np.argmax(predicted_probs[0, 1:])) + 1
    predicted_word = tokenizer.index_word[predicted_index]
    return predicted_word

# Example prediction: the context string holds four words, i.e. the two words
# before the target followed by the two words after it
context_words = "is the model implementation"
predicted_word = predict_word(context_words)
print("Predicted target word:", predicted_word)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.1000 - loss: 3.1350
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - accuracy: 0.0000e+00 - loss: 3.1364
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.0000e+00 - loss: 3.1375
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - accuracy: 0.0500 - loss: 3.1342
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - accuracy: 0.1500 - loss: 3.1284
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - accuracy: 0.1000 - loss: 3.1287
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - accuracy: 0.1000 - loss: 3.1287
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy: 0.1500 - loss: 3.1295
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━