In [11]:
#ASSIGNMENT 5
#Title:  Implement the Continuous Bag of Words (CBOW) Model. Stages can be:
#a. Data preparation
#b. Generate training data
#c. Train model
#d. Output
#Name:PRANJAL SUHAS PATIL
#Roll No:46
#Batch: IT-3
#Department: Information Technology

In [70]:
import numpy as np
import re

In [2]:
# -----------------------------------------
# (a) DATA PREPARATION
# -----------------------------------------
# Toy corpus: one short sentence, so the whole pipeline runs instantly.
text = "we are studying deep learning with simple examples to understand how models work"

# Normalise case, then tokenise on whitespace.
words = text.lower().split()

# Unique words, sorted so that every word gets the same index on every run.
# A bare list(set(words)) takes its order from set iteration, which for
# strings can differ from one interpreter session to the next.
vocab = sorted(set(words))
vocab_size = len(vocab)     # number of unique words
embed_dim = 8               # length of the vector that represents each word
window = 2                  # number of context words taken on EACH side of the target

# Lookup tables between words and their integer indices.
word_to_idx = {w: i for i, w in enumerate(vocab)}      # word -> index
idx_to_word = {i: w for w, i in word_to_idx.items()}   # index -> word


In [88]:
# -----------------------------------------
# (b) GENERATE TRAINING DATA (CBOW)
# -----------------------------------------
# Training set: one (context words, target word) pair per position that has
# a full window of neighbours on both sides.
data = []

for i in range(window, len(words) - window):
    # `window` words before position i followed by `window` words after it,
    # in reading order. Slicing by `window` keeps the context size in step
    # with the loop bounds instead of hard-coding offsets of 1 and 2.
    context = words[i - window:i] + words[i + 1:i + window + 1]
    target = words[i]         # the word in the middle is what the model must predict
    data.append((context, target))

In [89]:

# -----------------------------------------
# (c) TRAIN MODEL
# -----------------------------------------
# Seeded generator so the initial weights are the same on every run.
rng = np.random.default_rng(42)

# Embedding table: one row of length embed_dim per vocabulary word.
emb = rng.standard_normal((vocab_size, embed_dim))

# Output layer: maps the concatenated context embeddings (2 * window words,
# each of length embed_dim) to one score per vocabulary word.
W = rng.standard_normal((embed_dim * 2 * window, vocab_size))

def softmax(x):
    """Turn a vector of scores into probabilities that sum to 1.

    The largest score is subtracted before exponentiating; this leaves the
    result unchanged but keeps np.exp from overflowing on large inputs.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / weights.sum()

learning_rate = 0.05   # SGD step size
n_epochs = 20          # full passes over the training pairs

for epoch in range(n_epochs):
    for ctx, tgt in data:
        idxs = [word_to_idx[w] for w in ctx]
        tgt_idx = word_to_idx[tgt]

        # Forward pass: concatenate the context embeddings and score every word.
        # Fancy indexing copies, so x is unaffected by the emb update below.
        x = emb[idxs].reshape(-1)
        y = softmax(x @ W)

        # For softmax + cross-entropy the gradient w.r.t. the scores is
        # (probabilities - one_hot(target)); reuse y to hold it.
        y[tgt_idx] -= 1

        # Gradient w.r.t. the concatenated input. Computed before W changes
        # so that both updates use the same (pre-step) weights.
        grad_x = W @ y

        W -= learning_rate * np.outer(x, y)

        # Update the embeddings too, so they are learned rather than left at
        # their random initial values. subtract.at accumulates correctly when
        # the same word index appears more than once in the context.
        np.subtract.at(emb, idxs, learning_rate * grad_x.reshape(len(idxs), -1))

In [90]:

# -----------------------------------------
# (d) OUTPUT
# -----------------------------------------
def predict(context):
    """Return the vocabulary word the model scores highest for `context`.

    `context` is a sequence of words; each is looked up in `word_to_idx`, so
    a word outside the vocabulary raises KeyError. The looked-up embeddings
    are flattened into one vector and multiplied by `W`, so the number of
    words presumably has to match what `W` was sized for — verify if the
    window size is changed.
    """
    rows = [word_to_idx[word] for word in context]
    features = emb[rows].reshape(-1)
    probabilities = softmax(features @ W)
    best = np.argmax(probabilities)
    return idx_to_word[best]

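# Disabled: "study" is not in this corpus's vocabulary (only "studying" is), so the lookup in predict() would raise KeyError.
# print("Prediction:", predict(["we", "are", "to", "study"]))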

In [92]:
# Context windows to query, in sentence order. Each entry is the two words
# before and the two words after the word we want the model to recover.
sample_contexts = [
    ["we", "are", "deep", "learning"],
    ["are", "studying", "learning", "with"],
    ["studying", "deep", "with", "simple"],
    ["deep", "learning", "simple", "examples"],
    ["learning", "with", "examples", "to"],
    ["with", "simple", "to", "understand"],
    ["simple", "examples", "understand", "how"],
    ["examples", "to", "how", "models"],
    ["to", "understand", "models", "work"],
    ["understand", "how", "work", "we"],  # wraps around the end of the sentence
]

# One loop instead of ten copy-pasted calls; the printed text is unchanged.
for n, ctx_words in enumerate(sample_contexts, start=1):
    print(f"Prediction {n}:", predict(ctx_words))




Prediction 1: studying
Prediction 2: deep
Prediction 3: learning
Prediction 4: with
Prediction 5: simple
Prediction 6: examples
Prediction 7: to
Prediction 8: understand
Prediction 9: how
Prediction 10: to


In [61]:
# import numpy as np
# import re

# # --------------------------------------------------
# # (a) DATA PREPARATION
# # --------------------------------------------------
# text ="""We are about to study the idea of a computational process.
# Computational processes are abstract beings that inhabit computers.
# As they evolve, processes manipulate other abstract things called data.
# The evolution of a process is directed by a pattern of rules
# called a program. People create programs to direct processes. In effect,
# we conjure the spirits of the computer with our spells."""
# text = text.lower()
# words = text.split()

# vocab = list(set(words))
# vocab_size = len(vocab)
# embed_dim = 8
# window = 2

# word_to_idx = {w:i for i,w in enumerate(vocab)}
# idx_to_word = {i:w for w,i in word_to_idx.items()}

# # --------------------------------------------------
# # (b) GENERATE TRAINING DATA (CBOW)
# # --------------------------------------------------
# data = []
# for i in range(window, len(words)-window):
#     context = [words[i-2], words[i-1], words[i+1], words[i+2]]
#     target = words[i]
#     data.append((context, target))

# # --------------------------------------------------
# # (c) TRAIN MODEL (VERY SIMPLE CBOW)
# # --------------------------------------------------
# emb = np.random.randn(vocab_size, embed_dim)
# W = np.random.randn(embed_dim*4, vocab_size)

# def softmax(x):
#     e = np.exp(x - np.max(x))
#     return e / e.sum()

# for epoch in range(25):
#     for ctx, tgt in data:

#         ctx_idx = [word_to_idx[w] for w in ctx]
#         tgt_idx = word_to_idx[tgt]

#         ctx_vec = emb[ctx_idx].reshape(-1)
#         scores = ctx_vec @ W
#         probs = softmax(scores)

#         probs[tgt_idx] -= 1
#         W -= 0.05 * np.outer(ctx_vec, probs)

# # --------------------------------------------------
# # (d) OUTPUT (Prediction)
# # --------------------------------------------------
# def predict(context_words):
#     idxs = [word_to_idx[w] for w in context_words]
#     vec = emb[idxs].reshape(-1)
#     probs = softmax(vec @ W)
#     return idx_to_word[np.argmax(probs)]

# print("Prediction:", predict(["we", "are", "to", "study"]))
# print("Prediction 9:", predict(["spirits","of","computer","with"]))  

Prediction: about
Prediction 9: the
