# In [3]:
# Import necessary libraries and modules
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from ptb import load_data
from cbow import CBOW
from skip_gram import SkipGram

# Load the Penn Treebank dataset.
# Every load_data call is unpacked as a 3-tuple. Only the 'train' call keeps
# the word<->id mappings; the 'test' and 'valid' calls discard their second
# and third elements into `_`.
# NOTE(review): presumably all three splits share one vocabulary, which is why
# the train mappings are reused everywhere below - confirm in ptb.load_data.
train_data, word_to_id, id_to_word = load_data('train')
test_data, _, _ = load_data('test')
valid_data, _, _ = load_data('valid')

# Function to find the top 5 words similar to a given word
def find_similar_words(model, word, top_n=5):
    """Return up to ``top_n`` vocabulary words closest to ``word``.

    Closeness is the cosine similarity between the row of ``model.word_vecs``
    for ``word`` and every row of ``model.word_vecs``.

    Args:
        model: Object exposing ``word_vecs``, a 2-D array indexed by word id.
        word: Query word, looked up in the module-level ``word_to_id``.
        top_n: Maximum number of neighbours to return.

    Returns:
        A list of words ordered from most to least similar. The query word is
        never included. The list is empty when ``word`` is not in
        ``word_to_id``, when ``top_n`` is not positive, or when the vocabulary
        holds no other word.
    """
    if word not in word_to_id:
        return []

    query_id = word_to_id[word]
    query_vec = model.word_vecs[query_id].reshape(1, -1)
    scores = cosine_similarity(query_vec, model.word_vecs)[0]

    # Exclude the query by its own id. Dropping "whatever sorts last" is only
    # right when the query is the unique maximum, which fails for tied,
    # duplicated or all-zero vectors.
    scores[query_id] = -np.inf

    # Never ask for more neighbours than there are other words.
    count = min(top_n, scores.shape[0] - 1)
    if count <= 0:
        return []

    # Negating gives descending order; the masked query (-inf) sorts last.
    ranked_ids = np.argsort(-scores, kind="stable")[:count]
    return [id_to_word[i] for i in ranked_ids]

# Build the CBOW model (the training loop is run separately, further below).
# NOTE(review): whether the constructor itself does any work with `corpus`
# cannot be told from here - confirm in cbow.CBOW.
cbow_model = CBOW(vocab_size=len(word_to_id), hidden_size=100, window_size=5, corpus=train_data)

# Build the Skip-Gram model with the same hyperparameters as the CBOW model.
skip_gram_model = SkipGram(vocab_size=len(word_to_id), hidden_size=100, window_size=5, corpus=train_data)

# Function to train a model
def train_model(model, data, word_to_id, id_to_word, num_epochs=10):
    """Run ``num_epochs`` passes over ``data`` and report the loss per epoch.

    For every ``(context, target)`` pair the model's ``forward`` and
    ``backward`` are called in that order, then ``model.loss`` is added to the
    running total for the epoch. The total is printed after each epoch.

    Args:
        model: Object exposing ``forward(context, target)``, ``backward()``
            and a ``loss`` attribute that is read after each step.
        data: Iterable of ``(context, target)`` pairs. It is iterated once per
            epoch, so it must be re-iterable (not a one-shot generator).
        word_to_id: Unused; kept so existing call sites keep working.
        id_to_word: Unused; kept so existing call sites keep working.
        num_epochs: Number of full passes over ``data``.

    Returns:
        A list holding the summed loss of each epoch, in epoch order.
    """
    # NOTE(review): no optimizer step is made here, so backward() is assumed
    # to apply the parameter update itself - confirm against the model classes.
    # NOTE(review): the call sites pass the first element returned by
    # load_data('train') as `data`; confirm it really yields
    # (context, target) pairs.
    epoch_losses = []
    for epoch in range(num_epochs):
        total_loss = 0
        for context, target in data:
            model.forward(context, target)
            model.backward()
            total_loss += model.loss

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss}')
        epoch_losses.append(total_loss)

    # The trained vectors already live on model.word_vecs; nothing to copy.
    return epoch_losses

# Fit both architectures on the training split: CBOW first, then Skip-Gram.
train_model(cbow_model, train_data, word_to_id, id_to_word)
train_model(skip_gram_model, train_data, word_to_id, id_to_word)

# Nearest neighbours of 'stock', 'revenue', 'savings' and 'debt'.
# Each statement queries the CBOW model first and the Skip-Gram model second.
similar_words_stock_cbow, similar_words_stock_skip_gram = (
    find_similar_words(m, 'stock', top_n=5) for m in (cbow_model, skip_gram_model)
)
similar_words_revenue_cbow, similar_words_revenue_skip_gram = (
    find_similar_words(m, 'revenue', top_n=5) for m in (cbow_model, skip_gram_model)
)
similar_words_savings_cbow, similar_words_savings_skip_gram = (
    find_similar_words(m, 'savings', top_n=5) for m in (cbow_model, skip_gram_model)
)
similar_words_debt_cbow, similar_words_debt_skip_gram = (
    find_similar_words(m, 'debt', top_n=5) for m in (cbow_model, skip_gram_model)
)

# Report the neighbours word by word, CBOW line before Skip-Gram line.
print("Words similar to 'stock' (CBOW):", similar_words_stock_cbow)
print("Words similar to 'stock' (Skip-Gram):", similar_words_stock_skip_gram)
print("Words similar to 'revenue' (CBOW):", similar_words_revenue_cbow)
print("Words similar to 'revenue' (Skip-Gram):", similar_words_revenue_skip_gram)
print("Words similar to 'savings' (CBOW):", similar_words_savings_cbow)
print("Words similar to 'savings' (Skip-Gram):", similar_words_savings_skip_gram)
print("Words similar to 'debt' (CBOW):", similar_words_debt_cbow)
print("Words similar to 'debt' (Skip-Gram):", similar_words_debt_skip_gram)

# Function to solve an analogy problem
def solve_analogy(model, word_a, word_b, word_c):
    """Answer the analogy "word_a is to word_b as word_c is to ?".

    The target vector ``vec(word_b) - vec(word_a) + vec(word_c)`` is compared
    by cosine similarity against every row of ``model.word_vecs``, and the
    best-scoring word that is not one of the three query words is returned.

    Args:
        model: Object exposing ``word_vecs``, a 2-D array indexed by word id.
        word_a: First word of the reference pair.
        word_b: Second word of the reference pair.
        word_c: Word whose counterpart is wanted.

    Returns:
        The best-matching word, or ``""`` when any query word is missing from
        the module-level ``word_to_id`` or when no other word is left to pick.
    """
    query_words = (word_a, word_b, word_c)
    if any(w not in word_to_id for w in query_words):
        return ""

    query_ids = [word_to_id[w] for w in query_words]
    vector_a, vector_b, vector_c = (model.word_vecs[i] for i in query_ids)
    analogy_vector = vector_b - vector_a + vector_c

    scores = cosine_similarity(analogy_vector.reshape(1, -1), model.word_vecs)[0]

    # The target vector usually stays closest to word_b or word_c themselves,
    # so the query words must be masked out or the answer is trivial.
    scores[query_ids] = -np.inf

    best_id = int(np.argmax(scores))
    if np.isneginf(scores[best_id]):
        # Every candidate was masked: the vocabulary holds only query words.
        return ""
    return id_to_word[best_id]

# Analogy queries as (a, b, c) triples, read "a is to b as c is to ?".
_analogy_queries = (
    ('take', 'took', 'go'),
    ('car', 'cars', 'child'),
    ('good', 'better', 'bad'),
)

# Answer all three with the CBOW model first, then with the Skip-Gram model.
analogy_1_cbow, analogy_2_cbow, analogy_3_cbow = (
    solve_analogy(cbow_model, *triple) for triple in _analogy_queries
)
analogy_1_skip_gram, analogy_2_skip_gram, analogy_3_skip_gram = (
    solve_analogy(skip_gram_model, *triple) for triple in _analogy_queries
)

# Each line shows the CBOW answer followed by the Skip-Gram answer.
print("Analogy: take:took = go:?", analogy_1_cbow, analogy_1_skip_gram)
print("Analogy: car:cars = child:?", analogy_2_cbow, analogy_2_skip_gram)
print("Analogy: good:better = bad:?", analogy_3_cbow, analogy_3_skip_gram)


# Recorded output of the cell above: ModuleNotFoundError: No module named 'common'