In [1]:
import os
import numpy as np
from gensim.models import Word2Vec
import ptb  # Import the provided PTB dataset loading code

# Load the training split of the PTB dataset via the provided loader.
# train_corpus is iterated as a sequence of word ids further down, and
# id_to_word is indexed by those ids to recover the word strings.
# word_to_id is presumably the inverse mapping (unused in this cell) - TODO confirm.
train_corpus, word_to_id, id_to_word = ptb.load_data('train')

# Split a flat corpus of word ids into sentences of word strings
def convert_corpus_to_sentences(corpus, vocab=None):
    """Decode a flat sequence of word ids into a list of tokenised sentences.

    Each id is looked up in the id -> word mapping. The '<eos>' token closes
    the current sentence and is not included in it. Consecutive '<eos>'
    tokens therefore produce empty sentences, as before.

    Args:
        corpus: Iterable of word ids.
        vocab: Optional mapping from word id to word string. When omitted,
            the module-level ``id_to_word`` mapping is used, so existing
            single-argument calls behave as they always did.

    Returns:
        A list of sentences, each a list of word strings.

    Raises:
        KeyError: If an id in ``corpus`` is missing from the mapping
            (when the mapping is a dict).
    """
    # Resolve the mapping once instead of on every loop iteration.
    id_lookup = id_to_word if vocab is None else vocab

    sentences = []
    current_sentence = []
    for word_id in corpus:
        word = id_lookup[word_id]
        if word == '<eos>':
            sentences.append(current_sentence)
            current_sentence = []
        else:
            current_sentence.append(word)

    # A corpus that does not end with '<eos>' still carries a final sentence;
    # previously those trailing words were silently discarded.
    if current_sentence:
        sentences.append(current_sentence)
    return sentences

# Decode the training corpus into per-sentence token lists; this is the
# `sentences` input handed to the Word2Vec trainers below.
train_sentences = convert_corpus_to_sentences(train_corpus)

# Thin wrapper around gensim's Word2Vec constructor
def train_word2vec_model(sentences, vector_size, sg, window, min_count):
    """Build and return a Word2Vec model from the given sentences.

    Args:
        sentences: Sentences passed through as Word2Vec's ``sentences``.
        vector_size: Passed through as Word2Vec's ``vector_size``.
        sg: Architecture switch passed through to Word2Vec (this file uses
            0 for CBOW and 1 for Skip-Gram).
        window: Passed through as Word2Vec's ``window``.
        min_count: Passed through as Word2Vec's ``min_count``.

    Returns:
        The constructed Word2Vec model.
    """
    hyperparams = {
        "vector_size": vector_size,
        "sg": sg,
        "window": window,
        "min_count": min_count,
    }
    return Word2Vec(sentences=sentences, **hyperparams)

# Hyper-parameters shared by both architectures so that the only difference
# between the two models is the training algorithm (sg flag).
shared_w2v_params = dict(vector_size=100, window=5, min_count=1)

# CBOW model (sg=0)
cbow_model = train_word2vec_model(train_sentences, sg=0, **shared_w2v_params)

# Skip-Gram model (sg=1)
skipgram_model = train_word2vec_model(train_sentences, sg=1, **shared_w2v_params)

# Look up the nearest neighbours of each target word in a trained model
def find_similar_words(model, target_words, topn=5):
    """Return the most similar words for every word in ``target_words``.

    Args:
        model: Object exposing ``model.wv.most_similar(word, topn=...)`` that
            returns ``(word, score)`` pairs.
        target_words: Iterable of query words.
        topn: Number of neighbours requested per query word. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        A dict mapping each query word to a list of its neighbour words
        (scores are dropped). A word for which the lookup raises KeyError
        maps to ``["Word not in vocabulary"]`` instead.
    """
    similar_words = {}
    for target in target_words:
        # Only the lookup itself can signal an unknown word, so keep the
        # try body down to that single call.
        try:
            neighbours = model.wv.most_similar(target, topn=topn)
        except KeyError:
            similar_words[target] = ["Word not in vocabulary"]
        else:
            similar_words[target] = [neighbour for neighbour, _score in neighbours]
    return similar_words

# Query words whose neighbourhoods are compared across the two models
words_to_compare = ['stock', 'revenue', 'savings', 'debt']

# Collect the neighbours of every query word from each model
similar_words_cbow = find_similar_words(cbow_model, words_to_compare)
similar_words_skipgram = find_similar_words(skipgram_model, words_to_compare)

# Report both models side by side; the empty final item yields the blank
# separator line after each word.
for word in words_to_compare:
    print(
        f"Words most similar to '{word}' (CBOW): {similar_words_cbow[word]}",
        f"Words most similar to '{word}' (Skip-Gram): {similar_words_skipgram[word]}",
        "",
        sep="\n",
    )

# Answer "a is to b as c is to ?" questions with a trained model
def solve_analogy(model, analogy_pairs):
    """Solve each analogy triple and return the answers keyed by triple.

    For a triple ``(a, b, c)`` the model is queried with ``b`` and ``c`` as
    positive words and ``a`` as the negative word, and the single top-ranked
    word is taken as the answer.

    Args:
        model: Object exposing ``model.wv.most_similar(positive=...,
            negative=..., topn=...)`` that returns ``(word, score)`` pairs.
        analogy_pairs: Iterable of hashable 3-item triples ``(a, b, c)``.

    Returns:
        A dict mapping each triple to the answer word, or to the string
        ``"Words not in vocabulary"`` when the lookup raises KeyError.
    """
    results = {}
    for pair in analogy_pairs:
        base, base_variant, query = pair
        try:
            candidates = model.wv.most_similar(
                positive=[base_variant, query],
                negative=[base],
                topn=1,
            )
        except KeyError:
            results[pair] = "Words not in vocabulary"
            continue
        # Only one candidate was requested; keep its word and drop the score.
        results[pair] = candidates[0][0]
    return results

# Analogy questions as (a, b, c) triples, read as "a is to b as c is to ?"
analogy_pairs = [('take', 'took', 'go'), ('car', 'cars', 'child'), ('good', 'better', 'bad')]

# Answer every question with each model
analogy_results_cbow = solve_analogy(cbow_model, analogy_pairs)
analogy_results_skipgram = solve_analogy(skipgram_model, analogy_pairs)

# Report all CBOW answers first, in question order
for pair in analogy_pairs:
    result = analogy_results_cbow[pair]
    print(
        f"Analogy: {pair[0]}:{pair[1]} = {pair[2]}:?",
        f"CBOW Model Result: {result}\n",
        sep="\n",
    )

# Then all Skip-Gram answers, in the same order
for pair in analogy_pairs:
    result = analogy_results_skipgram[pair]
    print(
        f"Analogy: {pair[0]}:{pair[1]} = {pair[2]}:?",
        f"Skip-Gram Model Result: {result}\n",
        sep="\n",
    )


Downloading ptb.train.txt ... 
Done
Words most similar to 'stock' (CBOW): ['shares', 'junk-bond', 'price', 'futures', 'share']
Words most similar to 'stock' (Skip-Gram): ['shares', 'junk-bond', 'stocks', 'plunge', 'depositary']

Words most similar to 'revenue' (CBOW): ['annual', 'income', 'cubic', 'production', 'sales']
Words most similar to 'revenue' (Skip-Gram): ['kronor', 'pretax', 'disposal', 'pesetas', 'guilders']

Words most similar to 'savings' (CBOW): ['loan', 'corp', 'capital', 'trust', 'motors']
Words most similar to 'savings' (Skip-Gram): ['loan', 'guarantees', 'valley', 'ncnb', 'lenders']

Words most similar to 'debt' (CBOW): ['cash', 'assets', 'financing', 'amount', 'payment']
Words most similar to 'debt' (Skip-Gram): ['refinancing', 'tva', 'repay', 'secured', 'issuance']

Analogy: take:took = go:?
CBOW Model Result: went

Analogy: car:cars = child:?
CBOW Model Result: walks

Analogy: good:better = bad:?
CBOW Model Result: greater

Analogy: take:took = go:?
Skip-Gram Model