In [None]:
import gensim
import gensim.downloader as api
from gensim.models.word2vec import Word2Vec

# Fetch the text8 corpus through the gensim downloader
corpus = api.load('text8')

# Hyperparameters common to both embedding models
shared_params = dict(min_count=1, vector_size=5, window=4)

# CBOW model: trained without the `sg` flag
cbow_model = Word2Vec(sentences=corpus, **shared_params)

# Skip-Gram model: identical settings, with `sg` switched on
skipgram_model = Word2Vec(sentences=corpus, sg=True, **shared_params)

In [None]:
# Name of the pretrained vectors; defined once so the description that is
# printed and the model that is downloaded cannot drift apart.
GOOGLE_MODEL_NAME = 'word2vec-google-news-300'

# Print the model description, looked up by name through the same `api`
# alias that is used for loading
model_dict = api.info(GOOGLE_MODEL_NAME)

for key in ['num_records', 'base_dataset', 'description']:
    print(f'{key: <12}: {model_dict[key]}')

# Download and load the pretrained model
google_cbow = api.load(GOOGLE_MODEL_NAME)

In [None]:
# Vectors under comparison: the two trained models expose theirs via `.wv`,
# while the downloaded Google vectors are queried directly.
cbow_vectors = cbow_model.wv
skipgram_vectors = skipgram_model.wv
all_vectors = (cbow_vectors, skipgram_vectors, google_cbow)
row_labels = ('CBOW:', 'Skip-Gram:', 'Google CBOW:')


def print_rows(values, blank_lines_after=True):
    """Print one `label value` line per model, in `row_labels` order.

    Labels are left-aligned and padded to 13 characters. When
    `blank_lines_after` is true, two empty lines follow the last row.
    """
    rows = [f'{label: <13}{value}' for label, value in zip(row_labels, values)]
    if blank_lines_after:
        rows[-1] += '\n\n'
    print('\n'.join(rows))


# Embedding of a single word (only the first 5 Google components are shown)
print('Word Embedding for "tree":\n')
print_rows([cbow_vectors['tree'],
            skipgram_vectors['tree'],
            google_cbow['tree'][:5]])


# Similarity between two words
print('Similarity Between "tree" and "leaf":\n')
print_rows([vectors.similarity('tree', 'leaf') for vectors in all_vectors])


# Top 3 most similar words
print('Most Similar Words to "tree":\n')
print_rows([vectors.most_similar('tree', topn=3) for vectors in all_vectors])


# Odd one out: which word does not belong with the others
words = ['tree', 'leaf', 'plant', 'bark', 'car']

cbow_result, skipgram_result, google_result = (
    vectors.doesnt_match(words) for vectors in all_vectors
)

print(f"Find Which Word Doesn't Match: {words}:\n")
print_rows([cbow_result, skipgram_result, google_result],
           blank_lines_after=False)

In [None]:
# King -> Queen example: king - man + woman
# The words are passed as keys (not as a precomputed vector) so that
# most_similar leaves the query words themselves out of the ranking;
# the best match is then simply the first result, with no need to skip
# over 'king'.
king_result = google_cbow.most_similar(positive=['king', 'woman'],
                                       negative=['man'],
                                       topn=1)[0]
print(king_result)


# Paris -> Berlin example: paris - france + germany
# "France is to Paris as Germany is to ?" -- the added term has to be the
# country (germany); the expected answer (berlin) must not be part of the
# query. Passing keys lets most_similar leave the query words out of the
# ranking, so the first result is the answer.
# NOTE(review): assumes the lowercase key 'germany' is in the vocabulary,
# like the other lowercase keys used in this notebook -- confirm.
paris_result = google_cbow.most_similar(positive=['paris', 'germany'],
                                        negative=['france'],
                                        topn=1)[0]
print(paris_result)