<a href="https://colab.research.google.com/github/bPavan16/kle-tech-archive/blob/main/src/5_word2vec_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import gensim.downloader as api

In [14]:
def load_word2vec_model():
    """
    Load the pre-trained Word2Vec Google News model (300-dimensional)
    through the Gensim downloader and return it.

    Progress messages are printed before and after the load, since the
    call may take a while.

    Returns:
        The object returned by ``api.load("word2vec-google-news-300")``.
    """
    dataset_name = "word2vec-google-news-300"

    print("Loading Word2Vec model... This may take a while.")

    # Fetch the model by its Gensim dataset name
    loaded_model = api.load(dataset_name)

    print("Model loaded successfully!")
    return loaded_model

In [15]:
def word_similarity(model, word1, word2):
    """
    Print the similarity score between two words.

    Args:
        model: Object exposing ``similarity(word1, word2)``.
        word1: First word to compare.
        word2: Second word to compare.

    Returns:
        None. The score is printed with four decimal places. If the
        lookup raises ``KeyError``, a "not found" message is printed
        instead of propagating the error.
    """
    try:
        score = model.similarity(word1, word2)
        report = f"Similarity between '{word1}' and '{word2}': {score:.4f}"
        print(report)
    except KeyError as missing:
        # The model signals an unknown word with KeyError; report it and move on
        print(f"Word not found in vocabulary: {missing}")

In [None]:
def most_similar_words(model, word, topn=7):
    """
    Print the words most similar to a given word.

    Args:
        model: Object exposing ``most_similar(word, topn=...)`` that
            yields ``(word, score)`` pairs.
        word: The query word.
        topn: Number of neighbours to request (default 7).

    Returns:
        None. A header line and one line per neighbour (score shown with
        four decimal places) are printed. If the lookup raises
        ``KeyError``, a "not found" message is printed instead.
    """
    try:
        similar_words = model.most_similar(word, topn=topn)

        print(f"Top {topn} words similar to '{word}':")

        # Distinct loop names keep the `word` parameter from being rebound
        for neighbor, score in similar_words:
            print(f"  {neighbor}: {score:.4f}")

    except KeyError as e:
        print(f"Word not found in vocabulary: {e}")

In [16]:
def word_arithmetic(model, positive, negative, topn=5):
    """
    Perform word-vector arithmetic and print the closest matches.

    Example: king - man + woman = ?

    Args:
        model: Object exposing
            ``most_similar(positive=..., negative=..., topn=...)`` that
            yields ``(word, score)`` pairs.
        positive: Words passed as the ``positive`` argument.
        negative: Words passed as the ``negative`` argument.
        topn: Number of results to request (default 5).

    Returns:
        None. A header line and one line per result (score shown with
        four decimal places) are printed. If the lookup raises
        ``KeyError``, a "not found" message is printed instead.
    """
    try:
        matches = model.most_similar(
            positive=positive,
            negative=negative,
            topn=topn,
        )
        print(f"Word arithmetic result ({positive} - {negative}):")
        for term, closeness in matches:
            print(f"  {term}: {closeness:.4f}")
    except KeyError as missing:
        # The model signals an unknown word with KeyError; report it and move on
        print(f"Word not found in vocabulary: {missing}")

In [17]:
# Load the pre-trained model once; the example cells below all reuse this `model` object.
model = load_word2vec_model()

Loading Word2Vec model... This may take a while.
Model loaded successfully!


In [18]:
# Test example: print the similarity score between two words.
word_similarity(model, "king", "queen")

Similarity between 'king' and 'queen': 0.6511


In [19]:
# NOTE(review): the saved output below reads "Top 5", but most_similar_words
# currently defaults to topn=7 and its defining cell has no execution count —
# the output looks stale; re-run the notebook top to bottom to refresh it.
most_similar_words(model, "computer")

Top 5 words similar to 'computer':
  computers: 0.7979
  laptop: 0.6640
  laptop_computer: 0.6549
  Computer: 0.6473
  com_puter: 0.6082


In [20]:
# Analogy example: king - man + woman = ?
word_arithmetic(model, positive=["king", "woman"], negative=["man"])

Word arithmetic result (['king', 'woman'] - ['man']):
  queen: 0.7118
  monarch: 0.6190
  princess: 0.5902
  crown_prince: 0.5499
  prince: 0.5377


In [21]:
# NOTE(review): this is the same call as the previous cell and produces the
# same output; consider removing the duplicate or swapping in a new example.
word_arithmetic(model, positive=["king", "woman"], negative=["man"])

Word arithmetic result (['king', 'woman'] - ['man']):
  queen: 0.7118
  monarch: 0.6190
  princess: 0.5902
  crown_prince: 0.5499
  prince: 0.5377


In [22]:
# "land" is passed as both a positive and a negative word.
# NOTE(review): presumably the two cancel, so this is effectively a
# nearest-neighbour query for "water" — confirm that is the intended example.
word_arithmetic(model, positive=["land", "water"], negative=["land"])

Word arithmetic result (['land', 'water'] - ['land']):
  potable_water: 0.6799
  Water: 0.6707
  sewage: 0.6619
  groundwater: 0.6588
  Floridan_aquifer: 0.6423
