<a href="https://colab.research.google.com/github/bPavan16/kle-tech-archive/blob/main/nlp/6_wordEmbeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install gensim



In [5]:
import gensim.downloader as api

In [6]:

class WordEmbeddings:
    """Convenience wrapper around a pre-trained word-embedding model.

    The model is loaded once in ``__init__`` and stored on ``self.model``.
    Every query method prints its result to stdout and returns ``None``;
    a ``KeyError`` raised by the model for an unknown word is reported as
    a printed message instead of propagating.
    """

    def __init__(self, model_name):
        """Load the pre-trained embedding model called *model_name*.

        Args:
            model_name (str): The name of the pre-trained embedding model,
                passed unchanged to ``api.load``.
        """
        print(f"Loading {model_name} model... This may take some time.")
        self.model = api.load(model_name)
        print(f"{model_name} model loaded successfully!")

    @staticmethod
    def _report_missing(error):
        """Print the shared out-of-vocabulary message for *error*."""
        print(f"Word not found in vocabulary: {error}")

    def word_similarity(self, word1, word2):
        """Print the similarity score between two words.

        Args:
            word1 (str): The first word.
            word2 (str): The second word.
        """
        try:
            similarity = self.model.similarity(word1, word2)
        except KeyError as e:
            self._report_missing(e)
        else:
            print(f"Similarity between '{word1}' and '{word2}': {similarity:.4f}")

    def most_similar_words(self, word, topn=5):
        """Print the words the model ranks as most similar to *word*.

        Args:
            word (str): The word to find similar words for.
            topn (int): The number of most similar words to request.
        """
        try:
            similar_words = self.model.most_similar(word, topn=topn)
        except KeyError as e:
            self._report_missing(e)
        else:
            print(f"Top {topn} words similar to '{word}':")
            for w, score in similar_words:
                print(f"  {w}: {score:.4f}")

    def word_arithmetic(self, positive, negative, topn=5):
        """Print the result of word-vector arithmetic.

        Example: "king" - "man" + "woman" = "queen"

        Args:
            positive (list): List of words to add.
            negative (list): List of words to subtract.
            topn (int): The number of most similar words to request.
        """
        # With nothing to add or subtract there is no query to run; report it
        # here instead of letting the model's own error surface as a traceback.
        if not positive and not negative:
            print("Word arithmetic needs at least one positive or negative word.")
            return

        try:
            result = self.model.most_similar(
                positive=positive, negative=negative, topn=topn
            )
        except KeyError as e:
            self._report_missing(e)
        else:
            print(f"Word arithmetic result ({positive} - {negative}):")
            for w, score in result:
                print(f"  {w}: {score:.4f}")

    def check_word_in_vocab(self, word):
        """Print whether *word* is a key of the model's ``key_to_index``.

        Args:
            word (str): The word to check.
        """
        if word in self.model.key_to_index:
            print(f"'{word}' is in the vocabulary!")
        else:
            print(f"'{word}' is NOT in the vocabulary!")



In [None]:
# Build the Word2Vec (Google News) wrapper; the constructor loads the model.
word2vec = WordEmbeddings(model_name="word2vec-google-news-300")


Loading word2vec-google-news-300 model... This may take some time.
[==------------------------------------------------] 4.2% 70.4/1662.8MB downloaded

In [None]:
# Build the GloVe wrapper; the constructor loads the model.
glove = WordEmbeddings(model_name="glove-wiki-gigaword-300")

In [None]:
# Build the FastText wrapper; the constructor loads the model.
fasttext = WordEmbeddings(model_name="fasttext-wiki-news-subwords-300")

In [None]:
# Word2Vec example: similarity score for one word pair.
print("\n--- Word2Vec Tests ---")
word2vec.word_similarity(word1="king", word2="queen")

In [None]:
# Word2Vec example: most similar words for a single query word (default topn).
print("\n--- Word2Vec Tests ---")
word2vec.most_similar_words(word="computer")

In [None]:
# Word2Vec example: vector arithmetic, adding "king" and "woman" and subtracting "man".
print("\n--- Word2Vec Tests ---")
word2vec.word_arithmetic(["king", "woman"], ["man"])

In [None]:
# Word2Vec example: vocabulary membership check.
print("\n--- Word2Vec Tests ---")
word2vec.check_word_in_vocab(word="artificial")

In [None]:
# Run the same four example queries against the GloVe model.
print("\n--- GloVe Tests ---")
glove.word_similarity(word1="king", word2="queen")
glove.most_similar_words(word="computer")
glove.word_arithmetic(["king", "woman"], ["man"])
glove.check_word_in_vocab(word="artificial")

In [None]:
# Run the same four example queries against the FastText model.
print("\n--- FastText Tests ---")
fasttext.word_similarity(word1="king", word2="queen")
fasttext.most_similar_words(word="computer")
fasttext.word_arithmetic(["king", "woman"], ["man"])
fasttext.check_word_in_vocab(word="artificial")