<h1>Imports and Model Loading</h1>

In [1]:
#!/usr/bin/env python
# coding: utf-8

# Import necessary libraries
import Levenshtein
import gensim.downloader as api
from scipy.spatial.distance import cosine

# Load a pre-trained Word2Vec model
model = api.load("word2vec-ruscorpora-300")




<h1>Function Definitions</h1>

In [2]:
def exact_match(word1, word2):
    """Check for exact match between two words."""
    return word1.lower() == word2.lower()  # Ignore case sensitivity

def levenshtein_distance(word1, word2):
    """Calculate Levenshtein distance between two words."""
    return Levenshtein.distance(word1, word2)

def jaccard_match(word1, word2):
    """Calculate Jaccard similarity between two words."""
    set1 = set(word1)
    set2 = set(word2)
    intersection = len(set1.intersection(set2))
    union = len(set1.union(set2))
    return intersection / union if union > 0 else 0  # Avoid division by zero

def cosine_similarity(word1, word2):
    """Calculate cosine similarity between two words using their word vectors."""
    try:
        # Get the word vectors
        vector1 = model[word1.lower()]  # Ensure lower case
        vector2 = model[word2.lower()]  # Ensure lower case
        
        # Calculate cosine similarity (1 - cosine distance)
        return 1 - cosine(vector1, vector2)
    except KeyError:
        return None  # Return None if the word is not in the model


<h1>Word Pair Evaluation</h1>

In [4]:
# List of word pairs for comparison
word_pairs = [
    ("happy", "joyful"),         # Synonyms
    ("sad", "unhappy"),          # Related antonyms
    ("light", "dark"),           # Antonyms
    ("car", "automobile"),       # Synonyms
    ("teacher", "student"),      # Related roles
    ("fast", "quick"),           # Synonyms
    ("big", "large"),            # Synonyms
    ("cold", "ice"),             # Related concepts
    ("run", "running"),          # Different forms of the same word
    ("strong", "powerful"),      # Synonyms
    ("beautiful", "ugly"),       # Antonyms
    ("child", "adult"),          # Different life stages
    ("friend", "enemy"),         # Opposites
    ("smart", "intelligent"),    # Synonyms
    ("fish", "swim"),            # Related actions
    ("love", "hate")             # Opposites
]

# Evaluate each pair of words
for word1, word2 in word_pairs:
    print(f"Itr :: {word1} :: {word2}\n")
    
    print(f"Exact Match :: {exact_match(word1, word2)} ")
    print(f"Levenshtein Distance :: {levenshtein_distance(word1, word2)}")
    print(f"Jaccard Similarity :: {jaccard_match(word1, word2)}")
    
    cosine_sim = cosine_similarity(word1, word2)
    if cosine_sim is not None:
        print(f"Cosine Similarity :: {cosine_sim}")
    else:
        print(f"Cosine Similarity :: One or both words not in the model.")
    
    print("\n")

Itr :: happy :: joyful

Exact Match :: False 
Levenshtein Distance :: 6
Jaccard Similarity :: 0.1111111111111111
Cosine Similarity :: One or both words not in the model.


Itr :: sad :: unhappy

Exact Match :: False 
Levenshtein Distance :: 6
Jaccard Similarity :: 0.125
Cosine Similarity :: One or both words not in the model.


Itr :: light :: dark

Exact Match :: False 
Levenshtein Distance :: 5
Jaccard Similarity :: 0.0
Cosine Similarity :: One or both words not in the model.


Itr :: car :: automobile

Exact Match :: False 
Levenshtein Distance :: 10
Jaccard Similarity :: 0.09090909090909091
Cosine Similarity :: One or both words not in the model.


Itr :: teacher :: student

Exact Match :: False 
Levenshtein Distance :: 7
Jaccard Similarity :: 0.2
Cosine Similarity :: One or both words not in the model.


Itr :: fast :: quick

Exact Match :: False 
Levenshtein Distance :: 5
Jaccard Similarity :: 0.0
Cosine Similarity :: One or both words not in the model.


Itr :: big :: large

Exa