In [11]:
import gensim.downloader as api
from numpy.linalg import norm
import numpy as np

# Load the pre-trained GloVe model (300 dimensions, trained on Wikipedia + Gigaword).
# NOTE(review): api.load presumably downloads and caches the vectors on first
# use, so this cell may be slow on a fresh machine — confirm against gensim docs.
model = api.load('glove-wiki-gigaword-300')

# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between ``vec1`` and ``vec2``.

    The result is the dot product of the two vectors divided by the product
    of their norms (as computed by ``numpy.linalg.norm``).
    """
    dot_product = np.dot(vec1, vec2)
    magnitude_product = norm(vec1) * norm(vec2)
    return dot_product / magnitude_product

# Function to compute distance between two words using GloVe
def glove_distance(word1, word2, model):
    """Look up two words in ``model`` and compare their vectors.

    Returns a ``(similarity, distance)`` tuple, where ``similarity`` is the
    cosine similarity of the two word vectors and ``distance`` is
    ``1 - similarity``. If either word is missing from ``model`` the integer
    ``0`` is returned instead of a tuple, so callers that unpack the result
    must check for that case first.
    """
    # Guard clause: return the sentinel when a word is not in the model.
    if not (word1 in model and word2 in model):
        return 0

    first_vector, second_vector = model[word1], model[word2]
    cos_sim = cosine_similarity(first_vector, second_vector)
    return cos_sim, 1 - cos_sim  # (cosine similarity, cosine distance)

# Example words
word1 = 'king'
word2 = 'queen'

# Print the GloVe result for three pairs, one per line: two different words,
# the same word twice, and a pair whose second word is a nonsense string.
print(
    glove_distance(word1, word2, model),
    glove_distance('king', 'king', model),
    glove_distance('king', 'dffsdfsdf', model),
    sep='\n',
)

(0.63364685, 0.3663531541824341)
(1.0, 0.0)
One or both words ('king', 'dffsdfsdf') not found in the GloVe model.


In [16]:
import nltk
# Fetch the NLTK data packages for this cell; the recorded output shows nltk
# reporting "already up-to-date" when a package is present locally.
nltk.download('wordnet')  # WordNet corpus, accessed below through `wn`
nltk.download('omw-1.4')  # Optional: for multilingual WordNet data
nltk.download('punkt')     # Optional: for tokenization

from nltk.corpus import wordnet as wn

# Function to get the synsets of the word
def get_synsets(word):
    """Return the result of ``wn.synsets`` for ``word``."""
    matches = wn.synsets(word)
    return matches

# Function to calculate semantic distance using different similarity metrics
def calculate_semantic_similarity(word1, word2):
    """Compute the best WordNet similarity scores between two words.

    Every synset of ``word1`` is compared with every synset of ``word2``
    using path, Wu-Palmer and Leacock-Chodorow similarity, and the largest
    value seen for each metric is kept.

    Args:
        word1: First word to look up via ``get_synsets``.
        word2: Second word to look up via ``get_synsets``.

    Returns:
        ``None`` if either word has no synsets; otherwise a dict with the
        keys ``'Path Similarity'``, ``'Wu-Palmer Similarity'`` and
        ``'Leacock-Chodorow Similarity'``. A metric stays at ``0`` when no
        synset pair produced a larger value for it.
    """
    # Local import: only this function needs the exception type.
    from nltk.corpus.reader.wordnet import WordNetError

    synsets1 = get_synsets(word1)
    synsets2 = get_synsets(word2)

    # Ensure words have synsets (not all words exist in WordNet)
    if not synsets1 or not synsets2:
        return None

    # Initialize maximum similarity
    max_path_sim = 0
    max_wup_sim = 0
    max_lch_sim = 0

    # Iterate through all synset pairs and calculate the similarity
    for synset1 in synsets1:
        for synset2 in synsets2:
            path_sim = synset1.path_similarity(synset2)
            wup_sim = synset1.wup_similarity(synset2)
            try:
                lch_sim = synset1.lch_similarity(synset2)
            except WordNetError:
                # lch_similarity rejects synset pairs with different parts of
                # speech; treat such pairs as having no LCH score. Catching
                # only this error (rather than a bare ``except``) lets
                # KeyboardInterrupt and genuine bugs propagate.
                lch_sim = None

            # Update maximum similarity found (None means "no score")
            if path_sim is not None and path_sim > max_path_sim:
                max_path_sim = path_sim
            if wup_sim is not None and wup_sim > max_wup_sim:
                max_wup_sim = wup_sim
            if lch_sim is not None and lch_sim > max_lch_sim:
                max_lch_sim = lch_sim

    return {
        'Path Similarity': max_path_sim,
        'Wu-Palmer Similarity': max_wup_sim,
        'Leacock-Chodorow Similarity': max_lch_sim
    }

# Demo: print the maximum WordNet similarity scores found for 'king' and 'queen'.
print(calculate_semantic_similarity('king', 'queen'))

{'Path Similarity': 1.0, 'Wu-Palmer Similarity': 1.0, 'Leacock-Chodorow Similarity': 3.6375861597263857}


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\adamk\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\adamk\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\adamk\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [15]:
import gensim.downloader as api
from numpy.linalg import norm
import numpy as np

# Load pre-trained Word2Vec model from Google News.
# This rebinds `model`: the GloVe vectors assigned to `model` in the GloVe cell
# are no longer reachable through that name once this cell has run.
model = api.load('word2vec-google-news-300')

# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between ``vec1`` and ``vec2``.

    The result is the dot product of the two vectors divided by the product
    of their norms (as computed by ``numpy.linalg.norm``).
    """
    dot_product = np.dot(vec1, vec2)
    magnitude_product = norm(vec1) * norm(vec2)
    return dot_product / magnitude_product

# Function to compute distance between two words using Word2Vec
def word2vec_distance(word1, word2, model):
    """Look up two words in ``model`` and compare their vectors.

    Returns a ``(similarity, distance)`` tuple, where ``similarity`` is the
    cosine similarity of the two word vectors and ``distance`` is
    ``1 - similarity``. If either word is missing from ``model`` a
    human-readable message string is returned instead of a tuple, so callers
    that unpack the result must check for that case first.
    """
    # Guard clause: report missing vocabulary before touching the vectors.
    if word1 not in model or word2 not in model:
        return f"One or both words ('{word1}', '{word2}') not found in the Word2Vec model."

    first_vector, second_vector = model[word1], model[word2]
    cos_sim = cosine_similarity(first_vector, second_vector)
    return cos_sim, 1 - cos_sim  # (cosine similarity, cosine distance)

# Example words
word1 = 'daddy'
word2 = 'dad'

# Compute Word2Vec similarity and distance.
# word2vec_distance returns a message string (not a tuple) when either word is
# missing from the model, so check the result before unpacking; unpacking the
# string directly would raise ValueError.
result = word2vec_distance(word1, word2, model)
if isinstance(result, str):
    print(result)
else:
    similarity, distance = result
    print(f"Cosine Similarity: {similarity}")
    print(f"Cosine Distance: {distance}")



KeyboardInterrupt: 