In [6]:
import os

# OpenBLAS reads OPENBLAS_NUM_THREADS when the library is first loaded, so it
# must be set before importing anything that may pull in NumPy (gensim, and
# possibly nltk). Setting it after those imports has no effect.
# NOTE(review): if NumPy was already imported earlier in the same interpreter
# or notebook kernel, this still comes too late -- set it in the environment.
os.environ['OPENBLAS_NUM_THREADS'] = '64'

import nltk  # noqa: E402  (deliberately after the environment setup above)
from nltk.corpus import wordnet as wn  # noqa: E402
from gensim.models import Word2Vec  # noqa: E402

print(os.getcwd())
os.chdir('/app')  # Change to your project directory
print("Current directory:", os.getcwd())

# Ensure the necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load a pre-trained Word2Vec model
# Replace this with the path to your Word2Vec model.
# NOTE(review): Word2Vec.load presumably expects a model saved by gensim
# itself; raw word2vec-format vector files may need a different loader --
# confirm against the gensim documentation.
model_path = "embedding_files/datasets/word2vec_1billion/custom_word2vec.model"
try:
    word2vec = Word2Vec.load(model_path)
except Exception as e:
    print(f"Error loading Word2Vec model: {e}")
    # `exit()` is a convenience injected by the `site` module for interactive
    # sessions and would report success (status 0). Raise SystemExit directly
    # with a non-zero status so callers can detect the failure.
    raise SystemExit(1) from e

def get_synonyms(word):
    """Return the set of WordNet lemma names found across all synsets of ``word``.

    Underscores in lemma names are replaced with spaces. The result is an
    empty set when WordNet yields no synsets for ``word``.
    """
    return {
        lemma.name().replace('_', ' ')
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }

def calculate_similarity(word, synonyms, model):
    """Score each synonym against ``word`` using the model's word vectors.

    Args:
        word: The reference word.
        synonyms: Iterable of candidate words to compare with ``word``.
        model: Object exposing ``wv.key_to_index`` (membership lookup) and
            ``wv.similarity(a, b)``.

    Returns:
        A dict mapping each synonym present in ``model.wv.key_to_index`` to
        the value returned by ``model.wv.similarity(word, synonym)``. The dict
        is empty when ``word`` itself is not in the vocabulary.
    """
    vocab = model.wv.key_to_index
    # Whether the reference word is known does not depend on the synonym, so
    # test it once up front instead of on every loop iteration.
    if word not in vocab:
        return {}
    return {
        synonym: model.wv.similarity(word, synonym)
        for synonym in synonyms
        if synonym in vocab
    }

# Input word
word = "drive"

# Get synonyms
synonyms = get_synonyms(word)
if not synonyms:
    print(f"No synonyms found for '{word}'.")
else:
    # Set iteration order is arbitrary and can differ between runs (string
    # hashing is randomized per process), so sort once to make the printed
    # list and any tie-breaking below reproducible.
    ordered_synonyms = sorted(synonyms)
    print(f"Synonyms for '{word}': {', '.join(ordered_synonyms)}")

    # Calculate similarity
    similarities = calculate_similarity(word, ordered_synonyms, word2vec)
    if not similarities:
        print("No similarities calculated (word or synonyms missing in Word2Vec vocabulary).")
    else:
        print("\nSimilarity scores:")
        # Highest similarity first.
        ranked = sorted(similarities.items(), key=lambda item: item[1], reverse=True)
        for synonym, score in ranked:
            print(f"{synonym}: {score:.4f}")
        least_similar_word = min(similarities, key=similarities.get)
        print(f"\nLeast similar word: {least_similar_word} ({similarities[least_similar_word]:.4f})")

/app
Current directory: /app


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Synonyms for 'drive': push back, driving force, force, force back, motor, labour, cause, movement, ride, repulse, tug, ram, parkway, crusade, driving, effort, driveway, private road, push, campaign, beat back, thrust, aim, take, drive, get, labor, repel

Similarity scores:
drive: 1.0000
driving: 0.6092
get: 0.4795
force: 0.4522
take: 0.4364
aim: 0.4215
effort: 0.4090
push: 0.3694
campaign: 0.3586
cause: 0.3125
ride: 0.2931
labor: 0.2671
labour: 0.2384
motor: 0.2042
driveway: 0.1954
ram: 0.1689
parkway: 0.1641
movement: 0.1388
repel: 0.0568
crusade: 0.0267
tug: 0.0087
thrust: -0.0008

Least similar word: thrust (-0.0008)
