# In [None]:
from gensim.models import Word2Vec
import faiss
import numpy as np 

# Small agriculture-themed corpus; each string is one training sentence.
corpus = [
    "farmers use tractors to plow the field",
    "seeds grow into plants with water and sunlight",
    "crops are harvested and sold in the market",
    "agriculture helps to produce food for people",
    "technology improves smart farming and crop management",
]

# Whitespace tokenisation: one list of tokens per sentence.
sentences = list(map(str.split, corpus))

# Skip-gram (sg=1) Word2Vec with 50-dimensional vectors; min_count=1 keeps
# every token, which matters because most words occur only once here.
model = Word2Vec(
    sentences=sentences,
    vector_size=50,
    window=3,
    min_count=1,
    sg=1,
    epochs=100,
)

# Vocabulary in the model's own order; row i of `embeddings` is the vector
# for words[i], so FAISS result ids can be mapped straight back to words.
words = list(model.wv.index_to_key)
embeddings = np.asarray(
    [model.wv[token] for token in words],
    dtype=np.float32,  # FAISS operates on float32 matrices
)
print("number of words : ", len(words))

dimension = embeddings.shape[1]

# Flat L2 index over all word vectors.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print("Total vectors indexed:", index.ntotal)

def find_similar_words(query_word, top_k=5):
    """Print up to ``top_k`` vocabulary words nearest to ``query_word``.

    Looks up the query word's vector in the module-level ``model``, searches
    the module-level FAISS ``index`` and prints each neighbour from ``words``
    with the distance reported by the index. The query word itself is
    excluded from the results.

    Args:
        query_word: Word to look up; must be present in ``model.wv``.
        top_k: Maximum number of neighbours to print.

    Returns:
        None. Results (or a "not in vocabulary" message) are printed.
    """
    if query_word not in model.wv:
        print(f"'{query_word}' not in vocabulary.")
        return

    query_vector = np.array([model.wv[query_word]]).astype('float32')

    # The query word is stored in the index too and comes back at distance 0,
    # so request one extra neighbour to make up for dropping that self-match.
    # Cap the request at the index size: FAISS pads missing results with
    # label -1, which would otherwise silently index words[-1].
    k = min(top_k + 1, index.ntotal)
    distances, indices = index.search(query_vector, k)

    print(f"\nTop {top_k} words similar to '{query_word}':\n")
    shown = 0
    for idx, dist in zip(indices[0], distances[0]):
        if shown >= top_k:
            break
        if idx < 0:
            # Padding entry: fewer than k vectors were available.
            continue
        if words[idx] == query_word:
            # Skip the trivial self-match.
            continue
        print(f"{words[idx]} (distance: {dist:.4f})")
        shown += 1

# Demo: show nearest neighbours for a few sample words from the corpus.
for query in ("farmers", "technology", "crops"):
    find_similar_words(query)






