<a href="https://colab.research.google.com/github/agrigoridou/Word-embeddings-and-Recurrent-Neural-Networks-/blob/main/%CE%91_Word_embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gensim.downloader as api
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Φόρτωση των προ-εκπαιδευμένων μοντέλων Word2Vec και GloVe

In [None]:
# Load the two pre-trained embedding models by their gensim-downloader
# identifiers. NOTE(review): api.load presumably downloads and caches each
# model on first use, so this cell can be slow - confirm against gensim docs.
word2vec_model = api.load("word2vec-google-news-300")  # Word2Vec vectors
glove_model = api.load("glove-wiki-gigaword-300")  # GloVe vectors

# Συνάρτηση για εύρεση κοντινών λέξεων

In [None]:
def find_closest_words(model, word, topn=10):
    """Look up the nearest neighbours of ``word`` in an embedding model.

    Args:
        model: Object exposing ``most_similar(word, topn=...)``.
        word: Query token.
        topn: Number of neighbours to request from the model.

    Returns:
        Whatever ``model.most_similar`` returns for the query. If that call
        raises ``KeyError``, a message string stating that the word is not
        in the vocabulary is returned instead.
    """
    try:
        neighbours = model.most_similar(word, topn=topn)
    except KeyError:
        # Callers get a readable message rather than an exception.
        return f"The word '{word}' is not in the vocabulary."
    else:
        return neighbours

# Ερώτημα 1: 10 πιο κοντινές λέξεις για προκαθορισμένες λέξεις

In [None]:
# Question 1: the 10 nearest neighbours of each preset word, per model.
words = ['car', 'jaguar', 'Jaguar', 'facebook']
results = {}
for word in words:
    # One entry per model, keyed by model label; Word2Vec is queried first.
    results[word] = {
        label: find_closest_words(model, word)
        for label, model in (("word2vec", word2vec_model), ("glove", glove_model))
    }

## Εύρεση κοινών λέξεων

In [None]:
# For each query word, print the neighbours that both models agree on.
for word in words:
    w2v_hits = results[word]["word2vec"]
    glove_hits = results[word]["glove"]

    # find_closest_words() returns an error *string* (not a list) when the
    # query is out of vocabulary. Iterating that string would yield single
    # characters and silently produce a meaningless intersection, so report
    # the failed lookup instead of computing one.
    lookup_errors = [hits for hits in (w2v_hits, glove_hits) if isinstance(hits, str)]
    if lookup_errors:
        reason = " / ".join(lookup_errors)
        print(f"Common words for '{word}': not available ({reason})")
        continue

    # Each hit is a sequence whose first element is the neighbour word.
    w2v_words = [item[0] for item in w2v_hits]
    glove_words = [item[0] for item in glove_hits]
    common_words = set(w2v_words) & set(glove_words)
    print(f"Common words for '{word}': {common_words}")

# Ερώτημα 2: Επαναλήψεις για λέξεις επιλογής σας

In [None]:
# Question 2: repeat the nearest-neighbour lookup for self-chosen words.
custom_words = ['python', 'teacher', 'university', 'robot']
custom_results = {}
for word in custom_words:
    # One entry per model, keyed by model label; Word2Vec is queried first.
    custom_results[word] = {
        label: find_closest_words(model, word)
        for label, model in (("word2vec", word2vec_model), ("glove", glove_model))
    }

# Ερώτημα 3: Φιλτράρισμα λέξεων για 'student'

In [None]:
def filter_related_words(model, word, filter_words, topn=20, *, limit=10):
    """Return neighbours of ``word`` that contain none of ``filter_words``.

    Args:
        model: Object exposing ``most_similar(word, topn=...)``.
        word: Query token.
        filter_words: Iterable of substrings; any neighbour whose word
            contains one of them (compared case-insensitively) is dropped.
        topn: Number of candidate neighbours to request before filtering.
        limit: Maximum number of surviving neighbours to return
            (keyword-only; defaults to the previously hard-coded 10).

    Returns:
        A list of at most ``limit`` entries from ``model.most_similar``, in
        their original order. It may be shorter than ``limit`` when the
        filter removes many of the ``topn`` candidates. If the lookup raises
        ``KeyError``, a message string stating that the word is not in the
        vocabulary is returned instead.
    """
    # Normalise the filter terms once. Matching is case-insensitive so that
    # capitalised variants (e.g. "Student" for the filter "student") are
    # excluded as well.
    blocked = [term.lower() for term in filter_words]

    try:
        similar_words = model.most_similar(word, topn=topn)
    except KeyError:
        return f"The word '{word}' is not in the vocabulary."

    filtered = [
        item
        for item in similar_words
        if not any(term in item[0].lower() for term in blocked)
    ]
    return filtered[:limit]

# Substrings that mark a neighbour as belonging to a given context.
student_filters = dict(
    university=['university', 'college', 'campus'],
    school=['school', 'class', 'teacher', 'student'],
)

# For each model: the unfiltered neighbours of 'student', then the neighbours
# left after removing university-related and school-related words.
student_results = {
    label: {
        "default": find_closest_words(model, 'student'),
        "filtered_university": filter_related_words(
            model, 'student', student_filters['university']
        ),
        "filtered_school": filter_related_words(
            model, 'student', student_filters['school']
        ),
    }
    for label, model in (("word2vec", word2vec_model), ("glove", glove_model))
}

# Ερώτημα 4: Αναλογίες

In [None]:
def find_analogies(model, word_a, word_b, word_c):
    """Query the model for the analogy ``word_a - word_b + word_c``.

    Args:
        model: Object exposing
            ``most_similar(positive=..., negative=..., topn=...)``.
        word_a: Word passed as a positive term.
        word_b: Word passed as the negative term.
        word_c: Word passed as a positive term.

    Returns:
        The model's result for the top 2 candidates, or the string
        ``"Error: <exception text>"`` when the lookup raises ``KeyError``.
    """
    added = [word_a, word_c]
    removed = [word_b]
    try:
        return model.most_similar(positive=added, negative=removed, topn=2)
    except KeyError as missing:
        return "Error: " + str(missing)

# Question 4: preset analogies, each given as (word_a, word_b, word_c) and
# evaluated as word_a - word_b + word_c.
analogies = [
    ("king", "man", "woman"),
    ("Paris", "France", "Italy"),
    ("teacher", "school", "college"),
    ("car", "road", "track"),
    ("python", "language", "code"),
]

analogy_results = {}
for analogy in analogies:
    word_a, word_b, word_c = analogy
    # Results are keyed by a readable "a-b+c" label, one entry per model.
    analogy_results[f"{word_a}-{word_b}+{word_c}"] = {
        label: find_analogies(model, word_a, word_b, word_c)
        for label, model in (("word2vec", word2vec_model), ("glove", glove_model))
    }

# Ερώτημα 5: Custom αναλογίες

In [None]:
# Question 5: self-chosen analogies, each given as (word_a, word_b, word_c)
# and evaluated as word_a - word_b + word_c.
custom_analogies = [
    ("robot", "machine", "human"),
    ("doctor", "hospital", "clinic"),
    ("music", "sound", "instrument"),
]

custom_analogy_results = {}
for analogy in custom_analogies:
    word_a, word_b, word_c = analogy
    # Results are keyed by a readable "a-b+c" label, one entry per model.
    custom_analogy_results[f"{word_a}-{word_b}+{word_c}"] = {
        label: find_analogies(model, word_a, word_b, word_c)
        for label, model in (("word2vec", word2vec_model), ("glove", glove_model))
    }