#### Averaged Word2Vec embedding

In [None]:
import numpy as np
from gensim.models import KeyedVectors

In [None]:
# Read the pre-trained word vectors from disk; binary=True tells the loader
# that the file uses the binary word2vec format rather than the text one.
word2vec_model = KeyedVectors.load_word2vec_format(
    "GoogleNews-vectors-negative300.bin",
    binary=True,
)

In [None]:
# Tokenize text into words.
def tokenize(text):
    """Tokenize text into lowercase words.

    The text is lowercased and split on whitespace. ASCII punctuation
    attached to the start or end of each piece is then stripped, so that
    "dog." yields "dog" and "(hello," yields "hello". Punctuation inside a
    token (e.g. "don't", "state-of-the-art") is left untouched. Pieces that
    consist only of punctuation are dropped.

    - text: Input string.

    Returns a list of word strings; the list is empty when the input is
    empty or contains only whitespace/punctuation.
    """
    import string  # function-scope import: only needed for the punctuation set

    # Strip punctuation from both ends of every whitespace-separated piece.
    trimmed = (piece.strip(string.punctuation) for piece in text.lower().split())
    # Discard pieces that were pure punctuation (they are now empty strings).
    return [word for word in trimmed if word]

In [None]:
# Compute the averaged Word2Vec vector
def get_average_word2vec(text, model, oov_vector=None):
    """
    Return the element-wise mean of the word vectors found for `text`.

    - text: Input string; it is split into words with `tokenize`.
    - model: Word-vector lookup supporting `word in model`, `model[word]`
      and a `vector_size` attribute.
    - oov_vector: Stand-in vector for words missing from `model`. When left
      as None, missing words are skipped and do not affect the mean.

    If no vector is collected at all (no tokens, or every token is missing
    and no `oov_vector` was given), a zero vector of length
    `model.vector_size` is returned.
    """
    has_fallback = oov_vector is not None
    embeddings = []

    for token in tokenize(text):
        if token in model:
            embeddings.append(model[token])
            continue
        # Unknown token: only contributes when a fallback vector was supplied.
        if has_fallback:
            embeddings.append(oov_vector)

    # Nothing to average -> fall back to an all-zero vector.
    if not embeddings:
        return np.zeros(model.vector_size)

    # axis=0 averages across words, keeping one value per embedding dimension.
    return np.mean(embeddings, axis=0)


In [None]:
# Demo: embed a single sentence with the loaded model and display the result.
# No oov_vector is passed, so words missing from the model are skipped.
text = "The quick brown fox jumps over the lazy dog."
average_vector = get_average_word2vec(text=text, model=word2vec_model)
print("Averaged Word2Vec Vector:", average_vector)