In [None]:
import string

import gensim.downloader as api
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [6]:
import numpy as np

In [None]:

# Load Google's pretrained Word2Vec model (300-dimensional vectors) through
# the gensim downloader; the progress messages bracket the load call.
# NOTE(review): presumably the first call downloads the (large) model and
# later calls reuse a local cache - confirm against the gensim docs.
print("Loading Word2Vec model...")
model = api.load("word2vec-google-news-300")
print("Model loaded!")

# Toy labeled corpus for the sentiment classifier.  Rows are written
# label-first so the classes line up in one column; the comprehension swaps
# each row into the (text, label) order used by the rest of the script.
sentences = [
    (text, label)
    for label, text in (
        ("positive", "I love this movie, it's fantastic!"),
        ("positive", "This restaurant has amazing food."),
        ("negative", "The weather is terrible today."),
        ("negative", "I hate being stuck in traffic."),
        ("positive", "This phone has great battery life."),
        ("negative", "The service at the hotel was awful."),
    )
]

def sentence_to_vector(sentence, model):
    """Embed a sentence as the mean of its known word vectors.

    The sentence is lower-cased and split on whitespace.  A token that is
    not found in ``model`` as-is is retried with leading/trailing
    punctuation stripped, so "fantastic!" or "movie," contribute the
    vectors of "fantastic" and "movie" instead of being dropped as
    out-of-vocabulary.

    Args:
        sentence: Text to embed.
        model: Embedding lookup supporting ``word in model`` and
            ``model[word]`` (e.g. the object returned by ``api.load``).

    Returns:
        The element-wise mean of the vectors of all tokens found in
        ``model``.  When no token is found, a zero vector of length
        ``model.vector_size`` if that attribute exists, otherwise 300.
    """
    word_vectors = []
    for raw in sentence.lower().split():
        # Prefer the exact token so anything that matched before still
        # matches; only fall back to the punctuation-stripped form.
        token = raw if raw in model else raw.strip(string.punctuation)
        if token and token in model:
            word_vectors.append(model[token])

    if not word_vectors:
        # Size the fallback from the model when possible; 300 is the
        # dimensionality this script was originally hard-coded for.
        return np.zeros(getattr(model, "vector_size", 300))
    return np.mean(word_vectors, axis=0)

# Prepare training data: one averaged sentence embedding per example (X)
# and the matching string label (y).
X = np.array([sentence_to_vector(text, model) for text, _ in sentences])
y = np.array([label for _, label in sentences])

# Encode labels as integers.  LabelEncoder sorts the class names, so
# "negative" becomes 0 and "positive" becomes 1.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Split data into training and testing sets.  The dataset is tiny, so the
# split is stratified: without it the random draw can leave the training or
# test part with a single class, which breaks fitting or makes the accuracy
# meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Train a simple classifier (Logistic Regression)
classifier = LogisticRegression()
classifier.fit(X_train, y_train)

# Evaluate the model on the held-out examples
accuracy = classifier.score(X_test, y_test)
print(f"Model Accuracy: {accuracy:.2f}")

# Test the model with a new sentence; predict() expects a 2-D batch, hence
# the single-element list around the sentence vector.
test_sentence = "I really enjoyed this book!"
vector = sentence_to_vector(test_sentence, model)
prediction = classifier.predict([vector])[0]
print(f"Sentence: '{test_sentence}' → Predicted Sentiment: {encoder.inverse_transform([prediction])[0]}")
