In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Sample data containing documents and their labels (positive or negative)
documents = [
    ("This movie is fantastic, I loved every moment! The acting was superb and the plot was gripping. I highly recommend it to everyone.", "positive"),
    ("The acting in this movie was terrible, I couldn't wait for it to end. The plot was predictable and boring. I do not recommend it at all.", "negative"),
    ("The plot twists in this film were unexpected and kept me on the edge of my seat. The cinematography was stunning and the performances were top-notch. A must-watch!", "positive"),
    ("I was bored throughout the entire film, a waste of time. The acting was wooden and the story was uninspired. I regret watching it.", "negative"),
    ("Absolutely amazing! This movie blew me away. The story, the acting, the visuals, everything was perfect. A masterpiece!", "positive"),
    ("Disappointing. The hype for this movie was unwarranted. The story was lackluster and the characters were not engaging. I expected more.", "negative"),
    ("I couldn't stop laughing! This comedy is a gem. The jokes were hilarious and the performances were brilliant. Highly recommended for a good laugh.", "positive"),
    ("Awful. This horror movie was more comical than scary. The acting was laughable and the plot was ridiculous. Not worth watching.", "negative"),
    # Add more documents as needed
]

# Extract texts and labels from the sample data
texts = [doc[0] for doc in documents]
labels = [doc[1] for doc in documents]

# Polarity words
positive_words = ['fantastic', 'loved', 'unexpected', 'edge', 'superb', 'gripping', 'recommend', 'stunning', 'top-notch', 'must-watch', 'amazing', 'hilarious', 'brilliant']
negative_words = ['terrible', 'bored', 'waste', 'predictable', 'boring', 'regret', 'awful', 'laughable', 'ridiculous']

# Create feature vectors using CountVectorizer
vectorizer = CountVectorizer(vocabulary=positive_words+negative_words)
X = vectorizer.fit_transform(texts).toarray()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Test with new documents
new_documents = [
    "One of the best movies I've ever seen. The story was compelling and the acting was outstanding.",
    "I don't understand the hype around this movie. It was boring and uneventful.",
    "The book was much better than the movie. The adaptation fell flat and failed to capture the essence of the story.",
    "A rollercoaster of emotions. This movie made me laugh, cry, and everything in between. A true masterpiece.",
    "I was disappointed by this movie. The trailer promised so much more than the actual film delivered.",
    "A complete waste of time. I regret spending money on tickets for this movie.",
]

X_new = vectorizer.transform(new_documents)
predictions = model.predict(X_new)

print("Predictions for new documents:")
for doc, label in zip(new_documents, predictions):
    sentiment = "Positive" if label == "positive" else "Negative"
    print(f"Document: {doc} \nPredicted Label: {sentiment}\n")



Accuracy: 0.0
Predictions for new documents:
Document: One of the best movies I've ever seen. The story was compelling and the acting was outstanding. 
Predicted Label: Positive

Document: I don't understand the hype around this movie. It was boring and uneventful. 
Predicted Label: Positive

Document: The book was much better than the movie. The adaptation fell flat and failed to capture the essence of the story. 
Predicted Label: Positive

Document: A rollercoaster of emotions. This movie made me laugh, cry, and everything in between. A true masterpiece. 
Predicted Label: Positive

Document: I was disappointed by this movie. The trailer promised so much more than the actual film delivered. 
Predicted Label: Positive

Document: A complete waste of time. I regret spending money on tickets for this movie. 
Predicted Label: Positive

