## 1. Creating the Synthetic Dataset

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Create synthetic dataset
def create_synthetic_reviews(n_per_class=50, seed=None):
    """
    Build a balanced synthetic movie-review dataset from sentence templates.

    For every review a template, one sentiment-specific adjective (used for
    every {adj} slot) and two nouns (drawn with replacement, so they may
    coincide) are picked at random. The first {noun} slot of the template
    receives the first noun and the second {noun} slot the second noun.

    Parameters:
    n_per_class -- number of reviews to generate per sentiment (default 50)
    seed -- optional seed for a private np.random.RandomState; when None the
            global NumPy random state is used, so np.random.seed() applies

    Returns:
    DataFrame with columns 'Review' and 'Sentiment' ('positive' or
    'negative'); all positive rows come first, then all negative rows.
    """
    # Positive review templates
    positive_templates = [
        "This movie was {adj}! I loved the {noun} and the {noun} was amazing.",
        "A {adj} masterpiece. The {noun} was brilliant and the {noun} was perfect.",
        "I enjoyed every minute. The {noun} was {adj} and the {noun} was {adj}.",
        "What a {adj} film! The {noun} was outstanding and the {noun} was superb.",
        "{adj} performance by the cast. The {noun} was excellent and the {noun} was great.",
    ]

    # Negative review templates
    negative_templates = [
        "This movie was {adj}. I hated the {noun} and the {noun} was terrible.",
        "A {adj} disaster. The {noun} was awful and the {noun} was bad.",
        "I disliked every minute. The {noun} was {adj} and the {noun} was {adj}.",
        "What a {adj} film! The {noun} was poor and the {noun} was disappointing.",
        "{adj} performance by the cast. The {noun} was weak and the {noun} was mediocre.",
    ]

    # Words to fill in templates
    adjectives_pos = ['fantastic', 'wonderful', 'incredible', 'awesome', 'remarkable']
    adjectives_neg = ['horrible', 'terrible', 'boring', 'awful', 'disappointing']
    nouns = ['plot', 'acting', 'cinematography', 'dialogue', 'ending', 'character development',
             'soundtrack', 'direction', 'visual effects', 'storyline']

    # np.random and RandomState expose the same .choice API, so callers that
    # rely on np.random.seed() keep working when no explicit seed is given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    reviews = []
    sentiments = []

    # One pass per sentiment instead of two copy-pasted loops.
    recipes = [
        ('positive', positive_templates, adjectives_pos),
        ('negative', negative_templates, adjectives_neg),
    ]
    for sentiment, templates, adjectives in recipes:
        for _ in range(n_per_class):
            # Draw order (template, adjective, nouns) matches the original.
            template = str(rng.choice(templates))
            adj = rng.choice(adjectives)
            noun1, noun2 = rng.choice(nouns, 2, replace=True)
            # Fill the first {noun} slot with noun1 before formatting; format()
            # then fills the remaining {noun} slot with noun2. Formatting with
            # noun=noun1 alone would put the same noun in both slots.
            review = template.replace("{noun}", str(noun1), 1).format(adj=adj, noun=noun2)
            reviews.append(review)
            sentiments.append(sentiment)

    return pd.DataFrame({'Review': reviews, 'Sentiment': sentiments})

# Seed NumPy's global RNG first: the generator draws its templates and words
# through np.random, so without a seed every Restart & Run All would produce
# a different dataset (and different downstream results).
np.random.seed(42)

# Create the dataset
reviews_df = create_synthetic_reviews()

# Display sample of the data
print("Sample of the dataset:")
print(reviews_df.head())
print("\nSentiment distribution:")
print(reviews_df['Sentiment'].value_counts())

Sample of the dataset:
                                              Review Sentiment
0  A remarkable masterpiece. The plot was brillia...  positive
1  I enjoyed every minute. The soundtrack was won...  positive
2  remarkable performance by the cast. The charac...  positive
3  What a fantastic film! The acting was outstand...  positive
4  This movie was incredible! I loved the storyli...  positive

Sentiment distribution:
Sentiment
positive    50
negative    50
Name: count, dtype: int64


## 2. Tokenize the reviews

In [6]:
# Target labels: the 'positive' / 'negative' strings, one per review.
y = reviews_df['Sentiment']

# Bag-of-words features: English stop words are dropped and the vocabulary is
# capped at 500 terms. X holds one row of term counts per review.
# NOTE: the vocabulary is learned from every review, including the ones that
# later end up in the test split.
vectorizer = CountVectorizer(stop_words='english', max_features=500)
X = vectorizer.fit_transform(reviews_df['Review'])

## 3. Split the dataset

In [9]:
# Hold out 20% of the reviews for evaluation. stratify=y keeps the
# positive/negative ratio identical in both splits, which matters with so few
# samples; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 4. Train the classifier

In [12]:
# Fit a multinomial Naive Bayes model on the training word counts
# (fit() returns the fitted estimator, so construction and training chain).
classifier = MultinomialNB().fit(X_train, y_train)

# Score the held-out reviews: accuracy is the fraction of labels predicted correctly.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel accuracy on test set: {accuracy:.2f}")


Model accuracy on test set: 1.00


## 5. Prediction function

In [15]:
# Prediction function
def predict_review_sentiment(model, vectorizer, review):
    """
    Classify one review and return its predicted label.

    Parameters:
    model -- fitted classifier; its predict() is called on the features
    vectorizer -- fitted vectorizer; its transform() turns text into features
    review -- the review text to classify

    Returns:
    The first (and only) element of the model's prediction for the review.
    """
    # transform() expects a collection of documents, so wrap the single
    # review in a one-element list and unwrap the single prediction again.
    predictions = model.predict(vectorizer.transform([review]))
    return predictions[0]

# Try the prediction function on one clearly positive and one clearly
# negative hand-written review.
test_review = "This movie was fantastic with amazing acting and brilliant cinematography."
test_review2 = "I hated everything about this boring film with terrible acting."

# Classify both reviews with the trained model and fitted vectorizer.
predicted_sentiment = predict_review_sentiment(
    model=classifier, vectorizer=vectorizer, review=test_review
)
predicted_sentiment2 = predict_review_sentiment(
    model=classifier, vectorizer=vectorizer, review=test_review2
)

# Report each review next to its predicted label.
print(f"\nTest review: '{test_review}'")
print(f"Predicted sentiment: {predicted_sentiment}")

print(f"\nTest review: '{test_review2}'")
print(f"Predicted sentiment: {predicted_sentiment2}")


Test review: 'This movie was fantastic with amazing acting and brilliant cinematography.'
Predicted sentiment: positive

Test review: 'I hated everything about this boring film with terrible acting.'
Predicted sentiment: negative
