In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
data = pd.read_csv("Flipkart Reviews Sentiment.csv")

# Ensure required columns exist
if 'Review' not in data.columns or 'sentiment' not in data.columns:
    raise ValueError("The dataset does not contain the required columns: 'Review' and 'sentiment'.")

# Feature extraction
X = data['Review']  # Text data
y = data['sentiment']  # Target variable

# Convert text to numerical data using TF-IDF
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Initialize and train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


  data = pd.read_csv("Flipkart Reviews Sentiment.csv")


Model Accuracy: 0.9526445264452644
Classification Report:
               precision    recall  f1-score   support

          -1       0.99      0.90      0.95      4615
           0       0.96      0.63      0.76      2570
           1       0.95      1.00      0.97     23709

    accuracy                           0.95     30894
   macro avg       0.97      0.84      0.89     30894
weighted avg       0.95      0.95      0.95     30894

Confusion Matrix:
 [[ 4176    14   425]
 [   10  1620   940]
 [   17    57 23635]]
