In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
file_path = 'reviews_sentiment.csv'
df = pd.read_csv(file_path)
# print(df.head())

print(df.isnull().sum())
df = df.dropna(subset=['review'])
label_encoder = LabelEncoder()
df['sentiments'] = label_encoder.fit_transform(df['sentiments'])

X = df['review']
y = df['sentiments']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

y_pred = model.predict(X_test_tfidf)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

                                              review sentiments
0  i wish would have gotten one earlier love it a...   positive
1  i ve learned this lesson again open the packag...    neutral
2          it is so slow and lags find better option    neutral
3  roller ball stopped working within months of m...    neutral
4  i like the color and size but it few days out ...    neutral
review        3
sentiments    0
dtype: int64
Accuracy: 0.7082
Classification Report:
              precision    recall  f1-score   support

    negative       1.00      0.01      0.02       315
     neutral       0.63      0.56      0.60      1248
    positive       0.74      0.92      0.82      1905

    accuracy                           0.71      3468
   macro avg       0.79      0.50      0.48      3468
weighted avg       0.73      0.71      0.67      3468



In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

file_path = 'reviews_sentiment.csv'
df = pd.read_csv(file_path)

df = df.dropna(subset=['review'])
label_encoder = LabelEncoder()
df['sentiments'] = label_encoder.fit_transform(df['sentiments'])

X = df['review']
y = df['sentiments']

tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_tfidf = tfidf_vectorizer.fit_transform(X)

model = MultinomialNB()
model.fit(X_tfidf, y)

def predict_sentiment(review_text):
    review_tfidf = tfidf_vectorizer.transform([review_text])
    predicted_class = model.predict(review_tfidf)
    predicted_prob = model.predict_proba(review_tfidf)
    predicted_label = label_encoder.inverse_transform(predicted_class)[0]
    confidence = max(predicted_prob[0])
    return predicted_label, confidence

user_review = input("Review: ")
predicted_sentiment, prediction_confidence = predict_sentiment(user_review)
print(f"The predicted sentiment is '{predicted_sentiment}' with a confidence of {prediction_confidence}.")

Please enter a review: I find the mixture-grinder quite rugged and efficient. A dependable product with value for money.
The predicted sentiment is 'positive' with a confidence of 0.6843749165936368.
