In [3]:
!pip install pandas numpy scikit-learn nltk



In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dasdi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dasdi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\dasdi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [8]:
# Load the dataset
data = pd.read_csv('Reviews_And_Ratings.csv')

# Keep only relevant columns
data = data[['Rating', 'translated']]

# Drop missing values
data.dropna(inplace=True)

# Display the first few rows
data.head()

Unnamed: 0,Rating,translated
0,5,What to say? My daughter LOVES him and I with ...
1,5,"I decided to test this plush to my grandson, j..."
2,5,"An unusual pet, this otter is beautiful! Cute ..."
3,4,"Beautiful, soft and very relaxing. It comes wi..."
4,4,"Plush tender, my 16-month-old loves it, sleeps..."


In [9]:
# Map ratings to sentiments
def map_sentiment(rating):
    if rating <= 2:
        return 0  # Negative
    elif rating == 3:
        return 1  # Neutral
    else:
        return 2  # Positive

data['sentiment'] = data['Rating'].apply(map_sentiment)
data = data[['translated', 'sentiment']]
data.head()

Unnamed: 0,translated,sentiment
0,What to say? My daughter LOVES him and I with ...,2
1,"I decided to test this plush to my grandson, j...",2
2,"An unusual pet, this otter is beautiful! Cute ...",2
3,"Beautiful, soft and very relaxing. It comes wi...",2
4,"Plush tender, my 16-month-old loves it, sleeps...",2


In [10]:
# Lowercase, tokenize, remove stopwords, and lemmatize
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    filtered_tokens = [lemmatizer.lemmatize(word) for word in tokens if word.isalnum() and word not in stop_words]
    return ' '.join(filtered_tokens)

data['cleaned_review'] = data['translated'].apply(preprocess_text)
data = data[['cleaned_review', 'sentiment']]
data.head()

Unnamed: 0,cleaned_review,sentiment
0,say daughter love soft zero visible seam tende...,2
1,decided test plush grandson three year old soo...,2
2,unusual pet otter beautiful cute melody light ...,2
3,beautiful soft relaxing come behind back key s...,2
4,plush tender love sleep hugged cuddle even wak...,2


In [11]:
X = data['cleaned_review']
y = data['sentiment']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
vectorizer = CountVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

In [13]:
model = MultinomialNB()
model.fit(X_train_vectors, y_train)

In [14]:
y_pred = model.predict(X_test_vectors)

conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

conf_matrix, classification_rep, accuracy

(array([[ 10,   2,  12],
        [  2,   1,  11],
        [  2,   0, 267]], dtype=int64),
 '              precision    recall  f1-score   support\n\n           0       0.71      0.42      0.53        24\n           1       0.33      0.07      0.12        14\n           2       0.92      0.99      0.96       269\n\n    accuracy                           0.91       307\n   macro avg       0.66      0.49      0.53       307\nweighted avg       0.88      0.91      0.88       307\n',
 0.9055374592833876)

In [15]:
new_reviews = ["This product is amazing!", "Worst purchase ever.", "It was okay, nothing special."]
new_reviews_cleaned = [preprocess_text(review) for review in new_reviews]
new_reviews_vectors = vectorizer.transform(new_reviews_cleaned)
predictions = model.predict(new_reviews_vectors)

# Map predictions back to sentiment labels
sentiment_labels = {0: 'negative', 1: 'neutral', 2: 'positive'}
predicted_sentiments = [sentiment_labels[pred] for pred in predictions]

predicted_sentiments

['positive', 'positive', 'positive']

In [None]:
# This is doing by Naive Bayes Classifier.