In [1]:
import pandas as pd

import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

from nltk.stem import WordNetLemmatizer

In [3]:
df = pd.read_csv('/Users/sherazhasan/Desktop/test3/scripts-archive/sentiment.csv')
df

Unnamed: 0,reviewText,Positive
0,This is a one of the best apps acording to a b...,1
1,This is a pretty good version of the game for ...,1
2,this is a really cool game. there are a bunch ...,1
3,"This is a silly game and can be frustrating, b...",1
4,This is a terrific game on any pad. Hrs of fun...,1
...,...,...
666,A fun game without having to put very much tho...,1
667,Addictive!! I really wish Amazon would not ma...,1
668,who dosent love the birds it was free on my ki...,1
669,I love unlocking bonus levels. I also love gra...,1


In [4]:
def preprocess_text(text):
    # Tokenize the text
    tokens = word_tokenize(text.lower())
    # Remove stop words
    filtered_tokens = [token for token in tokens if token not in stopwords.words('english')]
    
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]
    # Join the tokens back into a string
    processed_text = ' '.join(lemmatized_tokens)
    return processed_text

df['reviewText'] = df['reviewText'].apply(preprocess_text)
df

Unnamed: 0,reviewText,Positive
0,one best apps acording bunch people agree bomb...,1
1,pretty good version game free . lot different ...,1
2,really cool game . bunch level find golden egg...,1
3,"silly game frustrating , lot fun definitely re...",1
4,terrific game pad . hr fun . grandkids love . ...,1
...,...,...
666,fun game without put much thought . n't like a...,1
667,addictive ! ! really wish amazon would make ri...,1
668,dosent love bird free kindle truck driver im s...,1
669,love unlocking bonus level . also love graphic...,1


In [5]:
# initialize NLTK sentiment analyzer
analyzer = SentimentIntensityAnalyzer()
# create get_sentiment function
def get_sentiment(text):
    scores = analyzer.polarity_scores(text)
    sentiment = 1 if scores['pos'] > 0 else 0
    return sentiment

df['sentiment'] = df['reviewText'].apply(get_sentiment)
df

Unnamed: 0,reviewText,Positive,sentiment
0,one best apps acording bunch people agree bomb...,1,1
1,pretty good version game free . lot different ...,1,1
2,really cool game . bunch level find golden egg...,1,1
3,"silly game frustrating , lot fun definitely re...",1,1
4,terrific game pad . hr fun . grandkids love . ...,1,1
...,...,...,...
666,fun game without put much thought . n't like a...,1,1
667,addictive ! ! really wish amazon would make ri...,1,1
668,dosent love bird free kindle truck driver im s...,1,1
669,love unlocking bonus level . also love graphic...,1,1


In [6]:
from sklearn.metrics import confusion_matrix

print(confusion_matrix(df['Positive'], df['sentiment']))

[[ 22  68]
 [ 14 567]]


In [7]:
from sklearn.metrics import classification_report

print(classification_report(df['Positive'], df['sentiment']))

              precision    recall  f1-score   support

           0       0.61      0.24      0.35        90
           1       0.89      0.98      0.93       581

    accuracy                           0.88       671
   macro avg       0.75      0.61      0.64       671
weighted avg       0.86      0.88      0.85       671



In [22]:
sample_reviews = [
    "I did not love this product, it's amazing!",
    "This is the not worst experience I've ever had.",
    "The quality is okay, but could be improved.",
    "Fantastic service, very satisfied!",
    "I would not recommend this to anyone."
]

# Test the get_sentiment function with sample reviews
for review in sample_reviews:
    sentiment = get_sentiment(review)
    sentiment_label = "Positive" if sentiment == 1 else "Negative"
    print(f"Review: '{review}'\nPredicted Sentiment: {sentiment_label}\n")


Review: 'I did not love this product, it's amazing!'
Predicted Sentiment: Positive

Review: 'This is the not worst experience I've ever had.'
Predicted Sentiment: Positive

Review: 'The quality is okay, but could be improved.'
Predicted Sentiment: Positive

Review: 'Fantastic service, very satisfied!'
Predicted Sentiment: Positive

Review: 'I would not recommend this to anyone.'
Predicted Sentiment: Negative



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [11]:
X = df['reviewText']
y = df['Positive']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
# Convert text data into numerical data using CountVectorizer
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [14]:
# Initialize and train the Multinomial Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train_vec, y_train)


In [15]:
y_pred = model.predict(X_test_vec)


In [16]:
X_vec = vectorizer.transform(X)  # Transform the entire dataset
df['NaiveBayesSentiment'] = model.predict(X_vec)

In [17]:
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

Accuracy: 0.8740740740740741
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.17      0.26        18
           1       0.88      0.98      0.93       117

    accuracy                           0.87       135
   macro avg       0.74      0.57      0.60       135
weighted avg       0.85      0.87      0.84       135



In [18]:
print(df)

                                            reviewText  Positive  sentiment  \
0    one best apps acording bunch people agree bomb...         1          1   
1    pretty good version game free . lot different ...         1          1   
2    really cool game . bunch level find golden egg...         1          1   
3    silly game frustrating , lot fun definitely re...         1          1   
4    terrific game pad . hr fun . grandkids love . ...         1          1   
..                                                 ...       ...        ...   
666  fun game without put much thought . n't like a...         1          1   
667  addictive ! ! really wish amazon would make ri...         1          1   
668  dosent love bird free kindle truck driver im s...         1          1   
669  love unlocking bonus level . also love graphic...         1          1   
670  much fun , addicting . yes , craze got * * big...         1          1   

     NaiveBayesSentiment  
0                      0

In [20]:
# Sample reviews for testing
sample_reviews = [
    "I love this product, it's amazing!",
    "This is the worst experience I've ever had.",
    "The quality is okay, but could be improved.",
    "Fantastic service, very satisfied!",
    "I would not recommend this to anyone."
]

# Transform the sample reviews using the previously fitted CountVectorizer
sample_reviews_vec = vectorizer.transform(sample_reviews)

# Make predictions using the trained model
sample_predictions = model.predict(sample_reviews_vec)

# Print the sample reviews and their predicted sentiments
for review, prediction in zip(sample_reviews, sample_predictions):
    sentiment = "Positive" if prediction == 1 else "Negative"
    print(f"Review: '{review}'\nPredicted Sentiment: {sentiment}\n")


Review: 'I love this product, it's amazing!'
Predicted Sentiment: Positive

Review: 'This is the worst experience I've ever had.'
Predicted Sentiment: Negative

Review: 'The quality is okay, but could be improved.'
Predicted Sentiment: Negative

Review: 'Fantastic service, very satisfied!'
Predicted Sentiment: Positive

Review: 'I would not recommend this to anyone.'
Predicted Sentiment: Positive



In [19]:
df.to_csv('sentiment.csv', index=False)

# Print confirmation message
print("Dataset successfully updated and saved.")

Dataset successfully updated and saved.
