In [1]:
# Import required libraries
import pandas as pd

# Read the user-review CSV into a DataFrame.
# NOTE(review): the absolute /content/... path looks environment-specific —
# adjust it when running the notebook elsewhere.
df = pd.read_csv(filepath_or_buffer="/content/googleplaystore_user_reviews.csv")

# Preview the first five rows
df.head(5)


Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [2]:
# Keep just the review text and its sentiment label, then discard every row
# in which either of the two is missing.
df = df.loc[:, ['Translated_Review', 'Sentiment']].dropna(axis=0, how='any')

# (rows, columns) left after the cleanup
df.shape


(37427, 2)

In [3]:
# Convert sentiment labels to numbers
sentiment_to_id = {
    'Negative': 0,
    'Neutral': 1,
    'Positive': 2
}

# Fail loudly on any value that is neither a known label nor an already
# encoded id; a plain dict .map() would silently turn it into NaN.
known_values = set(sentiment_to_id) | set(sentiment_to_id.values())
unexpected = set(df['Sentiment'].unique()) - known_values
if unexpected:
    raise ValueError(f"Unexpected Sentiment values: {unexpected!r}")

# Already-encoded ids pass through unchanged, so re-running this cell is
# harmless (mapping the ids through the dict a second time would replace the
# whole column with NaN). assign() returns a new frame instead of writing a
# column into a frame that was produced by filtering.
df = df.assign(
    Sentiment=df['Sentiment'].map(lambda value: sentiment_to_id.get(value, value))
)

# Check mapping
df['Sentiment'].value_counts()


Unnamed: 0_level_0,count
Sentiment,Unnamed: 1_level_1
2,23998
0,8271
1,5158


In [4]:
from sklearn.model_selection import train_test_split

# Features are the review texts; targets are the encoded sentiment labels
X, y = df['Translated_Review'], df['Sentiment']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn the raw review text into TF-IDF features: English stop words are
# removed and at most 5000 features are kept.
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')

# Fit on the training reviews only, then reuse the fitted vectorizer to
# transform the test reviews.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


In [6]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the TF-IDF training features,
# allowing the solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_tfidf, y=y_train)


In [7]:
from sklearn.metrics import accuracy_score, classification_report

# Predict the sentiment class of every held-out review
y_pred = model.predict(X_test_tfidf)

# Overall fraction of test reviews classified correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1 and support
report = classification_report(y_test, y_pred)
print(report)


Accuracy: 0.8954047555436815
              precision    recall  f1-score   support

           0       0.90      0.78      0.84      1653
           1       0.84      0.81      0.82      1049
           2       0.91      0.95      0.93      4784

    accuracy                           0.90      7486
   macro avg       0.88      0.85      0.86      7486
weighted avg       0.89      0.90      0.89      7486



In [9]:
# Reverse lookup from numeric class id to sentiment name.
# The position in the tuple is the id: 0 -> Negative, 1 -> Neutral, 2 -> Positive.
label_map = dict(enumerate(("Negative", "Neutral", "Positive")))


In [10]:
def _to_labels(codes):
    """Return the sentiment name for each numeric class id in ``codes``."""
    return [label_map[code] for code in codes]


# Human-readable versions of the predictions and of the true labels,
# kept side by side for comparison
y_pred_labels = _to_labels(y_pred)
y_test_labels = _to_labels(y_test)


In [11]:
# Side-by-side view of the first ten test reviews with their actual and
# predicted sentiment
preview = slice(10)
results = pd.DataFrame({
    "Review": X_test.values[preview],
    "Actual Sentiment": y_test_labels[preview],
    "Predicted Sentiment": y_pred_labels[preview],
})

results


Unnamed: 0,Review,Actual Sentiment,Predicted Sentiment
0,Great game heats phone short time. Please rect...,Positive,Positive
1,This maths formulas I want,Neutral,Neutral
2,Some suggestions improvement 1. Change throttl...,Negative,Positive
3,"The notifications work cellphone... Otherwise,...",Neutral,Neutral
4,This helps speak Polish friends,Neutral,Positive
5,Thanks continuing provide quality support. Sti...,Positive,Positive
6,Love game,Positive,Positive
7,It's good game much need update. There ads die...,Positive,Positive
8,Make a spirit,Neutral,Neutral
9,"I love it, dislike ingame purchases. I wish co...",Positive,Positive


In [14]:
# A single unseen review, wrapped in a list because the vectorizer expects
# an iterable of documents
new_review = ["This app is very slow and keeps crashing"]

# Vectorize with the already-fitted TF-IDF vocabulary
new_review_tfidf = tfidf.transform(new_review)

# One input row gives one predicted class id; unpack that single element
(prediction,) = model.predict(new_review_tfidf)

print("Predicted Sentiment:", label_map[prediction])


Predicted Sentiment: Negative
