In [1]:
!pip install pandas numpy matplotlib seaborn nltk scikit-learn wordcloud


Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Collecting wordcloud
  Obtaining dependency information for wordcloud from https://files.pythonhosted.org/packages/ee/d3/67ccdab9d2756f8b30b0669015840cd5fdb5f062a2d621d67f033cf3dd54/wordcloud-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading wordcloud-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Downloading wordcloud-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (547 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 547.9/547.9 kB 10.6 MB/s eta 0:00:00
Installing collected packages: wordcloud
Successfully installed wordcloud-1.9.4


In [None]:
!pip install tensorflow keras


In [None]:
import pandas as pd

# Read the raw review export; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="Reviews.csv")

# Print the column summary for a quick sanity check
df.info()

# Preview the first five rows (rendered as the cell's output)
df.head(n=5)


In [None]:
# Keep only the rating and the review body, and discard rows where either is missing.
# dropna() returns a new frame, so nothing is mutated in place on a derived slice.
df = df[['Score', 'Text']].dropna()

# Remove neutral reviews (score == 3). The explicit .copy() makes the result an
# independent frame, so the column assignment below cannot trigger
# SettingWithCopyWarning or write into a temporary object.
df = df[df['Score'] != 3].copy()

# Binary target: 1 = Positive (score above 3), 0 = Negative (score below 3)
df['Sentiment'] = (df['Score'] > 3).astype(int)

# Text cleaning: regex support plus the NLTK English stop-word list
import re
import nltk
from nltk.corpus import stopwords

# Make sure the stop-word corpus is available locally before reading it
nltk.download('stopwords')

# A set gives constant-time membership tests when filtering tokens
stop_words = {*stopwords.words('english')}

def clean_text(text, stopword_set=None):
    """Normalise one review into a space-separated string of content words.

    The text is lower-cased, every character outside a-z is replaced by a
    space, and tokens found in the stop-word collection are dropped.

    Parameters
    ----------
    text : str or any
        Raw review text. ``None`` and float NaN yield an empty string; any
        other non-string value is converted with ``str()`` first.
    stopword_set : collection of str, optional
        Tokens to remove. Defaults to the module-level ``stop_words`` set,
        which keeps single-argument calls (e.g. ``Series.apply(clean_text)``)
        working exactly as before.

    Returns
    -------
    str
        The remaining tokens joined by single spaces (possibly empty).
    """
    if stopword_set is None:
        stopword_set = stop_words

    # Missing values carry no words; NaN is the only float that differs from itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    # Lower-case first so the a-zA-Z class only ever has to keep a-z.
    letters_only = re.sub(r"[^a-zA-Z]", " ", str(text).lower())
    return " ".join(tok for tok in letters_only.split() if tok not in stopword_set)

# Run the cleaner over every review body and store the result in a new column
df['Cleaned_Text'] = df['Text'].map(clean_text)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Target vector: the binary sentiment label
y = df['Sentiment']

# TF-IDF features over the cleaned text, with the vocabulary capped at 5000 terms.
# NOTE(review): the vectoriser is fitted on every row of df, i.e. before any
# train/test split, so held-out rows influence the vocabulary and IDF weights.
# Consider fitting on the training portion only.
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['Cleaned_Text'])


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# One seed for every stochastic step so a fresh Restart & Run All reproduces the numbers.
SEED = 42

# Hold out 20% of the rows for evaluation. stratify=y keeps the positive/negative
# ratio the same in both partitions.
# NOTE(review): X was vectorised before this split, so the TF-IDF statistics have
# already seen the held-out rows. Confirm whether that is acceptable here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

models = {
    "Naive Bayes": MultinomialNB(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # random_state pins the bootstrap/feature sampling; n_jobs=-1 builds the
    # trees on all available cores without changing the fitted model.
    "Random Forest": RandomForestClassifier(
        n_estimators=100, random_state=SEED, n_jobs=-1
    ),
}

# Fit each candidate on the training split and report held-out performance.
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\n{name} Results:")
    print(confusion_matrix(y_test, preds))
    print(classification_report(y_test, preds))


In [None]:
# Write the current frame to disk, leaving the row index out of the file
df.to_csv(path_or_buf="CleanedReviews.csv", index=False)