In [None]:
#Sentiment analysis using Random Forest Classifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import string
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,  precision_recall_fscore_support

In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('Combined Data.csv')
# Summary statistics have to be printed explicitly: a bare expression that is
# not the last line of a cell produces no output.
print(df.describe())
print(df.columns)
# Drop the "Unnamed: 0" column when it exists (presumably a saved index column
# -- confirm); errors="ignore" makes this a no-op when the column is absent.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
# Rows without statement text cannot be cleaned or vectorized later on.
df = df.dropna(subset=["statement"])
# Remaining null counts per column, as a sanity check after the drop.
print(df.isnull().sum())

In [None]:
# Text preprocessing
def clean_text(text):
    """Normalize a raw statement before vectorization.

    The text is lowercased, every character listed in ``string.punctuation``
    is deleted, every run of digits is deleted, and leading/trailing
    whitespace is trimmed.

    Args:
        text: The statement to normalize (a ``str``).

    Returns:
        The normalized string.
    """
    lowered = text.lower()
    # A translation table with only a "delete" set removes exactly the
    # characters in string.punctuation and leaves everything else untouched.
    punctuation_free = lowered.translate(str.maketrans("", "", string.punctuation))
    digit_free = re.sub(r"\d+", "", punctuation_free)
    return digit_free.strip()
# Add a normalized copy of each statement next to the raw text.
df["clean_statement"] = df["statement"].map(clean_text)

# Turn the string labels in "status" into integer class ids; the fitted
# encoder is kept so the ids can be mapped back to labels later.
label_encoder = LabelEncoder()
df["encoded_status"] = label_encoder.fit_transform(df["status"])

In [None]:
# Build TF-IDF features, limiting the vocabulary to 5000 terms.
# NOTE(review): the vectorizer is fitted on every row of df, including rows
# that end up in the test split -- consider fitting on training text only.
vectorizer = TfidfVectorizer(
    max_features=5000,
)
X = vectorizer.fit_transform(df["clean_statement"])
y = df["encoded_status"]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Seed the forest so a fresh "Restart & Run All" reproduces the same model and
# metrics (same seed value as the train/test split). n_jobs=-1 trains the
# trees on all available cores.
model = RandomForestClassifier(random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Predict the held-out rows and compute precision / recall / F1.
y_pred = model.predict(X_test)
# average='binary' raises ValueError on a multiclass target, so it is only
# used when the encoder saw at most two labels; otherwise the per-class scores
# are averaged weighted by class support.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test,
    y_pred,
    average='binary' if len(label_encoder.classes_) <= 2 else 'weighted',
)

In [None]:
# Report the evaluation metrics, one per line (accuracy as a percentage).
print(
    f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%",
    f"Precision: {precision:.2f}",
    f"Recall:    {recall:.2f}",
    f"F1 Score:  {f1:.2f}",
    sep="\n",
)

In [None]:
# Classify one statement typed by the user with the fitted vectorizer and model.
user_input = input("\nEnter a review statement: ")
user_cleaned = clean_text(user_input)
if not user_cleaned:
    # Nothing is left after cleaning (blank, or only punctuation/digits), so
    # there are no terms to vectorize and a prediction would be meaningless.
    print("\nNo usable text entered; nothing to classify.")
else:
    user_vector = vectorizer.transform([user_cleaned])
    user_prediction = model.predict(user_vector)[0]
    # Decode the class id back to its original label instead of comparing with
    # a hard-coded integer: LabelEncoder assigns ids in sorted label order, so
    # a fixed id is not guaranteed to mean "Normal".
    predicted_status = label_encoder.inverse_transform([user_prediction])[0]
    # Assumes the healthy class is labelled "Normal" (any casing) -- TODO confirm
    # against the values in the "status" column.
    if str(predicted_status).strip().lower() == "normal":
        sentiment = "Positive (Normal)"
    else:
        sentiment = f"Negative ({predicted_status})"
    print("\nReview Sentiment:", sentiment)