In [1]:
import re

import matplotlib.pyplot as plt
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB


# Load the labelled reviews from the CSV file
df = pd.read_csv('Reviews(13).csv')

# Normalise the sentiment labels to the spellings 'Negative', 'Neutral' and
# 'Positive'. The pairs below are applied in order as literal substring
# replacements; regex=False is passed explicitly because the default value of
# `regex` is not the same in every pandas version.
LABEL_REPLACEMENTS = [
    ('negative', 'Negative'),   # lower-case variants
    ('neutral', 'Neutral'),
    ('positive', 'Positive'),
    ('Netural', 'Neutral'),     # misspelling
    (' Neutral', 'Neutral'),    # stray space in front of the label
]
labels = df['Label']
for wrong, right in LABEL_REPLACEMENTS:
    labels = labels.str.replace(wrong, right, regex=False)
# Also drop any remaining leading/trailing whitespace so that e.g. 'Positive '
# does not end up as a separate class.
df['Label'] = labels.str.strip()

# Sanity checks on the cleaned labels. NOTE: as bare expressions these are only
# displayed by Jupyter when they are the last statement of a cell.
df['Label'].unique()

df['Label'].value_counts()

# Preprocessing the text data
# Stop-word set and stemmer are created on first use and then shared by every
# call, instead of being rebuilt for each review.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the cached ``(stop_words, stemmer)`` pair, building it on first use."""
    if not _PREPROCESS_RESOURCES:
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['stemmer'] = PorterStemmer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['stemmer']


def preprocess_text(text):
    """Lower-case, strip punctuation, drop English stop words and stem a text.

    Parameters
    ----------
    text : str
        The raw text. Any non-string value (for example ``None`` or the
        float NaN that pandas uses for a missing cell) is treated as an
        empty document.

    Returns
    -------
    str
        The stemmed tokens that survived stop-word filtering, joined by
        single spaces. Returns ``''`` when nothing is left.
    """
    # Missing values would otherwise fail on .lower() with an AttributeError.
    if not isinstance(text, str):
        return ''

    # Lower-case, then remove every character that is neither a word
    # character nor whitespace (note: '_' counts as a word character).
    tokens = re.sub(r'[^\w\s]', '', text.lower()).split()
    if not tokens:
        return ''

    # NOTE(review): punctuation is removed before the stop-word lookup, so any
    # stop-word entry that contains an apostrophe can no longer match its
    # stripped form - confirm whether that is intended.
    stop_words, stemmer = _get_preprocess_resources()
    return ' '.join(stemmer.stem(word) for word in tokens if word not in stop_words)

# NOTE: preprocess_text() is defined above but is currently not applied, so the
# model below is trained on the raw review text. Enabling it will change the
# reported metrics. To switch it on, uncomment the next line:
#df['review'] = df['review'].apply(preprocess_text)

# Split the data into training and testing sets.
# The fixed seed keeps the split (and therefore the metrics) reproducible.
TEST_SIZE = 0.2
RANDOM_STATE = 42
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df['review'], df['Label'], test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Create a bag-of-words representation of the text data.
# The vocabulary is learned from the training reviews only and then reused
# for the test reviews. The raw text keeps its own names (reviews_*) so that
# X_train / X_test always refer to the vectorised matrices.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(reviews_train)
X_test = vectorizer.transform(reviews_test)

# Train a Naive Bayes classifier on the training set
nb = MultinomialNB()
nb.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = nb.predict(X_test)

# Evaluate the model's performance.
# 'weighted' averages the per-class scores using each class's support.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")



Accuracy: 0.93
Precision: 0.93
Recall: 0.93
F1-score: 0.93


In [2]:
# Show per-class precision, recall, F1 and support for the test-set predictions
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

    Negative       0.91      0.91      0.91      1822
     Neutral       0.86      0.84      0.85       683
    Positive       0.96      0.97      0.96      2382

    accuracy                           0.93      4887
   macro avg       0.91      0.91      0.91      4887
weighted avg       0.93      0.93      0.93      4887

