# Sentiment Analysis Using Naive Bayes, Logistic Regression, and Linear SVM with Vader Data

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Vader sentiment data
df = pd.read_csv("hogwarts_vader_sentiments.csv")

# Verify the columns
print("Columns in the DataFrame:", df.columns.tolist())

# Check for missing values in 'cleaned_title'
print("\nMissing Values in 'cleaned_title':", df['cleaned_title'].isnull().sum())

# Handle missing values in 'cleaned_title' by replacing NaN with an empty string
# so the vectorizer always receives strings.
df['cleaned_title'] = df['cleaned_title'].fillna("")

# Prepare features and target
X = df['cleaned_title']  # Features (text data)
y = df['Sentiment'].map({"Positive": 1, "Negative": -1, "Neutral": 0})  # Target labels

# Series.map yields NaN for any label missing from the mapping; fail here with a
# clear message instead of letting the estimators choke on NaN targets later.
if y.isnull().any():
    unmapped_labels = df.loc[y.isnull(), 'Sentiment'].unique().tolist()
    raise ValueError(f"Unmapped or missing values in 'Sentiment': {unmapped_labels}")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF Vectorization (vocabulary is learned from the training split only)
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


def _fit_and_report(name, model, train_features, train_labels, test_features, test_labels):
    """Fit *model*, print its test-set metrics under *name*, and return the predictions.

    Args:
        name: Label used in the printed section headers.
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        train_features: Feature matrix used for fitting.
        train_labels: Target labels aligned with ``train_features``.
        test_features: Feature matrix used for prediction.
        test_labels: True labels aligned with ``test_features``.

    Returns:
        The predictions produced by ``model.predict(test_features)``.
    """
    model.fit(train_features, train_labels)
    predictions = model.predict(test_features)

    print(f"\n{name} Results:")
    print(f"Accuracy: {accuracy_score(test_labels, predictions):.4f}")
    print(f"Classification Report ({name}):")
    # zero_division=0 reports 0.0 for classes that are never predicted (the same
    # value as the default) without emitting an UndefinedMetricWarning.
    print(classification_report(test_labels, predictions, zero_division=0))
    print(f"Confusion Matrix ({name}):")
    print(confusion_matrix(test_labels, predictions))
    return predictions


# --- Naive Bayes Model ---
nb_model = MultinomialNB()
y_pred_nb = _fit_and_report(
    "Naive Bayes", nb_model, X_train_tfidf, y_train, X_test_tfidf, y_test
)

# --- Logistic Regression Model ---
lr_model = LogisticRegression(max_iter=1000)
y_pred_lr = _fit_and_report(
    "Logistic Regression", lr_model, X_train_tfidf, y_train, X_test_tfidf, y_test
)

# --- Support Vector Machine Model ---
svm_model = LinearSVC(max_iter=1000)
y_pred_svm = _fit_and_report(
    "SVM", svm_model, X_train_tfidf, y_train, X_test_tfidf, y_test
)


Columns in the DataFrame: ['cleaned_title', 'Sentiment']

Missing Values in 'cleaned_title': 35

Naive Bayes Results:
Accuracy: 0.6550
Classification Report (Naive Bayes):
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00        33
           0       0.62      0.95      0.75       108
           1       0.80      0.47      0.60        59

    accuracy                           0.66       200
   macro avg       0.47      0.48      0.45       200
weighted avg       0.57      0.66      0.58       200

Confusion Matrix (Naive Bayes):
[[  0  31   2]
 [  0 103   5]
 [  0  31  28]]

Logistic Regression Results:
Accuracy: 0.6700
Classification Report (Logistic Regression):
              precision    recall  f1-score   support

          -1       1.00      0.06      0.11        33
           0       0.64      0.95      0.76       108
           1       0.81      0.49      0.61        59

    accuracy                           0.67       200
   ma

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Sentiment Analysis Using Naive Bayes and Logistic Regression with BERT Data


# Sentiment Analysis Using Naive Bayes and Logistic Regression with GPT Data
