# Sentiment Analysis Using SVM with VADER Data

In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the CSV file; the 'cleaned_title' (text) and 'Sentiment' (label)
# columns are the only ones this cell reads.
df = pd.read_csv('hogwarts_vader_sentiments.csv')

# Replace missing titles with an empty string so every row holds a str
# when it reaches the vectorizer.
df['cleaned_title'] = df['cleaned_title'].fillna('')

# Rows without a label cannot be used for supervised training.
df = df.dropna(subset=['Sentiment'])

# Encode the three sentiment classes as integers.
y = df['Sentiment'].map({"Positive": 1, "Negative": -1, "Neutral": 0})

# Series.map yields NaN for every label that is not a key of the mapping
# (different casing, stray whitespace, an unexpected class, ...). Such rows
# would otherwise carry NaN targets into the split and the classifier, so
# report and drop them here, keeping the features and targets aligned.
unmapped = y.isnull()
if unmapped.any():
    print(
        f"Dropping {unmapped.sum()} row(s) with unrecognised 'Sentiment' labels:",
        sorted(df.loc[unmapped, 'Sentiment'].astype(str).unique()),
    )
    df = df.loc[~unmapped]
    y = y.loc[~unmapped]

# With the NaNs gone the targets can be stored as real integers.
y = y.astype('int64')
X = df['cleaned_title']

# Check if missing values are handled
print("\nMissing values in 'cleaned_title' after handling:", df['cleaned_title'].isnull().sum())
print("Missing values in 'Sentiment' after handling:", y.isnull().sum())

# Split the data into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF Vectorization: the vocabulary and IDF weights are fitted on the
# training texts only and then reused, unchanged, to transform the test texts.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# --- Support Vector Machine Model ---
svm_model = LinearSVC(max_iter=1000)
svm_model.fit(X_train_tfidf, y_train)

# Predictions and evaluation for SVM on the held-out test set
y_pred_svm = svm_model.predict(X_test_tfidf)

# Print evaluation results
print("\nSVM Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}")
print("Classification Report (SVM):")
print(classification_report(y_test, y_pred_svm))
print("Confusion Matrix (SVM):")
print(confusion_matrix(y_test, y_pred_svm))



Missing values in 'cleaned_title' after handling: 0
Missing values in 'Sentiment' after handling: 0

SVM Results:
Accuracy: 0.7700
Classification Report (SVM):
              precision    recall  f1-score   support

          -1       0.80      0.36      0.50        33
           0       0.74      0.92      0.82       108
           1       0.83      0.73      0.77        59

    accuracy                           0.77       200
   macro avg       0.79      0.67      0.70       200
weighted avg       0.78      0.77      0.75       200

Confusion Matrix (SVM):
[[12 19  2]
 [ 2 99  7]
 [ 1 15 43]]


# Sentiment Analysis Using SVM with BERT Data