In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load the dataset
file_path = "YoutubeCommentsDataSet.csv"
df = pd.read_csv(file_path)

# Drop missing values
df = df.dropna()

# Split data into features and labels
X = df["Comment"]
y = df["Sentiment"]

# Convert text data into numerical vectors
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_vectorized = vectorizer.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Train an SVM model
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Evaluate the model
y_pred = svm_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Function to classify new comments
def classify_comment(comment):
    comment_vectorized = vectorizer.transform([comment])
    prediction = svm_model.predict(comment_vectorized)
    return prediction[0]

# Take user input
user_comment = input("Enter a comment: ")
print("Predicted Sentiment:", classify_comment(user_comment))


Classification Report:
              precision    recall  f1-score   support

    negative       0.53      0.35      0.42       441
     neutral       0.59      0.60      0.59       912
    positive       0.83      0.88      0.85      2320

    accuracy                           0.74      3673
   macro avg       0.65      0.61      0.62      3673
weighted avg       0.73      0.74      0.74      3673

Enter a comment: I love this product! It's amazing.
Predicted Sentiment: positive
