In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
def main(file_name, text_col='Post_Preprocessed_Comments', label_col='label',
         test_size=0.2, random_state=82, max_features=5000):
    """Train several text classifiers on a labeled CSV and print their scores.

    The CSV is loaded with pandas, split into stratified train/test sets,
    vectorized with TF-IDF (fit on the training split only), and then four
    classifiers are fitted and evaluated on the held-out split.

    Parameters
    ----------
    file_name : str or path-like
        Path of the CSV file to load.
    text_col : str, default 'Post_Preprocessed_Comments'
        Name of the column holding the text of each sample.
    label_col : str, default 'label'
        Name of the column holding the class label of each sample.
    test_size : float, default 0.2
        Fraction of the rows held out for evaluation.
    random_state : int, default 82
        Seed for the train/test split.
    max_features : int, default 5000
        Upper bound on the TF-IDF vocabulary size.

    Raises
    ------
    KeyError
        If ``text_col`` or ``label_col`` is not a column of the CSV.

    Returns
    -------
    None
        Accuracy and a classification report are printed for every model.
    """
    # Load preprocessed CSV
    df = pd.read_csv(file_name)

    # Fail early, with a readable message, when an expected column is absent.
    missing_cols = [col for col in (text_col, label_col) if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Missing column(s) in {file_name}: {missing_cols}")

    # Rows without a label cannot be used for supervised training, and a NaN
    # label would also break the stratified split below.
    df = df.dropna(subset=[label_col])

    # read_csv parses empty cells as NaN; TfidfVectorizer refuses NaN
    # documents, so treat a missing text as an empty document instead.
    X = df[text_col].fillna("").astype(str)
    y = df[label_col]

    # Split train/test, keeping the class proportions in both parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Vectorize text with TF-IDF (vocabulary learned from the training split only)
    vectorizer = TfidfVectorizer(max_features=max_features)
    X_train_vect = vectorizer.fit_transform(X_train)
    X_test_vect = vectorizer.transform(X_test)

    # Define models
    models = {
        "Naive Bayes": MultinomialNB(),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "SVM": SVC(kernel='linear'),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42)
    }

    # Train and evaluate
    for name, model in models.items():
        model.fit(X_train_vect, y_train)
        y_pred = model.predict(X_test_vect)

        print(f"==== {name} ====")
        print("Accuracy:", accuracy_score(y_test, y_pred))
        # zero_division=0 reports 0.00 for a class that is never predicted
        # (same numbers as before) without emitting a warning per metric.
        print(classification_report(y_test, y_pred, zero_division=0))
        print("\n")

In [3]:
# Run the full train/evaluate pipeline on the labeled comments dataset.
main(file_name="comments_preprocessed_post_labeling.csv")

==== Naive Bayes ====
Accuracy: 0.7142857142857143
              precision    recall  f1-score   support

    negative       0.71      1.00      0.83        95
     neutral       0.00      0.00      0.00        26
    positive       0.00      0.00      0.00        12

    accuracy                           0.71       133
   macro avg       0.24      0.33      0.28       133
weighted avg       0.51      0.71      0.60       133



==== Logistic Regression ====
Accuracy: 0.7218045112781954
              precision    recall  f1-score   support

    negative       0.72      1.00      0.84        95
     neutral       1.00      0.04      0.07        26
    positive       0.00      0.00      0.00        12

    accuracy                           0.72       133
   macro avg       0.57      0.35      0.30       133
weighted avg       0.71      0.72      0.61       133



==== SVM ====
Accuracy: 0.7293233082706767
              precision    recall  f1-score   support

    negative       0.73   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


==== Random Forest ====
Accuracy: 0.7293233082706767
              precision    recall  f1-score   support

    negative       0.73      0.99      0.84        95
     neutral       0.50      0.04      0.07        26
    positive       1.00      0.17      0.29        12

    accuracy                           0.73       133
   macro avg       0.74      0.40      0.40       133
weighted avg       0.71      0.73      0.64       133



