In [None]:
import os
import csv

def read_processed_csv(csv_path):
    """Load comment texts and binary labels from a processed CSV file.

    The file must have a header row containing a ``comment`` column and a
    ``viral`` column. Rows are skipped (never raised on) when:

    * either column is missing from the row (including short rows, for which
      ``csv.DictReader`` fills the absent fields with ``None``),
    * the comment is empty after stripping surrounding whitespace,
    * the label cannot be parsed as an integer, or is not 0 or 1.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A ``(texts, labels)`` tuple of two equally long lists: the stripped
        comment strings and their integer labels (0 or 1), in file order.
    """
    texts = []
    labels = []

    # newline='' is what the csv module expects, so quoted fields containing
    # line breaks are parsed correctly. 'utf-8-sig' reads plain UTF-8 as-is
    # and drops a leading BOM, which would otherwise corrupt the first header.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)

        for row in reader:
            raw_comment = row.get("comment")
            raw_label = row.get("viral")

            # Missing column, or a short row padded with None by DictReader.
            if raw_comment is None or raw_label is None:
                continue

            comment = raw_comment.strip()
            if not comment:  # Skip empty comments
                continue

            try:
                label = int(raw_label)
            except ValueError:
                # Label is not an integer literal.
                continue

            if label not in (0, 1):  # Ensure binary label
                continue

            texts.append(comment)
            labels.append(label)

    return texts, labels

In [None]:
import numpy as np
import random

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# One seed for every stochastic step in this cell (negative sampling and the
# train/test split) so that re-running the cell reproduces the same numbers.
SEED = 42
# Upper bound on sampled negatives per positive example. The resulting set is
# downsampled to at most 5:1; it is not class-balanced.
NEG_PER_POS = 5

texts, labels = read_processed_csv('dataset_processed.csv')

# Partition the comments by label; any label other than 0 or 1 is ignored.
pos_texts = [text for text, label in zip(texts, labels) if label == 1]
neg_texts = [text for text, label in zip(texts, labels) if label == 0]
pos_labels = [1] * len(pos_texts)
neg_labels = [0] * len(neg_texts)

ct = len(pos_labels)

# Keep every positive and draw at most NEG_PER_POS negatives per positive.
# A dedicated seeded generator makes the draw repeatable without touching the
# global `random` state.
k = min(len(neg_texts), ct * NEG_PER_POS)
neg_indices = random.Random(SEED).sample(range(len(neg_texts)), k)

neg_texts_sampled = [neg_texts[i] for i in neg_indices]
neg_labels_sampled = [neg_labels[i] for i in neg_indices]

# Downsampled dataset: all positives followed by the sampled negatives.
texts_balanced = pos_texts + neg_texts_sampled
labels_balanced = pos_labels + neg_labels_sampled
y = np.array(labels_balanced, dtype=int)

# Split row indices before building features, so the vocabulary can be learned
# from the training comments only and no test-set tokens leak into the model.
train_idx, test_idx = train_test_split(
    np.arange(len(y)),
    test_size=0.3,
    random_state=SEED,
    stratify=y
)

vectorizer = CountVectorizer()
vectorizer.fit([texts_balanced[i] for i in train_idx])

# Full count matrix under the train-only vocabulary; tokens that never occur
# in a training comment have no column.
X = vectorizer.transform(texts_balanced)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

model = MultinomialNB()
model.fit(X_train, y_train)

y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

print("Training accuracy:", accuracy_score(y_train, y_train_pred))
print("Test accuracy:", accuracy_score(y_test, y_test_pred))

print("Train classification report:")
print(classification_report(y_train, y_train_pred, digits=3))

print("Test classification report:")
print(classification_report(y_test, y_test_pred, digits=3))


Reading: dataset_processed.csv
✓ Loaded 129428 comments (21572 viral, 107856 non-viral)
Training accuracy: 0.8862459850550227
Test accuracy: 0.8535115506451364

--- Train classification report ---
              precision    recall  f1-score   support

           0      0.919     0.947     0.933     75499
           1      0.688     0.581     0.630     15100

    accuracy                          0.886     90599
   macro avg      0.803     0.764     0.781     90599
weighted avg      0.880     0.886     0.882     90599


--- Test classification report ---
              precision    recall  f1-score   support

           0      0.905     0.921     0.913     32357
           1      0.566     0.518     0.541      6472

    accuracy                          0.854     38829
   macro avg      0.736     0.719     0.727     38829
weighted avg      0.849     0.854     0.851     38829



In [None]:
def top_discriminative_words(model, vectorizer, k=20):
    """Rank vocabulary terms by their log-odds between the two classes.

    The score of a term is ``feature_log_prob_[1] - feature_log_prob_[0]``,
    i.e. log P(word|class1) - log P(word|class0) as stored on the fitted
    model. Large positive scores favour class 1, large negative scores
    favour class 0.

    Args:
        model: Fitted classifier exposing ``feature_log_prob_`` with at least
            two rows (row 0 and row 1 are the ones compared).
        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()``,
            with names aligned to the columns of ``feature_log_prob_``.
        k: Number of terms to return for each class.

    Returns:
        ``(top_class0, top_class1)``: two lists of ``(term, score)`` pairs.
        ``top_class0`` holds the ``k`` lowest scores in ascending order,
        ``top_class1`` the ``k`` highest scores in descending order.
    """
    vocab = np.array(vectorizer.get_feature_names_out())

    class0_log_prob = model.feature_log_prob_[0]
    class1_log_prob = model.feature_log_prob_[1]
    scores = class1_log_prob - class0_log_prob

    # A single ascending sort serves both ends of the ranking.
    ascending = np.argsort(scores)
    lowest = ascending[:k]
    highest = ascending[::-1][:k]

    def _pair_up(indices):
        # Attach each selected term to its score, keeping the index order.
        return list(zip(vocab[indices], scores[indices]))

    return _pair_up(lowest), _pair_up(highest)

top0, top1 = top_discriminative_words(model, vectorizer, k=20)

# Print both rankings with one loop: a heading line, then one row per term
# (term left-aligned in 25 columns, score with three decimals).
for heading, ranked in (
    ("Top words for class 0:", top0),
    ("\nTop words for class 1:", top1),
):
    print(heading)
    for word, value in ranked:
        print(f"{word:25s} {value: .3f}")

Top words for class 0 (negative / non-toxic):
raid                      -6.976
que                       -4.414
yusufte                   -3.654
felipe                    -3.588
русский                   -3.530
neto                      -3.505
eu                        -3.415
не                        -3.285
faze                      -3.254
br                        -3.238
birthday                  -3.188
se                        -3.188
en                        -3.188
español                   -3.154
te                        -3.043
mods                      -3.003
modded                    -3.003
sou                       -2.982
русские                   -2.939
indonesia                 -2.917

Top words for class 1 (positive / toxic):
awakens                    6.473
triology                   5.913
remastered                 4.153
router                     3.934
screeches                  3.597
vocabulary                 3.241
sunset                     3.241
acrobat             