In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score
from sklearn.feature_extraction.text import TfidfVectorizer
import json

In [2]:
# read the tuned hyperparameters from the tuning results file
with open('../results/logistic_regression_tuning_best.json', 'r') as params_file:
    best_params = json.load(params_file)

# the two settings reused below: TF-IDF vocabulary cap and the
# LogisticRegression C value
best_c = best_params['C']
best_max_features = best_params['max_features']

print(f"Best hyperparameters: max_features={best_max_features}, C={best_c}")

Best hyperparameters: max_features=40000, C=10


In [3]:
# read the training split and the test split from disk
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test_1.csv')

# report the (rows, columns) shape of each split
print(
    f"Training data: {train.shape}",
    f"Test data: {test.shape}",
    sep="\n",
)

# label columns; one binary model is trained and evaluated per entry
labels = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

Training data: (159571, 8)
Test data: (63978, 8)


In [4]:
# TF-IDF over unigrams and bigrams, with the vocabulary capped at the tuned size
tfidf_settings = {
    "max_features": best_max_features,
    "ngram_range": (1, 2),
}
vectorizer = TfidfVectorizer(**tfidf_settings)

# fit on the training comments only; the test comments are transformed
# with the already-fitted vectorizer
X_train = vectorizer.fit_transform(train['comment_text'])
X_test = vectorizer.transform(test['comment_text'])

print(f"Feature matrix shape: {X_train.shape}")

Feature matrix shape: (159571, 40000)


In [5]:
# fit the final classifiers using the tuned hyperparameters
print(f"Training final logistic regression models with C={best_c}, max_features={best_max_features}\n")

# identical estimator settings are used for every label
classifier_settings = dict(
    C=best_c,
    penalty='l2',
    class_weight='balanced',
    max_iter=5000,
)

# label name -> fitted classifier
models = {}

# each label is treated as its own binary problem on the shared feature matrix
for label in labels:
    print(f"Training model for {label}...")

    y_train = train[label]
    model = LogisticRegression(**classifier_settings).fit(X_train, y_train)
    models[label] = model

print("\nAll models trained!")

Training final logistic regression models with C=10, max_features=40000

Training model for toxic...
Training model for severe_toxic...
Training model for obscene...
Training model for threat...
Training model for insult...
Training model for identity_hate...

All models trained!


In [6]:
# evaluate every per-label model on the test set
per_label_metrics = {}
label_precisions, label_recalls, label_f1s, label_auc_prs = [], [], [], []

for label in labels:
    model = models[label]
    y_true = test[label]

    # hard predictions feed precision/recall/F1; decision scores feed AUC-PR
    y_pred = model.predict(X_test)
    y_scores = model.decision_function(X_test)

    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    auc_pr = average_precision_score(y_true, y_scores)

    # rounded copies are stored for reporting
    per_label_metrics[label] = {
        metric_name: round(metric_value, 3)
        for metric_name, metric_value in (
            ('precision', precision),
            ('recall', recall),
            ('f1', f1),
            ('auc_pr', auc_pr),
        )
    }

    # unrounded values are accumulated for the macro averages
    label_precisions.append(precision)
    label_recalls.append(recall)
    label_f1s.append(f1)
    label_auc_prs.append(auc_pr)

# macro metric = plain mean of the per-label values, rounded to 3 decimals
macro_sources = (
    ("macro_precision", label_precisions),
    ("macro_recall", label_recalls),
    ("macro_f1", label_f1s),
    ("macro_auc_pr", label_auc_prs),
)
test_metrics = {
    macro_name: round(np.mean(values), 3)
    for macro_name, values in macro_sources
}
test_metrics["per_label_metrics"] = per_label_metrics

# summary block
banner = "=" * 50
print(banner)
print("TEST SET RESULTS")
print(banner)
print(f"Macro Precision: {test_metrics['macro_precision']:.3f}")
print(f"Macro Recall:    {test_metrics['macro_recall']:.3f}")
print(f"Macro F1:        {test_metrics['macro_f1']:.3f}")
print(f"Macro AUC-PR:    {test_metrics['macro_auc_pr']:.3f}")

# per-label table: one left-aligned row per label
print("\nPer-label metrics:")
print(f"{'Label':<15} {'Precision':<12} {'Recall':<12} {'F1':<12} {'AUC-PR':<12}")
print("-" * 63)
for label in labels:
    m = per_label_metrics[label]
    print(f"{label:<15} {m['precision']:<12.3f} {m['recall']:<12.3f} {m['f1']:<12.3f} {m['auc_pr']:<12.3f}")

TEST SET RESULTS
Macro Precision: 0.347
Macro Recall:    0.820
Macro F1:        0.477
Macro AUC-PR:    0.576

Per-label metrics:
Label           Precision    Recall       F1           AUC-PR      
---------------------------------------------------------------
toxic           0.454        0.891        0.602        0.755       
severe_toxic    0.171        0.817        0.282        0.313       
obscene         0.479        0.855        0.614        0.767       
threat          0.253        0.777        0.382        0.422       
insult          0.462        0.795        0.584        0.694       
identity_hate   0.265        0.785        0.396        0.503       


In [7]:
# collect what gets persisted: the four macro metrics plus per-label AUC-PR
macro_keys = ("macro_precision", "macro_recall", "macro_f1", "macro_auc_pr")
results = {key: test_metrics[key] for key in macro_keys}
results["per_label_auc_pr"] = {
    label: per_label_metrics[label]['auc_pr'] for label in labels
}

# write the results as indented JSON
with open('../results/logistic_regression_results.json', 'w') as results_file:
    json.dump(results, results_file, indent=2)

# confirm the write and echo the saved payload
print("Results saved to ../results/logistic_regression_results.json")
print(json.dumps(results, indent=2))

Results saved to ../results/logistic_regression_results.json
{
  "macro_precision": 0.347,
  "macro_recall": 0.82,
  "macro_f1": 0.477,
  "macro_auc_pr": 0.576,
  "per_label_auc_pr": {
    "toxic": 0.755,
    "severe_toxic": 0.313,
    "obscene": 0.767,
    "threat": 0.422,
    "insult": 0.694,
    "identity_hate": 0.503
  }
}
