### Evaluation & Visualization

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

from src.evaluate import plot_confusion_matrix, print_classification_metrics, generate_wordcloud

# Load the preprocessed dataset (path is relative to the notebook's working
# directory).
df = pd.read_csv("../data/processed/clean_data.csv")

# Quick word cloud over the whole corpus; its return value is not used below.
generate_wordcloud(df["cleaned_text"])

# pd.read_csv parses empty fields as NaN by default, and TfidfVectorizer
# rejects NaN documents, so rows whose text is empty are mapped to "".
texts = df["cleaned_text"].fillna("")
y = df["label"]

# Split the raw text BEFORE vectorizing. Fitting TF-IDF on the full corpus
# would let validation documents influence the vocabulary and IDF weights
# (data leakage), which makes the validation metrics optimistic.
# With the same random_state/test_size and the same number of rows, this
# selects the same train/validation rows as splitting a feature matrix would.
text_train, text_val, y_train, y_val = train_test_split(
    texts, y, test_size=0.3, random_state=42
)

# Learn vocabulary and IDF statistics from the training split only, then
# project the validation split into that same feature space.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_val = vectorizer.transform(text_val)

# Full corpus in the train-fitted feature space; kept so that any later cell
# referencing `X` still finds it defined.
X = vectorizer.transform(texts)

# Train a simple baseline model; max_iter is raised above scikit-learn's
# default of 100 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)

# Evaluate on the held-out validation split.
print_classification_metrics(y_val, y_pred)
plot_confusion_matrix(y_val, y_pred)
