# Feature Extractor Efficacy Analysis
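This notebook evaluates how well each feature extractor, used on its own, separates extremist from non-extremist text in a labelled dataset.
For each extractor we train a simple logistic regression model on that extractor's features alone, using a stratified 80/20 train/test split, and report accuracy and a per-class classification report on the held-out set.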

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Import feature extractors
from action_indication import ActionIndicationAnalyzer
from extremist_reference_improved import ExtremistReferenceAnalyzer
from hate_sentiment_improved import HateSentimentAnalyzer
from violence_sentiment_improved import ViolenceSentimentAnalyzer
from overall_sentiment_improved import OverallSentimentAnalyzer

# One analyzer instance per feature family, each built with no constructor
# arguments. The evaluation cells below pass bound methods of these objects
# to `evaluate_extractor`.
action_analyzer = ActionIndicationAnalyzer()
extr_ref_analyzer = ExtremistReferenceAnalyzer()
hate_analyzer = HateSentimentAnalyzer()
violence_analyzer = ViolenceSentimentAnalyzer()
overall_analyzer = OverallSentimentAnalyzer()

In [None]:
# Load dataset
DATA_PATH = 'path/to/extremism_dataset.csv'  # adjust path
df = pd.read_csv(DATA_PATH)

# Build the model inputs only from rows that have both a text and a label.
# Without this, astype(str) turns a missing text into the literal string 'nan'
# (which would then be fed to the extractors as if it were real content), and
# astype(int) raises on a missing label. `df` itself is left unfiltered.
labeled_rows = df.dropna(subset=['text', 'label'])
texts = labeled_rows['text'].astype(str).tolist()
labels = labeled_rows['label'].astype(int).tolist()

In [None]:

def evaluate_extractor(name, extractor_func, texts, labels,
                       test_size=0.2, random_state=42):
    """Fit and report a logistic-regression baseline on one extractor's features.

    Every text is passed to ``extractor_func``; the per-text results are stacked
    into a DataFrame (missing values filled with 0), split into stratified
    train/test sets, and used to fit a class-balanced ``LogisticRegression``.
    Accuracy and the classification report on the held-out set are printed.

    Parameters
    ----------
    name : str
        Label shown in the printed banner.
    extractor_func : callable
        Called once per text with that text as its only argument. Its results
        are handed to ``pd.DataFrame`` as rows.
        NOTE(review): presumably returns a dict of numeric features per text --
        confirm for each analyzer method passed in.
    texts : iterable of str
        Documents to featurise.
    labels : sequence
        One class label per text, in the same order as ``texts``.
    test_size : float, optional
        Fraction of samples held out for evaluation (default 0.2).
    random_state : int, optional
        Seed for the train/test split (default 42).

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    ValueError
        If ``texts`` and ``labels`` differ in length.
    """
    # Check alignment before the (potentially slow) feature-extraction pass
    # rather than letting the split fail afterwards.
    texts = list(texts)
    if len(texts) != len(labels):
        raise ValueError(
            f"texts and labels must have the same length "
            f"(got {len(texts)} texts and {len(labels)} labels)"
        )

    print(f"## Evaluating: {name}\n")

    # Extract features: one row per text; features absent for a text become 0.
    X = pd.DataFrame([extractor_func(t) for t in texts]).fillna(0)
    y = labels

    # Split, stratified so both sets keep the overall class proportions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # Train; class_weight='balanced' reweights classes by inverse frequency.
    clf = LogisticRegression(max_iter=1000, class_weight='balanced')
    clf.fit(X_train, y_train)

    # Predict on the held-out set and report.
    y_pred = clf.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))


## Action Indication Features

In [None]:
# Score the features returned by ActionIndicationAnalyzer.detect_call_to_action_patterns.
evaluate_extractor(
    'Action Indication',
    action_analyzer.detect_call_to_action_patterns,
    texts,
    labels,
)

## Extremist Reference Features

In [None]:
# Score the features returned by ExtremistReferenceAnalyzer.extract_reference_features.
evaluate_extractor(
    'Extremist References',
    extr_ref_analyzer.extract_reference_features,
    texts,
    labels,
)

## Hate Sentiment Features

In [None]:
# Score the features returned by HateSentimentAnalyzer.get_hate_sentiment_features.
evaluate_extractor(
    'Hate Sentiment',
    hate_analyzer.get_hate_sentiment_features,
    texts,
    labels,
)

## Violence Sentiment Features

In [None]:
# Score the features returned by ViolenceSentimentAnalyzer.get_violence_sentiment_features.
evaluate_extractor(
    'Violence Sentiment',
    violence_analyzer.get_violence_sentiment_features,
    texts,
    labels,
)

## Overall Sentiment Features

In [None]:
# Score the features returned by OverallSentimentAnalyzer.get_vader_scores.
evaluate_extractor(
    'Overall Sentiment',
    overall_analyzer.get_vader_scores,
    texts,
    labels,
)