# Water Conflict Classifier - Demo Notebook

This notebook demonstrates:
1. Loading the public dataset from Hugging Face
2. Loading the pre-trained model
3. (Optional) Training a new model on the dataset
4. Testing the model
5. Making predictions on custom headlines

**Dataset**: `baobabtech/water-conflict-training-data`  
**Model**: `baobabtech/water-conflict-classifier`


## 1. Setup and Installation

Install required packages:


In [None]:
# Install required packages
%pip install -q setfit datasets sentence-transformers scikit-learn pandas numpy matplotlib


## 2. Load Dataset from Hugging Face Hub


In [None]:
import pandas as pd
import numpy as np
from datasets import load_dataset
from setfit import SetFitModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, hamming_loss, f1_score

# Fetch the public training data from the Hugging Face Hub
DATASET_REPO = "baobabtech/water-conflict-training-data"
print("Loading dataset from Hugging Face Hub...")
dataset = load_dataset(DATASET_REPO)

# Materialise the 'train' split as a pandas DataFrame
train_split = dataset['train']
df = train_split.to_pandas()

n_examples = len(df)
column_names = list(df.columns)
print(f"\nLoaded {n_examples} examples")
print(f"Columns: {column_names}")
print(f"\nFirst few rows:")

# Last expression of the cell, so the notebook renders it as a table
df.head()


## 3. Data Preprocessing

Convert the raw data into multi-label format for SetFit training.


In [None]:
# Label names, in the order used for every label vector in this notebook
LABEL_NAMES = ['Trigger', 'Casualty', 'Weapon']


def _basis_text(basis):
    """Return the 'Basis' cell as a string, mapping missing values (None/NaN) to ''."""
    # NaN is the only float that is not equal to itself
    if basis is None or (isinstance(basis, float) and basis != basis):
        return ''
    return str(basis)


def preprocess_for_training(df, label_names=None):
    """
    Convert dataset to multi-label format:
    - text: headline
    - labels: one 0/1 flag per label, in `label_names` order
      (by default [Trigger, Casualty, Weapon])

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Headline' column (used as the text) and a 'Basis'
        column. A label is set to 1 when its name occurs as a
        case-sensitive substring of the 'Basis' value.
    label_names : sequence of str, optional
        Labels to look for, in output order. Defaults to LABEL_NAMES so the
        label vectors always line up with the names used elsewhere in the
        notebook.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ['text', 'labels'] and the same index as
        `df`. The input frame is not modified.

    Raises
    ------
    KeyError
        If 'Headline' or 'Basis' is missing from `df`.
    """
    if label_names is None:
        label_names = LABEL_NAMES
    label_names = list(label_names)

    missing = [col for col in ('Headline', 'Basis') if col not in df.columns]
    if missing:
        raise KeyError(
            f"preprocess_for_training: missing required column(s): {missing}"
        )

    # Only the headline column is carried over; no need to copy the whole frame
    processed = df[['Headline']].rename(columns={'Headline': 'text'})

    def to_label_vector(basis):
        text = _basis_text(basis)
        return [1 if name in text else 0 for name in label_names]

    # Parse labels from Basis column
    processed['labels'] = df['Basis'].apply(to_label_vector)

    return processed[['text', 'labels']]

# Build the (text, labels) frame used for the rest of the notebook
data = preprocess_for_training(df)

# Stack the per-row label vectors into a matrix and sum each column
label_matrix = np.array(data['labels'].tolist())
label_counts = label_matrix.sum(axis=0)
total_rows = len(data)

print("\nLabel distribution:")
for label_name, count in zip(LABEL_NAMES, label_counts):
    share = count / total_rows * 100
    print(f"  - {label_name}: {int(count)} ({share:.1f}%)")

# Rows whose label vector is all zeros are the negative examples
is_negative = data['labels'].map(lambda vec: vec == [0, 0, 0])
neg_count = is_negative.sum()
neg_share = neg_count / total_rows * 100
print(f"  - Negatives (no labels): {neg_count} ({neg_share:.1f}%)")

data.head()


## 4. Train/Test Split


In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs
split_options = {'test_size': 0.2, 'random_state': 42}
train_data, test_data = train_test_split(data, **split_options)

print(f"Training examples: {len(train_data)}")
print(f"Test examples: {len(test_data)}")

# Plain Python lists of texts and label vectors for the SetFit calls below
train_texts, train_labels = (
    train_data[column].tolist() for column in ('text', 'labels')
)
test_texts, test_labels = (
    test_data[column].tolist() for column in ('text', 'labels')
)


## 5. Option A: Load Pre-trained Model

Load the already-trained model from Hugging Face Hub.


In [None]:
# Download the published classifier from the Hugging Face Hub
MODEL_REPO = "baobabtech/water-conflict-classifier"
print("Loading pre-trained model from Hugging Face Hub...")
model = SetFitModel.from_pretrained(MODEL_REPO)
print("✓ Model loaded successfully!")


## 5. Option B: Train a New Model (Alternative)

Or train a new classifier on the dataset, starting from a general-purpose sentence-transformer base model instead of the published classifier.

**Note**: This will take significant time. Skip this if you just want to use the pre-trained model.


In [None]:
# Uncomment to train from scratch (this rebinds `model`, replacing the pre-trained one loaded in Option A)
# from setfit import SetFitModel, Trainer, TrainingArguments
# from datasets import Dataset

# # Initialize model
# model = SetFitModel.from_pretrained(
#     "sentence-transformers/paraphrase-mpnet-base-v2",
#     multi_target_strategy="one-vs-rest",
#     labels=LABEL_NAMES
# )

# # Create Dataset objects
# train_dataset = Dataset.from_dict({
#     "text": train_texts,
#     "label": train_labels
# })

# test_dataset = Dataset.from_dict({
#     "text": test_texts,
#     "label": test_labels
# })

# # Training arguments
# args = TrainingArguments(
#     batch_size=16,
#     num_epochs=1,
#     evaluation_strategy="epoch",
#     save_strategy="epoch",
#     load_best_model_at_end=True,
# )

# # Trainer
# trainer = Trainer(
#     model=model,
#     args=args,
#     train_dataset=train_dataset,
#     eval_dataset=test_dataset,
# )

# # Train
# print("Training model...")
# trainer.train()
# print("✓ Training complete!")


## 6. Evaluate Model on Test Set


In [None]:
# Make predictions on test set
print("Evaluating model on test set...")
test_predictions = model.predict(test_texts)


def _as_numpy(predictions):
    """Return `predictions` as a NumPy array.

    Tensor-like objects (anything exposing `.detach()`, e.g. a torch.Tensor)
    are detached and moved to CPU first; lists and arrays go straight
    through `np.asarray`.
    """
    if hasattr(predictions, "detach"):
        predictions = predictions.detach().cpu().numpy()
    return np.asarray(predictions)


# Normalise to NumPy once, here: the metric calls, the boolean masks in the
# per-label analysis and the CSV export all index `y_pred` like an ndarray.
y_pred = _as_numpy(test_predictions)
y_true = np.array(test_labels)

if y_pred.shape != y_true.shape:
    raise ValueError(
        f"Prediction shape {y_pred.shape} does not match label shape "
        f"{y_true.shape}; the model's label set may differ from LABEL_NAMES."
    )

# Calculate metrics
print("\n" + "="*60)
print("EVALUATION METRICS")
print("="*60)

# Overall metrics (zero_division=0 matches the classification_report call
# below and avoids warnings for labels with no predicted positives)
print(f"\nHamming Loss: {hamming_loss(y_true, y_pred):.4f}")
print(f"Micro F1: {f1_score(y_true, y_pred, average='micro', zero_division=0):.4f}")
print(f"Macro F1: {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")
print(f"Weighted F1: {f1_score(y_true, y_pred, average='weighted', zero_division=0):.4f}")

# Per-label metrics
print("\n" + "="*60)
print("PER-LABEL METRICS")
print("="*60)
print(classification_report(
    y_true,
    y_pred,
    target_names=LABEL_NAMES,
    zero_division=0
))


## 7. Try Out the Model on Custom Headlines

Test the model on your own headlines!


In [None]:
def predict_and_display(headlines, classifier=None, label_names=None):
    """
    Predict labels for headlines and display results nicely.

    Parameters
    ----------
    headlines : str or iterable of str
        One headline, or several. A bare string is treated as a single
        headline rather than being iterated character by character.
    classifier : object with a ``predict(list_of_str)`` method, optional
        Defaults to the notebook-level ``model``.
    label_names : sequence of str, optional
        Names matching the positions of each prediction vector. Defaults to
        the notebook-level ``LABEL_NAMES``.

    Returns
    -------
    None
        Results are printed; nothing is returned, so calling this as the
        last line of a cell produces no extra output.
    """
    if isinstance(headlines, str):
        headlines = [headlines]
    else:
        # Materialise once so generators survive both predict() and zip()
        headlines = list(headlines)

    if classifier is None:
        classifier = model
    if label_names is None:
        label_names = LABEL_NAMES

    # Skip the model call entirely when there is nothing to classify
    predictions = classifier.predict(headlines) if headlines else []

    print("\n" + "="*80)
    print("PREDICTIONS")
    print("="*80)

    for headline, pred in zip(headlines, predictions):
        labels = [label_names[i] for i, val in enumerate(pred) if val == 1]

        print(f"\nHeadline: {headline}")
        if labels:
            print(f"  Labels: {', '.join(labels)}")
        else:
            print(f"  Labels: None (not water conflict)")
        print(f"  Raw: {pred}")

# Example headlines to test: a mix of water-related stories and unrelated
# news (education policy, political rally) to show the "no labels" output.
test_headlines = [
    "Armed groups attacked water treatment facility in northern region",
    "Dispute over river water rights leads to protest",
    "Government announces new education policy",
    "Dam used as leverage in territorial conflict",
    "Political party holds rally in capital city",
]

# Prints one result block per headline; the function returns None, so the
# cell shows only the printed text.
predict_and_display(test_headlines)


## 8. Interactive Prediction

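Enter your own headlines to classify. Type `quit`, `exit`, or `q` to stop; the cell keeps waiting for input until you do, so later cells will not run before then.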


In [None]:
# Interactive prediction
# Reads headlines until the user types an exit command. The loop also ends
# cleanly when no input can be read, instead of aborting the cell with a
# traceback.
EXIT_COMMANDS = {'quit', 'exit', 'q'}

while True:
    try:
        headline = input("\nEnter a headline to classify (or 'quit' to exit): ")
    except (EOFError, KeyboardInterrupt, NotImplementedError):
        # EOFError: stdin is closed; KeyboardInterrupt: the kernel was
        # interrupted; NotImplementedError: the frontend cannot take input
        # (IPython's StdinNotImplementedError subclasses it).
        print("\nInput unavailable or interrupted - leaving interactive mode.")
        break

    # Strip first so ' quit ' exits and whitespace-only input is skipped
    headline = headline.strip()

    if not headline:
        continue

    if headline.lower() in EXIT_COMMANDS:
        print("Goodbye!")
        break

    predict_and_display([headline])


## 9. Analyze Model Performance by Label

Deep dive into which types of conflicts the model handles best.


In [None]:
# Analyze performance by label
import matplotlib.pyplot as plt

# Calculate per-label F1 scores. zero_division=0 matches the
# classification_report call in the evaluation cell, so a label with no
# predicted positives scores 0 without emitting a warning.
f1_per_label = f1_score(y_true, y_pred, average=None, zero_division=0)

# Plot with an explicit Figure/Axes pair rather than the pyplot state machine
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(LABEL_NAMES, f1_per_label)
ax.set_xlabel('Label')
ax.set_ylabel('F1 Score')
ax.set_title('Model Performance by Label Type')
ax.set_ylim([0, 1])
ax.grid(axis='y', alpha=0.3)

# Add value labels on bars
for i, v in enumerate(f1_per_label):
    ax.text(i, v + 0.02, f'{v:.3f}', ha='center', va='bottom')

fig.tight_layout()
plt.show()

# Print confusion info for each label
print("\nDetailed Analysis:")
for i, label_name in enumerate(LABEL_NAMES):
    # np.asarray keeps the masks plain NumPy even if a column is array-like
    # but not an ndarray
    actual = np.asarray(y_true[:, i])
    predicted = np.asarray(y_pred[:, i])

    true_positives = ((actual == 1) & (predicted == 1)).sum()
    false_positives = ((actual == 0) & (predicted == 1)).sum()
    false_negatives = ((actual == 1) & (predicted == 0)).sum()
    true_negatives = ((actual == 0) & (predicted == 0)).sum()

    print(f"\n{label_name}:")
    print(f"  True Positives: {true_positives}")
    print(f"  False Positives: {false_positives}")
    print(f"  False Negatives: {false_negatives}")
    print(f"  True Negatives: {true_negatives}")
    print(f"  F1 Score: {f1_per_label[i]:.3f}")


## 10. Export Predictions

Save test set predictions to CSV for further analysis.


In [None]:
# Assemble one row per test headline: gold labels followed by predictions
result_columns = {'headline': test_texts}
result_columns['true_trigger'] = y_true[:, 0]
result_columns['true_casualty'] = y_true[:, 1]
result_columns['true_weapon'] = y_true[:, 2]
result_columns['pred_trigger'] = y_pred[:, 0]
result_columns['pred_casualty'] = y_pred[:, 1]
result_columns['pred_weapon'] = y_pred[:, 2]
results_df = pd.DataFrame(result_columns)

# A row counts as correct only when every label matches its prediction
label_pairs = (
    ('true_trigger', 'pred_trigger'),
    ('true_casualty', 'pred_casualty'),
    ('true_weapon', 'pred_weapon'),
)
trigger_ok, casualty_ok, weapon_ok = (
    results_df[true_col] == results_df[pred_col]
    for true_col, pred_col in label_pairs
)
results_df['all_correct'] = trigger_ok & casualty_ok & weapon_ok

# Write the table next to the notebook, without the index column
output_file = 'test_predictions.csv'
results_df.to_csv(output_file, index=False)
print(f"\n✓ Predictions saved to {output_file}")

# Preview the first ten rows as the cell output
print("\nSample predictions:")
results_df.head(10)


## Summary

This notebook demonstrated:
- ✓ Loading public dataset from HF Hub
- ✓ Loading pre-trained model
- ✓ Evaluating model performance
- ✓ Making predictions on custom headlines
- ✓ Analyzing per-label performance
- ✓ Exporting results

**Next Steps:**
- Fine-tune the model on additional data
- Experiment with different base models
- Deploy as an API or web service
- Apply to new datasets for conflict analysis
