In [2]:
# %pip install numpy pandas transformers scikit-learn hf_xet 'accelerate>=0.26.0' datasets
# %pip install --upgrade transformers

In [None]:
import sys
import os

# Extend the import search path so the local wildcard imports that follow can
# be resolved. Note that, despite its name, script_dir is the PARENT of the
# current working directory (dirname of the cwd), and parent_dir is one level
# above that. parent_dir is not referenced by any other cell visible here.
script_dir = os.path.dirname(os.path.abspath(os.getcwd()))
parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
# Guarded append: re-running this cell must not stack duplicate sys.path entries.
if script_dir not in sys.path:
    sys.path.append(script_dir)
from utils import *
from dataset.simple_dataset import *
from models.simple_bert import *

import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.utils.class_weight import compute_class_weight
import json
import warnings
warnings.filterwarnings('ignore')

from transformers import AutoTokenizer, AutoModel
from sklearn.dummy import DummyClassifier

# Banner: announce the run and report the installed PyTorch build.
print("=== SIMPLE FOOD HAZARD DETECTION - COMPATIBILITY MODE ===")
print(f"PyTorch version: {torch.__version__}")

# Run-wide switches read by the later cells of this notebook.
CONFIG = dict(
    st1_task=True,            # Change to False for ST2
    use_bert=False,           # Set to True if you want to try BERT
    max_features=10000,       # TF-IDF features
    test_bert_loading=False,  # Test if BERT loading works
)

# Alternative configuration for the ST2 task (uncomment to use it instead of the CONFIG above)
# CONFIG = {
#     'st1_task': False,  # Change to False for ST2
#     'use_bert': False,  # Set to True if you want to try BERT
#     'max_features': 10000,  # TF-IDF features
#     'test_bert_loading': False  # Test if BERT loading works
# }

### 1. Load Data

In [None]:
print("\n1. Loading data...")

# Base URL of the shared-task data folder on GitHub.
DATA_PATH = "https://github.com/food-hazard-detection-semeval-2025/food-hazard-detection-semeval-2025.github.io/blob/main/data/"


def _data_url(filename):
    """Return the raw-download URL of a CSV file stored under DATA_PATH.

    The URL is assembled with plain string operations. os.path.join is a
    filesystem API that inserts the OS path separator, so it only produces a
    valid URL when the base already ends in '/'; stripping and re-adding the
    slash here works whether or not DATA_PATH carries a trailing slash.
    """
    return f"{DATA_PATH.rstrip('/')}/{filename}?raw=true"


# One DataFrame per split, read straight from the remote CSV files.
train = pd.read_csv(_data_url("incidents_train.csv"))
valid = pd.read_csv(_data_url("incidents_valid.csv"))
test = pd.read_csv(_data_url("incidents_test.csv"))

task_name = "ST1" if CONFIG['st1_task'] else "ST2"
print(f"Task: {task_name}")
print(f"Method: {'BERT' if CONFIG['use_bert'] else 'TF-IDF + LogReg'}")

print(f"Data loaded - Train: {len(train)}, Valid: {len(valid)}, Test: {len(test)}")

# Select columns based on task: ST1 uses the "-category" columns, ST2 the
# plain 'hazard' / 'product' columns.
if CONFIG['st1_task']:
    hazard_col = 'hazard-category'
    product_col = 'product-category'
else:
    hazard_col = 'hazard'
    product_col = 'product'

print(f"Target columns: {hazard_col}, {product_col}")

### 2. Data Analysis

In [None]:
print("\n2. Data analysis...")

# Per-class sample counts for both targets. value_counts() orders classes from
# most to least frequent, which the positional lookups below rely on.
hazard_counts, product_counts = (
    train[target_col].value_counts() for target_col in (hazard_col, product_col)
)

print(f"Hazard classes: {len(hazard_counts)}")
print(f"Product classes: {len(product_counts)}")

# First entry = most frequent class and its sample count.
top_hazard, top_hazard_n = hazard_counts.index[0], hazard_counts.iloc[0]
top_product, top_product_n = product_counts.index[0], product_counts.iloc[0]
print(f"Most common hazard: {top_hazard} ({top_hazard_n} samples)")
print(f"Most common product: {top_product} ({top_product_n} samples)")

# Check for imbalance: ratio of the largest class to the smallest class.
imbalance_ratio_h = top_hazard_n / hazard_counts.iloc[-1]
imbalance_ratio_p = top_product_n / product_counts.iloc[-1]
print(f"Hazard imbalance ratio: {imbalance_ratio_h:.1f}x")
print(f"Product imbalance ratio: {imbalance_ratio_p:.1f}x")

### 3. Text preprocessing

In [None]:
print("\n3. Text preparation...")

# Run the same text-preparation helper over every split, in train/valid/test
# order. prepare_text is not defined in this notebook; presumably it arrives
# through one of the wildcard imports at the top.
train_texts, valid_texts, test_texts = (
    prepare_text(split_frame) for split_frame in (train, valid, test)
)

# The reported average is a quick sanity check over the first 100 training
# texts only, not over the whole training split.
sample_word_counts = [len(text.split()) for text in train_texts[:100]]
print(f"Texts prepared - Average length: {np.mean(sample_word_counts):.1f} words")

### 4. Test BERT loading if requested

In [6]:
# Optional probe: try to load a small pretrained checkpoint and record in
# CONFIG['use_bert'] whether that worked. Note that this overwrites whatever
# value 'use_bert' had before, in both the success and the failure case.
if CONFIG['test_bert_loading']:
    print("\n4. Testing BERT loading...")
    probe_checkpoint = 'distilbert-base-uncased'  # Lighter model
    try:
        tokenizer = AutoTokenizer.from_pretrained(probe_checkpoint)
        model = AutoModel.from_pretrained(probe_checkpoint)
        print("BERT loading successful! You can set use_bert=True")
        bert_available = True
    except Exception as e:
        # Any loading failure is reported and the notebook carries on with TF-IDF.
        print(f"BERT loading failed: {e}")
        print("Continuing with TF-IDF...")
        bert_available = False
    CONFIG['use_bert'] = bert_available

### 5. Model Training - TF-IDF Version (Always works)


In [None]:
# Stage 5/6: fit the TF-IDF + logistic-regression pipeline and predict.
#
# This pipeline runs unconditionally. The evaluation, baseline and export
# cells read X_train, the y_* label arrays and the *_pred_* arrays, and the
# optional BERT demo at the end of this cell produces no predictions of its
# own, so skipping TF-IDF in BERT mode would leave those names undefined.
print("\n5. Training TF-IDF + LogReg model...")

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(
    max_features=CONFIG['max_features'],
    ngram_range=(1, 2),
    min_df=2,
    max_df=0.95,
    stop_words='english'
)

print("Creating TF-IDF features...")
# The vocabulary is fitted on the training texts only; the validation and
# test texts are merely transformed with it.
X_train = vectorizer.fit_transform(train_texts)
X_valid = vectorizer.transform(valid_texts)
X_test = vectorizer.transform(test_texts)

print(f"TF-IDF shape: {X_train.shape}")

# Prepare labels
y_train_hazard = train[hazard_col].values
y_valid_hazard = valid[hazard_col].values
y_test_hazard = test[hazard_col].values

y_train_product = train[product_col].values
y_valid_product = valid[product_col].values
y_test_product = test[product_col].values

# Class weights for imbalanced data ('balanced' weighting computed from the
# training labels, then mapped class -> weight for LogisticRegression).
hazard_classes = np.unique(y_train_hazard)
product_classes = np.unique(y_train_product)

hazard_weights = compute_class_weight('balanced', classes=hazard_classes, y=y_train_hazard)
product_weights = compute_class_weight('balanced', classes=product_classes, y=y_train_product)

hazard_weight_dict = dict(zip(hazard_classes, hazard_weights))
product_weight_dict = dict(zip(product_classes, product_weights))

print(f"Class weights computed - Hazard: {len(hazard_weight_dict)}, Product: {len(product_weight_dict)}")

# Train models: one independent classifier per target, same features.
print("Training hazard classifier...")
hazard_model = LogisticRegression(
    class_weight=hazard_weight_dict,
    max_iter=1000,
    random_state=42
)
hazard_model.fit(X_train, y_train_hazard)

print("Training product classifier...")
product_model = LogisticRegression(
    class_weight=product_weight_dict,
    max_iter=1000,
    random_state=42
)
product_model.fit(X_train, y_train_product)

print("Models trained")

# Predictions
print("\n6. Making predictions...")
hazard_pred_valid = hazard_model.predict(X_valid)
product_pred_valid = product_model.predict(X_valid)

hazard_pred_test = hazard_model.predict(X_test)
product_pred_test = product_model.predict(X_test)

# Simple BERT Version (if compatible): a short training demo only. It does not
# produce predictions, so the TF-IDF results above remain the ones evaluated.
if CONFIG['use_bert']:
    print("\n5. Training Simple BERT...")

    # Initialize
    tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

    # SimpleDataset / SimpleBERT are not defined in this notebook; presumably
    # they come from the wildcard imports of dataset.simple_dataset and
    # models.simple_bert - verify against those modules.
    train_dataset = SimpleDataset(train_texts, train[hazard_col].values, train[product_col].values, tokenizer)
    valid_dataset = SimpleDataset(valid_texts, valid[hazard_col].values, valid[product_col].values, tokenizer)
    test_dataset = SimpleDataset(test_texts, test[hazard_col].values, test[product_col].values, tokenizer)

    model = SimpleBERT('distilbert-base-uncased', len(train_dataset.hazard_to_id), len(train_dataset.product_to_id))

    # Simple training (just 1 epoch for demo)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    # DataLoader and CrossEntropyLoss are reached through the explicitly
    # imported torch package, so this cell does not depend on those names
    # happening to leak in through a wildcard import.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = torch.nn.CrossEntropyLoss()

    max_demo_batches = 50  # cap on the number of batches trained in the demo

    print("Training (1 epoch for demo)...")
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        # batch_idx is zero-based, so ">=" stops after exactly max_demo_batches.
        if batch_idx >= max_demo_batches:
            break

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        hazard_labels = batch['hazard_label'].to(device)
        product_labels = batch['product_label'].to(device)

        optimizer.zero_grad()
        hazard_logits, product_logits = model(input_ids, attention_mask)

        # Joint objective: unweighted sum of the two classification losses.
        loss = criterion(hazard_logits, hazard_labels) + criterion(product_logits, product_labels)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print(f"Batch {batch_idx}/{max_demo_batches}, Loss: {loss.item():.4f}")

    print("BERT training completed (demo)")

    # Simple predictions for BERT would go here...
    # For now, fall back to TF-IDF results
    print("  Note: Using TF-IDF results for evaluation")

### 7. Results

In [None]:
print("\n7. Evaluation...")

# Score the predictions of each labelled split. compute_food_hazard_score is
# not defined in this notebook (presumably it comes from the utils wildcard
# import); the result is indexed below by 'f1_hazards', 'f1_products' and
# 'final_score'.
valid_scores = compute_food_hazard_score(
    y_valid_hazard, y_valid_product, hazard_pred_valid, product_pred_valid
)
test_scores = compute_food_hazard_score(
    y_test_hazard, y_test_product, hazard_pred_test, product_pred_test
)

# Same three-line report for each split, validation first.
for split_title, split_scores in (("VALIDATION", valid_scores), ("TEST", test_scores)):
    print(f"\n=== {split_title} RESULTS ===")
    print(f"Hazard F1: {split_scores['f1_hazards']:.4f}")
    print(f"Product F1: {split_scores['f1_products']:.4f}")
    print(f"Final Score: {split_scores['final_score']:.4f}")

### 8. Comparison with baselines

In [None]:
print("\n8. Baseline comparison...")

# Majority classifier: always predicts the most frequent training label,
# giving a floor that any real model should beat.
dummy_hazard = DummyClassifier(strategy='most_frequent')
dummy_product = DummyClassifier(strategy='most_frequent')

dummy_hazard.fit(X_train, y_train_hazard)
dummy_product.fit(X_train, y_train_product)

dummy_hazard_pred = dummy_hazard.predict(X_test)
dummy_product_pred = dummy_product.predict(X_test)

# Scored with the same metric helper as the real model so the numbers compare.
dummy_scores = compute_food_hazard_score(
    y_test_hazard, y_test_product,
    dummy_hazard_pred, dummy_product_pred
)

print(f"Majority Baseline: {dummy_scores['final_score']:.4f}")
print(f"Our Model: {test_scores['final_score']:.4f}")
# The '+' format flag emits the sign itself, so a model that is worse than the
# baseline prints "-0.0123" rather than the malformed "+-0.0123".
print(f"Improvement: {test_scores['final_score'] - dummy_scores['final_score']:+.4f}")

### 9. Competition comparison

In [None]:
print(f"\n=== COMPETITION COMPARISON ({task_name}) ===")

# Reference scores for the selected sub-task: the top team's name and score,
# plus the competition's BERT baseline score.
if CONFIG['st1_task']:
    best_team, competition_best, competition_bert = "Anastasia", 0.8223, 0.667
else:
    best_team, competition_best, competition_bert = "SRCB", 0.5473, 0.498

print(f"Competition Best ({best_team}): {competition_best:.4f}")
print(f"Competition BERT Baseline: {competition_bert:.4f}")

our_score = test_scores['final_score']
print(f"Your Result: {our_score:.4f}")

# The comparison against the references is the same for both sub-tasks.
if our_score > competition_bert:
    print(f"You beat the BERT baseline by {our_score - competition_bert:.4f}!")
gap_to_best = competition_best - our_score
print(f"Gap to best: {gap_to_best:.4f}")

### 10. Save results

In [None]:
# Assemble the run summary section by section, then write it out as JSON.
final_score = test_scores['final_score']
baseline_score = dummy_scores['final_score']

# Dataset sizes and class statistics gathered by the earlier cells.
data_stats = {
    'train_size': len(train),
    'valid_size': len(valid),
    'test_size': len(test),
    'hazard_classes': len(hazard_counts),
    'product_classes': len(product_counts),
    'hazard_imbalance': float(imbalance_ratio_h),
    'product_imbalance': float(imbalance_ratio_p),
}

# Model scores next to the majority baseline.
score_section = {
    'validation': valid_scores,
    'test': test_scores,
    'baseline': dummy_scores,
    'improvement': float(final_score - baseline_score),
}

# Position relative to the published competition results.
competition_section = {
    'competition_best': float(competition_best),
    'competition_bert': float(competition_bert),
    'our_result': float(final_score),
    'gap_to_best': float(competition_best - final_score),
}

results_summary = {
    'task': task_name,
    'method': 'TF-IDF + LogReg with Class Weights',
    'config': CONFIG,
    'data_stats': data_stats,
    'results': score_section,
    'competition_comparison': competition_section,
}

# One output file per task; default=str stringifies anything json cannot
# serialise natively instead of raising.
filename = 'simple_results_' + task_name.lower() + '.json'
with open(filename, 'w') as out_file:
    json.dump(results_summary, out_file, indent=2, default=str)

print(f"\nResults saved to {filename}")

print("\n=== EXPERIMENT COMPLETED ===")
print("Method: TF-IDF + Logistic Regression")
print(f"Final {task_name} Score: {final_score:.4f}")

# Quick suggestions for improvement
print("\nNEXT STEPS FOR BETTER RESULTS:")
next_steps = (
    "1. Try different TF-IDF parameters (max_features, ngram_range)",
    "2. Add more feature engineering (text length, country, date)",
    "3. Try ensemble methods (combine multiple models)",
    "4. If BERT works, try: 'test_bert_loading': True",
    "5. For ST2 task, set: 'st1_task': False",
)
for suggestion in next_steps:
    print(suggestion)