# Second-Order HMM

This notebook implements a second-order (trigram) HMM for detecting negation and uncertainty markers in multilingual (Spanish and Catalan) medical texts. It covers the following steps:

- Load preprocessed baseline data (without BIO tagging or POS)
- Train a second-order HMM model that captures longer dependencies
- Evaluate performance on test data
- Compare results with the first-order baseline model
- Save the model and results

## Imports and Setup

In [None]:
import os
import sys
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict

# Absolute path of the project root: the parent of the directory the notebook
# is executed from.
PATH_ROOT = os.path.dirname(os.getcwd())
# Register the project root by absolute path so `src` stays importable even if
# the working directory changes later; the membership check keeps repeated
# runs of this cell from piling up duplicate sys.path entries.
if PATH_ROOT not in sys.path:
    sys.path.append(PATH_ROOT)

from src.hmm import HMMSecondOrderBaseline
from src.evaluation import *

# Make sure the output folders for saved models and evaluation artefacts exist
# (exist_ok=True makes this a no-op when they are already there).
for results_subdir in ("models", "evaluation"):
    os.makedirs(os.path.join(PATH_ROOT, "data", "results", results_subdir), exist_ok=True)

# Seed NumPy's global random generator with a fixed value.
np.random.seed(42)

## Load Preprocessed Data

In [None]:
def load_processed_data(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    The file is opened in binary mode and closed again before returning.
    Note that unpickling executes code embedded in the file, so only point
    this at pickle files you produced yourself.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

# Locate the pickled train/test splits under <PATH_ROOT>/data/processed.
processed_dir = os.path.join(PATH_ROOT, "data", "processed")
train_file = os.path.join(processed_dir, "train_baseline.pkl")
test_file = os.path.join(processed_dir, "test_baseline.pkl")

# Load both splits, training split first.
train_data, test_data = (load_processed_data(path) for path in (train_file, test_file))

# Quick summary of what was loaded.
n_train_sequences = len(train_data['observations'])
n_test_sequences = len(test_data['observations'])
vocabulary_size = len(train_data['vocabulary'])
print(f"Loaded {n_train_sequences} training sequences")
print(f"Loaded {n_test_sequences} test sequences")
print(f"Vocabulary size: {vocabulary_size}")
print(f"State space: {train_data['state_space']}")

## Train Second-Order HMM Model

In [None]:
# Constructor arguments for the second-order HMM; the state space and the
# vocabulary are taken from the training split.
hmm_config = {
    "state_space": train_data["state_space"],
    "vocabulary": train_data["vocabulary"],
    "smoothing": 0.01,
}
second_order_model = HMMSecondOrderBaseline(**hmm_config)

print("Training second-order HMM model...")
train_sequences, train_labels = train_data["observations"], train_data["states"]
second_order_model.train(train_sequences, train_labels)

# Persist the trained model under data/results/models.
model_path = os.path.join(PATH_ROOT, "data", "results", "models", "hmm_second_order.pkl")
second_order_model.save(model_path)
print(f"Model saved to {model_path}")

## Evaluate the Model

In [None]:
print("Making predictions on test data...")
test_predictions = second_order_model.predict(test_data["observations"])
gold_states = test_data["states"]

# Score the predictions at token, entity and scope level (tags are not BIO-encoded).
print("Evaluating predictions...")
metrics = compute_metrics(gold_states, test_predictions, is_bio=False)
entity_metrics = get_entity_based_metrics(gold_states, test_predictions, is_bio=False)
scope_metrics = evaluate_scope_detection(gold_states, test_predictions, test_data["observations"], is_bio=False)

print("\nClassification Report (Token Level):")
print_classification_report(gold_states, test_predictions, is_bio=False)

# Per-label entity scores; the macro average is printed separately afterwards.
print("\nEntity-Level F1 Scores:")
for label, scores in entity_metrics.items():
    if label == "macro_avg":
        continue
    support = scores.get('support', 'N/A')
    print(f"{label}: F1 = {scores['f1']:.4f}, Precision = {scores['precision']:.4f}, "
          f"Recall = {scores['recall']:.4f}, Support = {support}")
print(f"Macro Average: F1 = {entity_metrics['macro_avg']['f1']:.4f}")

# Per-label scope scores, again with the macro average last.
print("\nScope Detection F1 Scores:")
for label, scores in scope_metrics.items():
    if label == "macro_avg":
        continue
    print(f"{label}: F1 = {scores['f1']:.4f}, Precision = {scores['precision']:.4f}, "
          f"Recall = {scores['recall']:.4f}")
print(f"Macro Average: F1 = {scope_metrics['macro_avg']['f1']:.4f}")

# Bundle all three metric groups into one JSON file for later comparison.
eval_path = os.path.join(PATH_ROOT, "data", "results", "evaluation", "evaluation_second_order.json")
evaluation_summary = {
    "token_metrics": metrics,
    "entity_metrics": entity_metrics,
    "scope_metrics": scope_metrics,
}
save_metrics(evaluation_summary, eval_path)
print(f"Evaluation results saved to {eval_path}")

## Compare with Baseline HMM Model

In [None]:
# Compare the second-order model with the first-order baseline HMM: load the
# baseline's saved evaluation JSON, plot the macro F1 scores side by side and
# print relative improvements overall and per entity type.


def relative_improvement(new_score, reference_score):
    """Return the percentage change of ``new_score`` relative to ``reference_score``.

    Returns NaN when ``reference_score`` is zero, because the relative change
    is undefined there; dividing directly would raise ZeroDivisionError and
    abort the whole comparison.
    """
    if reference_score == 0:
        return float("nan")
    return (new_score - reference_score) / reference_score * 100


def extract_baseline_f1(baseline_metrics):
    """Extract macro-averaged F1 scores from a loaded baseline metrics dict.

    Two layouts are recognised:

    - nested: top-level "token_metrics" and "entity_metrics" keys, plus an
      optional "scope_metrics" key, each holding a "macro_avg" entry;
    - flat: a "macro_avg" entry directly at the top level, which is used as
      the token-level score. No entity or scope score exists in this layout.

    Returns:
        A ``(token_f1, entity_f1, scope_f1)`` tuple. ``entity_f1`` and
        ``scope_f1`` are ``None`` when the baseline file does not provide them.

    Raises:
        KeyError: if a key required by the detected layout is missing.
    """
    if "token_metrics" in baseline_metrics:
        token_f1 = baseline_metrics["token_metrics"]["macro_avg"]["f1"]
        entity_f1 = baseline_metrics["entity_metrics"]["macro_avg"]["f1"]
        scope_f1 = None
        if "scope_metrics" in baseline_metrics:
            scope_f1 = baseline_metrics["scope_metrics"]["macro_avg"]["f1"]
        return token_f1, entity_f1, scope_f1
    # Flat layout: never substitute the second-order model's own scores for
    # missing baseline values -- that would compare the model with itself.
    return baseline_metrics["macro_avg"]["f1"], None, None


try:
    # Load baseline evaluation results
    baseline_json_path = os.path.join(PATH_ROOT, "data", "results", "evaluation", "evaluation_baseline.json")
    with open(baseline_json_path, 'r', encoding='utf-8') as f:
        baseline_metrics = json.load(f)

    # Show the structure of the baseline metrics file to ease debugging
    print(f"Structure of baseline metrics file: {list(baseline_metrics.keys())}")

    baseline_token_f1, baseline_entity_f1, baseline_scope_f1 = extract_baseline_f1(baseline_metrics)
    has_entity = baseline_entity_f1 is not None
    has_scope = baseline_scope_f1 is not None

    # Current second-order metrics
    second_order_token_f1 = metrics["macro_avg"]["f1"]
    second_order_entity_f1 = entity_metrics["macro_avg"]["f1"]
    second_order_scope_f1 = scope_metrics["macro_avg"]["f1"]

    # Only compare the levels for which the baseline actually has a score
    comparison = [("Token-Level F1", baseline_token_f1, second_order_token_f1)]
    if has_entity:
        comparison.append(("Entity-Level F1", baseline_entity_f1, second_order_entity_f1))
    if has_scope:
        comparison.append(("Scope-Level F1", baseline_scope_f1, second_order_scope_f1))

    if not has_entity:
        print("Note: Entity metrics not found in baseline evaluation. Only comparing token metrics.")
    elif not has_scope:
        print("Note: Scope metrics not found in baseline evaluation. Only comparing token and entity metrics.")

    labels = [row[0] for row in comparison]
    baseline_scores = [row[1] for row in comparison]
    second_order_scores = [row[2] for row in comparison]

    # Create visualization: one pair of bars per compared level
    x = np.arange(len(labels))
    width = 0.35

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(x - width/2, baseline_scores, width, label='First-Order Baseline HMM')
    ax.bar(x + width/2, second_order_scores, width, label='Second-Order Baseline HMM')

    ax.set_ylabel('F1 Score')
    ax.set_title('Performance Comparison: First-Order vs. Second-Order Baseline HMM')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    # Add value labels just above each bar
    for i, v in enumerate(baseline_scores):
        ax.text(i - width/2, v + 0.01, f'{v:.4f}', ha='center')
    for i, v in enumerate(second_order_scores):
        ax.text(i + width/2, v + 0.01, f'{v:.4f}', ha='center')

    plt.tight_layout()
    plt.savefig(os.path.join(PATH_ROOT, 'data', 'results', 'evaluation', 'second_order_baseline_vs_baseline.png'))
    plt.show()

    # Calculate and print performance improvements
    print("\nPerformance Comparison:")
    print("-" * 60)

    # Token-level improvement
    second_vs_baseline_token = relative_improvement(second_order_token_f1, baseline_token_f1)
    print(f"Second-Order vs Baseline (Token-level F1): {second_vs_baseline_token:.2f}% improvement")

    # Entity-level improvement (if available)
    if has_entity:
        second_vs_baseline_entity = relative_improvement(second_order_entity_f1, baseline_entity_f1)
        print(f"Second-Order vs Baseline (Entity-level F1): {second_vs_baseline_entity:.2f}% improvement")

    # Scope-level improvement (if available)
    if has_scope:
        second_vs_baseline_scope = relative_improvement(second_order_scope_f1, baseline_scope_f1)
        print(f"Second-Order vs Baseline (Scope-level F1): {second_vs_baseline_scope:.2f}% improvement")

    # Entity-type level comparison
    print("\nEntity-level F1 comparison by entity type:")
    print("Entity Type\tBaseline\tSecond-Order\tImprovement")
    print("-" * 60)

    # Nested layout keeps per-type scores under "entity_metrics"; the flat
    # layout is looked up directly at the top level.
    # NOTE(review): in the flat layout these per-label values are presumably
    # token-level scores, while the second-order column is entity-level --
    # confirm before drawing conclusions from that table.
    if "entity_metrics" in baseline_metrics:
        baseline_by_type = baseline_metrics["entity_metrics"]
    else:
        baseline_by_type = baseline_metrics

    for entity_type in ["NEG", "NSCO", "UNC", "USCO"]:
        # A missing type skips its own row instead of aborting the table
        try:
            baseline_f1 = baseline_by_type[entity_type]["f1"]
        except KeyError:
            print(f"Could not find {entity_type} metrics in baseline evaluation")
            continue
        try:
            second_order_f1 = entity_metrics[entity_type]["f1"]
        except KeyError:
            print(f"Could not find {entity_type} metrics in second-order evaluation")
            continue

        improvement = relative_improvement(second_order_f1, baseline_f1)

        print(f"{entity_type}\t\t{baseline_f1:.4f}\t\t{second_order_f1:.4f}\t\t{improvement:+.2f}%")

except FileNotFoundError:
    print(f"Baseline evaluation results not found at {baseline_json_path}.")
    print("Run hmm_baseline.ipynb first and ensure it saves evaluation results.")
except KeyError as e:
    print(f"Error: Key {e} not found in metrics file.")
    print("Structure of baseline_metrics:", list(baseline_metrics.keys()) if 'baseline_metrics' in locals() else "Unknown")
    if 'baseline_metrics' in locals() and "token_metrics" in baseline_metrics:
        print("Available keys in token_metrics:", list(baseline_metrics["token_metrics"].keys()))
except Exception as e:
    # Top-level notebook boundary: report the failure with a traceback rather
    # than letting the cell crash without context.
    print(f"Unexpected error during comparison: {str(e)}")
    import traceback
    traceback.print_exc()