# Second-Order HMM

This notebook implements a second-order (trigram) HMM that incorporates BIO tagging and POS information to detect negation and uncertainty markers in multilingual (Spanish and Catalan) medical texts. It covers the following steps:

- Load preprocessed data with BIO tagging and POS
- Train a second-order HMM model that captures entity boundaries and longer dependencies
- Evaluate performance on test data
- Compare results with the first-order BIO+POS model
- Save the model and results

## Imports and Setup

In [None]:
import os
import sys
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict

# Project root: the parent of the current working directory.
PATH_ROOT = os.path.dirname(os.getcwd())
# Register the root as an absolute path (instead of the relative "..") so the
# `src` package stays importable even if the working directory changes later.
# Skip the append when the entry is already present so that re-running this
# cell does not pile up duplicate sys.path entries.
if PATH_ROOT not in sys.path:
    sys.path.append(PATH_ROOT)

from src.hmm import HMMSecondOrder
from src.evaluation import *

# Make sure the output folders for saved models and evaluation files exist;
# exist_ok=True keeps this a no-op when they are already there.
for results_subdir in ("models", "evaluation"):
    os.makedirs(os.path.join(PATH_ROOT, "data", "results", results_subdir), exist_ok=True)

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)

## Load Preprocessed Data

In [None]:
def load_processed_data(file_path):
    """Read a pickle file and return the object stored in it.

    Args:
        file_path: Path of the pickle file; it is opened in binary mode.

    Returns:
        The object that ``pickle.load`` reconstructs from the file.
    """
    with open(file_path, mode="rb") as pickle_file:
        return pickle.load(pickle_file)

# Both preprocessed BIO+POS splits live in the same data/processed folder.
processed_dir = os.path.join(PATH_ROOT, "data", "processed")
train_file = os.path.join(processed_dir, "train_bio_pos.pkl")
test_file = os.path.join(processed_dir, "test_bio_pos.pkl")

# Unpickle the training split first, then the test split.
train_data, test_data = (
    load_processed_data(split_path) for split_path in (train_file, test_file)
)

# Short summary of what was loaded.
print(f"Loaded {len(train_data['observations'])} training sequences")
print(f"Loaded {len(test_data['observations'])} test sequences")
print(f"Vocabulary size: {len(train_data['vocabulary'])}")
print(f"State space (BIO format): {train_data['state_space']}")

## Train Second-Order HMM Model

In [None]:
# Vocabulary = every distinct word-POS tuple seen in the training
# observations; anything that is not a 2-tuple is left out.
bio_vocabulary = {
    token
    for sequence in train_data["observations"]
    for token in sequence
    if isinstance(token, tuple) and len(token) == 2
}

# Where the trained model will be written.
model_path = os.path.join(PATH_ROOT, "data", "results", "models", "hmm_second_order.pkl")

# Build the second-order HMM over the BIO state space with the tuple vocabulary.
second_order_model = HMMSecondOrder(
    state_space=train_data["state_space"], vocabulary=bio_vocabulary, smoothing=0.01
)

# Fit on the training observations and their gold state sequences.
print("Training second-order BIO+POS HMM model...")
second_order_model.train(train_data["observations"], train_data["states"])

# Persist the trained model.
second_order_model.save(model_path)
print(f"Model saved to {model_path}")

## Evaluate the Model

In [None]:
# Generate predictions for the test observations.
print("Making predictions on test data...")
test_predictions = second_order_model.predict(test_data["observations"])

# Score the predictions against the gold states with the token-level,
# entity-level and scope-detection helpers (all in BIO mode).
print("Evaluating predictions...")
gold_states = test_data["states"]
metrics = compute_metrics(gold_states, test_predictions, is_bio=True)
entity_metrics = get_entity_based_metrics(gold_states, test_predictions, is_bio=True)
scope_metrics = evaluate_scope_detection(
    gold_states, test_predictions, test_data["observations"], is_bio=True
)

print("\nClassification Report (Token Level with BIO):")
print_classification_report(gold_states, test_predictions, is_bio=True)

# Per-label entity scores; the "macro_avg" entry is printed separately below.
print("\nEntity-Level F1 Scores:")
for label, metrics_dict in entity_metrics.items():
    if label == "macro_avg":
        continue
    print(f"{label}: F1 = {metrics_dict['f1']:.4f}, Precision = {metrics_dict['precision']:.4f}, "
          f"Recall = {metrics_dict['recall']:.4f}, Support = {metrics_dict.get('support', 'N/A')}")
print(f"Macro Average: F1 = {entity_metrics['macro_avg']['f1']:.4f}")

# Per-label scope scores, again keeping the macro average for the last line.
print("\nScope Detection F1 Scores:")
for label, metrics_dict in scope_metrics.items():
    if label == "macro_avg":
        continue
    print(f"{label}: F1 = {metrics_dict['f1']:.4f}, Precision = {metrics_dict['precision']:.4f}, "
          f"Recall = {metrics_dict['recall']:.4f}")
print(f"Macro Average: F1 = {scope_metrics['macro_avg']['f1']:.4f}")

# Bundle the three metric sets and write them to a single JSON file.
eval_path = os.path.join(PATH_ROOT, "data", "results", "evaluation", "evaluation_second_order.json")
all_metrics = {
    "token_metrics": metrics,
    "entity_metrics": entity_metrics,
    "scope_metrics": scope_metrics,
}
save_metrics(all_metrics, eval_path)
print(f"Evaluation results saved to {eval_path}")

## Compare with First-Order BIO+POS Model

In [None]:
def _relative_change_pct(new_score, baseline_score):
    """Return the change of new_score relative to baseline_score, in percent.

    Returns None when the baseline is 0: the relative change is undefined
    there, and the plain division would raise ZeroDivisionError.
    """
    if baseline_score == 0:
        return None
    return (new_score - baseline_score) / baseline_score * 100


def _format_pct(change, signed=False):
    """Format a value from _relative_change_pct as a percentage string.

    Args:
        change: Percentage as a number, or None for an undefined change.
        signed: When True, always show the sign (e.g. "+1.23%").

    Returns:
        The number with two decimals followed by "%", or "N/A" for None.
    """
    if change is None:
        return "N/A"
    return f"{change:+.2f}%" if signed else f"{change:.2f}%"


# Load the first-order evaluation results. Only this step sits inside the
# try block, so a FileNotFoundError raised later (e.g. while saving the
# figure) is not misreported as "first-order results not found".
bio_pos_eval_path = os.path.join(
    PATH_ROOT, "data", "results", "evaluation", "evaluation_bio_pos.json"
)
try:
    with open(bio_pos_eval_path, 'r') as f:
        bio_pos_metrics = json.load(f)  # Load first-order evaluation results
except FileNotFoundError:
    print("First-order BIO+POS evaluation results not found. Run hmm_BIO_POS.ipynb first.")
else:
    # Extract macro F1 scores
    bio_pos_token_f1 = bio_pos_metrics["token_metrics"]["macro_avg"]["f1"]
    bio_pos_entity_f1 = bio_pos_metrics["entity_metrics"]["macro_avg"]["f1"]
    bio_pos_scope_f1 = bio_pos_metrics["scope_metrics"]["macro_avg"]["f1"]

    second_order_token_f1 = metrics["macro_avg"]["f1"]
    second_order_entity_f1 = entity_metrics["macro_avg"]["f1"]
    second_order_scope_f1 = scope_metrics["macro_avg"]["f1"]

    # Plot comparison: one pair of side-by-side bars per evaluation level
    labels = ["Token-Level F1", "Entity-Level F1", "Scope-Level F1"]
    bio_pos_scores = [bio_pos_token_f1, bio_pos_entity_f1, bio_pos_scope_f1]
    second_order_scores = [second_order_token_f1, second_order_entity_f1, second_order_scope_f1]

    x = np.arange(len(labels))
    width = 0.35

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(x - width/2, bio_pos_scores, width, label='First-Order BIO+POS HMM')
    ax.bar(x + width/2, second_order_scores, width, label='Second-Order BIO+POS HMM')

    ax.set_ylabel('F1 Score')
    ax.set_title('Performance Comparison: First-Order vs. Second-Order BIO+POS HMM')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    # Write each score just above its bar
    for i, v in enumerate(bio_pos_scores):
        ax.text(i - width/2, v + 0.01, f'{v:.4f}', ha='center')
    for i, v in enumerate(second_order_scores):
        ax.text(i + width/2, v + 0.01, f'{v:.4f}', ha='center')

    plt.tight_layout()
    plt.savefig(os.path.join(PATH_ROOT, 'data', 'results', 'evaluation', 'first_vs_second_order.png'))
    plt.show()

    # Relative change of the second-order model over the first-order one
    # (None when the first-order score is 0, shown as "N/A")
    second_vs_first_token = _relative_change_pct(second_order_token_f1, bio_pos_token_f1)
    second_vs_first_entity = _relative_change_pct(second_order_entity_f1, bio_pos_entity_f1)
    second_vs_first_scope = _relative_change_pct(second_order_scope_f1, bio_pos_scope_f1)

    print(f"Second-Order vs First-Order (Token-level F1): {_format_pct(second_vs_first_token)} improvement")
    print(f"Second-Order vs First-Order (Entity-level F1): {_format_pct(second_vs_first_entity)} improvement")
    print(f"Second-Order vs First-Order (Scope-level F1): {_format_pct(second_vs_first_scope)} improvement")

    print("\nEntity-level F1 comparison by entity type:")
    print("Entity Type\tFirst-Order\tSecond-Order\tImprovement")
    print("-" * 60)

    for entity_type in ["NEG", "NSCO", "UNC", "USCO"]:
        first_f1 = bio_pos_metrics["entity_metrics"][entity_type]["f1"]  # First-order F1 for entity type
        second_f1 = entity_metrics[entity_type]["f1"]  # Second-order F1 for entity type
        # A first-order F1 of 0 has no defined relative change; print "N/A" instead of crashing
        improvement = _relative_change_pct(second_f1, first_f1)

        print(f"{entity_type}\t\t{first_f1:.4f}\t\t{second_f1:.4f}\t\t{_format_pct(improvement, signed=True)}")