# HMM with BIO Tagging and POS

This notebook implements an alternative Hidden Markov Model that incorporates:
1. BIO tagging
2. Part-of-Speech (POS) tags

The model is used to detect negation and uncertainty markers in multilingual (Spanish and Catalan) medical texts. The notebook covers the following steps:

- Load the preprocessed data with BIO tagging and POS
- Train the enhanced HMM model
- Evaluate performance on test data
- Compare with the baseline model
- Save the model and results

## Imports and Setup

In [None]:
import os
import sys
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix

sys.path.append("..")

from src.hmm import HMMBIOPOS
from src.evaluation import *

# Parent of the current working directory; base for the data paths built from it.
PATH_ROOT = os.path.dirname(os.getcwd())

# Make sure the output folders for models and evaluation results exist.
for _results_subdir in ("models", "evaluation"):
    os.makedirs(
        os.path.join(PATH_ROOT, "data", "results", _results_subdir),
        exist_ok=True,
    )

# Seed NumPy's global random number generator.
np.random.seed(42)

## Load Preprocessed Data

In [None]:
def load_processed_data(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    The file is opened in binary mode and closed again before returning.
    """
    with open(file_path, "rb") as handle:
        return pickle.load(handle)

# Pickled train/test splits under data/processed.
train_file = os.path.join(PATH_ROOT, "data", "processed", "train_bio_pos.pkl")
test_file = os.path.join(PATH_ROOT, "data", "processed", "test_bio_pos.pkl")

train_data = load_processed_data(train_file)
test_data = load_processed_data(test_file)

# Quick summary of what was loaded.
print(f"Loaded {len(train_data['observations'])} training sequences")
print(f"Loaded {len(test_data['observations'])} test sequences")
print(f"Vocabulary size: {len(train_data['vocabulary'])}")
print(f"State space (BIO format): {train_data['state_space']}")

# Count how many training tokens carry each tag.
tag_counts = {}
for state_seq in train_data["states"]:
    for state in state_seq:
        tag_counts[state] = tag_counts.get(state, 0) + 1

print("\nBIO Tag Distribution (Training):")
for tag in sorted(tag_counts):
    count = tag_counts[tag]
    print(f"  {tag}: {count} tokens")

## Train HMM Model

In [None]:
# Build the model vocabulary from the training observations. Only 2-tuples are
# kept (presumably (word, POS) pairs -- confirm against the preprocessing step);
# any other observation is left out.
bio_vocabulary = set()
for obs_seq in train_data["observations"]:
    for obs in obs_seq:
        if not isinstance(obs, tuple) or len(obs) != 2:
            continue
        bio_vocabulary.add(obs)

# Set up the BIO+POS HMM with the training state space and the vocabulary above.
bio_pos_model = HMMBIOPOS(
    state_space=train_data["state_space"],
    vocabulary=bio_vocabulary,
    smoothing=0.01,
)

print("Training BIO+POS enhanced HMM model...")
bio_pos_model.train(train_data["observations"], train_data["states"])

# Persist the trained model under data/results/models.
model_path = os.path.join(PATH_ROOT, "data", "results", "models", "hmm_bio_pos.pkl")
bio_pos_model.save(model_path)
print(f"Model saved to {model_path}")

## Evaluate the Model

In [None]:
# Run the trained model over the test observations.
print("Making predictions on test data...")
test_predictions = bio_pos_model.predict(test_data["observations"])

# Score the predictions three ways, each with is_bio=True: token level,
# entity level, and scope detection (which also receives the observations).
print("Evaluating predictions...")
metrics = compute_metrics(test_data["states"], test_predictions, is_bio=True)
entity_metrics = get_entity_based_metrics(test_data["states"], test_predictions, is_bio=True)
scope_metrics = evaluate_scope_detection(
    test_data["states"],
    test_predictions,
    test_data["observations"],
    is_bio=True,
)

print("\nClassification Report (Token Level with BIO):")
print_classification_report(test_data["states"], test_predictions, is_bio=True)

print("\nEntity-Level F1 Scores:")
for label, metrics_dict in entity_metrics.items():
    if label == "macro_avg":
        continue  # the aggregate entry is printed on its own line after the loop
    print(f"{label}: F1 = {metrics_dict['f1']:.4f}, Precision = {metrics_dict['precision']:.4f}, "
          f"Recall = {metrics_dict['recall']:.4f}, Support = {metrics_dict.get('support', 'N/A')}")
print(f"Macro Average: F1 = {entity_metrics['macro_avg']['f1']:.4f}")

print("\nScope Detection F1 Scores:")
for label, metrics_dict in scope_metrics.items():
    if label == "macro_avg":
        continue  # the aggregate entry is printed on its own line after the loop
    print(f"{label}: F1 = {metrics_dict['f1']:.4f}, Precision = {metrics_dict['precision']:.4f}, "
          f"Recall = {metrics_dict['recall']:.4f}")
print(f"Macro Average: F1 = {scope_metrics['macro_avg']['f1']:.4f}")

# Write all three metric sets to a single JSON file under data/results/evaluation.
eval_path = os.path.join(PATH_ROOT, "data", "results", "evaluation", "evaluation_bio_pos.json")
save_metrics(
    {
        "token_metrics": metrics,
        "entity_metrics": entity_metrics,
        "scope_metrics": scope_metrics,
    },
    eval_path,
)
print(f"Evaluation results saved to {eval_path}")

## Compare with Baseline Model

In [None]:
# Compare this model's macro F1 scores with the baseline results stored in
# evaluation_baseline.json, plot them side by side, and report the relative change.
try:
    # Only the file read is guarded, so the "not found" message is printed
    # solely when the baseline file itself is missing (and not, for example,
    # when saving the figure fails).
    with open(os.path.join(PATH_ROOT, "data", "results", "evaluation", "evaluation_baseline.json"), 'r') as f:
        baseline_metrics = json.load(f)  # Load baseline metrics
except FileNotFoundError:
    print("Baseline evaluation results not found. Run hmm_baseline.ipynb first.")
else:
    baseline_token_f1 = baseline_metrics["token_metrics"]["macro_avg"]["f1"]  # Baseline token-level macro F1
    baseline_entity_f1 = baseline_metrics["entity_metrics"]["macro_avg"]["f1"]  # Baseline entity-level macro F1

    bio_pos_token_f1 = metrics["macro_avg"]["f1"]  # BIO+POS token-level macro F1
    bio_pos_entity_f1 = entity_metrics["macro_avg"]["f1"]  # BIO+POS entity-level macro F1

    labels = ['Token-Level F1', 'Entity-Level F1']  # One bar group per metric
    baseline_scores = [baseline_token_f1, baseline_entity_f1]
    bio_pos_scores = [bio_pos_token_f1, bio_pos_entity_f1]

    x = np.arange(len(labels))
    width = 0.35  # Bar width; each group's two bars sit half a width either side of x

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x - width/2, baseline_scores, width, label='Baseline HMM')
    ax.bar(x + width/2, bio_pos_scores, width, label='BIO+POS HMM')

    ax.set_ylabel('F1 Score')
    ax.set_title('Performance Comparison: Baseline vs. BIO+POS Enhanced HMM')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    # Write each score just above its bar.
    for i, v in enumerate(baseline_scores):
        ax.text(i - width/2, v + 0.01, f'{v:.4f}', ha='center')
    for i, v in enumerate(bio_pos_scores):
        ax.text(i + width/2, v + 0.01, f'{v:.4f}', ha='center')

    plt.tight_layout()
    # Save before show() so the figure is written while it is still current.
    plt.savefig(os.path.join(PATH_ROOT, "data", "results", "evaluation", "bio_pos_vs_baseline.png"))
    plt.show()

    # Relative change in percent. A baseline F1 of 0 makes the ratio undefined,
    # so report NaN instead of raising ZeroDivisionError.
    token_improvement = (
        (bio_pos_token_f1 - baseline_token_f1) / baseline_token_f1 * 100
        if baseline_token_f1 else float("nan")
    )
    entity_improvement = (
        (bio_pos_entity_f1 - baseline_entity_f1) / baseline_entity_f1 * 100
        if baseline_entity_f1 else float("nan")
    )

    print(f"Token-level F1 improvement: {token_improvement:.2f}%")
    print(f"Entity-level F1 improvement: {entity_improvement:.2f}%")

## Analyze BIO Tagging Effectiveness

In [None]:
def analyze_bio_tagging(test_states, test_predictions):
    """Print entity-level F1 under BIO evaluation next to standard evaluation.

    Both the true and the predicted sequences are passed through
    ``convert_bio_to_standard`` to obtain their non-BIO form. Entity metrics
    are then computed once on the BIO sequences (``is_bio=True``) and once on
    the converted ones (``is_bio=False``). A tab-separated table with the F1
    for NEG, NSCO, UNC and USCO, plus the macro average, is printed; the last
    column is BIO F1 minus standard F1.

    Args:
        test_states: True tag sequences in BIO format.
        test_predictions: Predicted tag sequences in BIO format.

    Returns:
        A ``(bio_metrics, std_metrics)`` tuple holding the two results of
        ``get_entity_based_metrics``.
    """
    rule = "-" * 70

    # Non-BIO versions of the gold and predicted sequences.
    plain_gold = list(map(convert_bio_to_standard, test_states))
    plain_pred = list(map(convert_bio_to_standard, test_predictions))

    # Same entity-level scoring, once per tagging scheme.
    bio_metrics = get_entity_based_metrics(test_states, test_predictions, is_bio=True)
    std_metrics = get_entity_based_metrics(plain_gold, plain_pred, is_bio=False)

    print("Entity-Level F1 Scores - Comparing BIO vs Standard Evaluation:")
    print("Entity Type\tBIO Evaluation\tStandard Evaluation\tDifference")
    print(rule)

    # One table row per entity type.
    for label in ("NEG", "NSCO", "UNC", "USCO"):
        f1_bio = bio_metrics[label]["f1"]
        f1_std = std_metrics[label]["f1"]
        print(f"{label}\t\t{f1_bio:.4f}\t\t{f1_std:.4f}\t\t{f1_bio - f1_std:+.4f}")

    # Closing row with the macro averages.
    macro_bio = bio_metrics["macro_avg"]["f1"]
    macro_std = std_metrics["macro_avg"]["f1"]
    macro_gap = macro_bio - macro_std

    print(rule)
    print(f"Macro Avg\t{macro_bio:.4f}\t\t{macro_std:.4f}\t\t{macro_gap:+.4f}")

    return bio_metrics, std_metrics

# Print the BIO vs. standard comparison for the test predictions and keep both metric sets.
bio_metrics, std_metrics = analyze_bio_tagging(test_data["states"], test_predictions)

In [None]:
# Sorted tag list, used both as the label set and as the row names of the report.
labels = sorted(test_data["state_space"])

# Flatten the per-sequence tags into one token-level list for each side.
flat_true_states = []
for seq in test_data["states"]:
    flat_true_states.extend(seq)

flat_pred_states = []
for seq in test_predictions:
    flat_pred_states.extend(seq)

report = classification_report(
    flat_true_states,
    flat_pred_states,
    labels=labels,
    target_names=labels,
    digits=4,
)

print("Classification Report (Text Matrix):")
print(report)