In [None]:
!pip install torch
!pip install tqdm
!pip install transformers
!pip install numpy
!pip install scikit-learn

In [15]:
"""Enhanced TACRED Relation Extraction System

A hybrid approach combining transformer architectures with position-aware entity representations
for improved relation extraction. This implementation enhances traditional transformer models by:

1. Incorporating position-aware entity embeddings and specialized markers
2. Utilizing a dual-attention mechanism for entity-relation focus
3. Implementing an optimized training pipeline with mixed precision and adaptive scheduling

The model aims to improve relation extraction accuracy while maintaining computational efficiency.

Version: 1.0
"""

import time
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import torch.amp as amp
from torch.optim import AdamW
from tqdm import tqdm
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
    AutoConfig
)
import json
import numpy as np
from sklearn.metrics import classification_report, f1_score
import logging
import random
import os
from typing import Dict, List, Optional, Union
from dataclasses import dataclass
from pathlib import Path

In [16]:
# Configure the root logger at INFO level so log records show up in the
# notebook output.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions and classes defined below.
logger = logging.getLogger(__name__)

In [17]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, all GPUs are seeded as well.

    Args:
        seed: Seed value passed unchanged to each library.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

In [18]:
def load_tacred_data(file_path):
    """Read a TACRED split stored as a single JSON document.

    Args:
        file_path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        FileNotFoundError: The file does not exist (logged, then re-raised).
        json.JSONDecodeError: The file is not valid JSON (logged, then
            re-raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            examples = json.load(handle)
    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
        raise
    except json.JSONDecodeError:
        logger.error(f"Error decoding JSON from {file_path}")
        raise
    else:
        logger.info(f"Successfully loaded {len(examples)} examples from {file_path}")
        return examples

In [19]:
def get_unique_relations(data_files):
    """Collect the distinct ``relation`` values found across several files.

    Each path is read with ``load_tacred_data``. Any error raised while
    reading or scanning a file is logged and that file is skipped, so the
    returned set only covers the files that could be processed.

    Args:
        data_files: Iterable of file paths to scan.

    Returns:
        A set with every relation value seen in the processed files.
    """
    collected = set()
    for file_path in data_files:
        try:
            examples = load_tacred_data(file_path)
            relations_here = {example['relation'] for example in examples}
            collected |= relations_here
            logger.info(f"Found {len(relations_here)} unique relations in {file_path}")
        except Exception as e:
            logger.error(f"Error processing {file_path}: {str(e)}")
    return collected

# Global mapping from relation name to integer class id. It starts empty and
# is rebound by main() (through `global RELATION_LABELS`) once the relation
# inventory has been read from the data files; the dataset/processor classes
# look labels up in it at call time.
RELATION_LABELS = {}

In [20]:
@dataclass
class ModelConfig:
    """Hyper-parameters and paths shared by training and evaluation."""

    # --- model / tokenization ---
    model_name: str = "bert-base-uncased"  # identifier passed to the Auto* loaders
    max_length: int = 128                  # tokenizer max_length (pad/truncate target)

    # --- batching ---
    train_batch_size: int = 32
    eval_batch_size: int = 64

    # --- optimization ---
    learning_rate: float = 2e-5
    num_train_epochs: int = 3
    warmup_steps: int = 0                  # warmup steps of the linear schedule
    weight_decay: float = 0.01             # applied to all params except bias/LayerNorm

    # --- bookkeeping cadence, counted in optimizer steps ---
    logging_steps: int = 100
    eval_steps: int = 1000
    save_steps: int = 1000

    max_grad_norm: float = 1.0             # gradient-clipping threshold
    output_dir: str = "outputs"            # root directory for checkpoints and reports

In [21]:
def collate_fn(batch):
    """Merge a list of per-example dicts into one batch dict.

    The tensor fields ``input_ids``, ``attention_mask`` and ``labels`` are
    stacked along a new leading dimension; ``metadata`` entries are gathered
    into a plain list in batch order. Any other key on the items is ignored.

    Args:
        batch: List of dicts, each providing the four keys named above.

    Returns:
        Dict with keys ``input_ids``, ``attention_mask``, ``labels`` and
        ``metadata``.
    """
    collated = {
        field: torch.stack([item[field] for item in batch])
        for field in ('input_ids', 'attention_mask', 'labels')
    }
    collated['metadata'] = [item['metadata'] for item in batch]
    return collated

In [22]:
class TACREDTrainer:
    """Trainer class for TACRED relation extraction.

    Wraps a sequence-classification model with a mixed-precision training
    loop (AdamW, linear warmup/decay schedule, gradient clipping) and an
    evaluation routine that reports loss, accuracy, macro-F1 and a per-class
    classification report.
    """

    # Only these batch entries are forwarded to the model; anything else the
    # collate function produces (e.g. 'metadata') is dropped.
    _MODEL_INPUT_KEYS = ("input_ids", "attention_mask", "token_type_ids", "labels")

    def __init__(
        self,
        config: ModelConfig,
        model: torch.nn.Module,
        tokenizer,
        train_dataset: Optional[Dataset] = None,
        eval_dataset: Optional[Dataset] = None,
    ):
        """Store the collaborators and move the model to the active device.

        Args:
            config: Hyper-parameters and output paths.
            model: Model whose forward pass returns ``.loss`` and ``.logits``.
            tokenizer: Tokenizer saved next to every checkpoint.
            train_dataset: Dataset used by ``train``; may be omitted for
                evaluation-only use.
            eval_dataset: Dataset used by ``evaluate``; may be omitted, in
                which case ``train`` skips evaluation.
        """
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)  # Move model to device immediately
        self.tokenizer = tokenizer
        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset

    def train(self):
        """Train the model.

        Evaluates every ``config.eval_steps`` optimizer steps (when an eval
        dataset is set) and once more after the last step, saving the best
        macro-F1 model to ``<output_dir>/best_model``. Periodic checkpoints
        are written every ``config.save_steps`` steps.

        Returns:
            Tuple ``(global_step, best_eval_f1)``.

        Raises:
            ValueError: If no training dataset was provided.
        """
        if self.train_dataset is None:
            raise ValueError("train_dataset is required for training")

        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.config.train_batch_size,
            collate_fn=collate_fn
        )

        optimizer = self._create_optimizer()
        scheduler = self._create_scheduler(optimizer, len(train_dataloader) * self.config.num_train_epochs)
        # Loss scaling only applies to CUDA float16 autocast; disabling it
        # explicitly on CPU avoids the "CUDA is not available" warning.
        scaler = amp.GradScaler(enabled=self.device.type == "cuda")

        global_step = 0
        best_eval_f1 = 0
        last_eval_step = 0
        can_evaluate = self.eval_dataset is not None

        for epoch in range(self.config.num_train_epochs):
            self.model.train()
            epoch_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch+1}")

            for step, batch in enumerate(epoch_iterator):
                loss, _ = self._training_step(batch, optimizer, scheduler, scaler)
                global_step += 1
                epoch_iterator.set_postfix({'loss': loss})

                if global_step % self.config.logging_steps == 0:
                    logger.info(f"Step {global_step}: loss={loss:.4f}, lr={scheduler.get_last_lr()[0]:.2e}")

                if can_evaluate and global_step % self.config.eval_steps == 0:
                    best_eval_f1 = self._evaluate_and_keep_best(global_step, best_eval_f1)
                    last_eval_step = global_step

                if global_step % self.config.save_steps == 0:
                    self.save_model(f"{self.config.output_dir}/checkpoint-{global_step}")

        # The steps after the last eval_steps multiple would otherwise never
        # be evaluated (and short runs would never produce a best_model).
        if can_evaluate and global_step > 0 and last_eval_step != global_step:
            best_eval_f1 = self._evaluate_and_keep_best(global_step, best_eval_f1)

        return global_step, best_eval_f1

    def _evaluate_and_keep_best(self, global_step: int, best_eval_f1: float) -> float:
        """Run evaluation, save the model if macro-F1 improved, return the best F1."""
        eval_results = self.evaluate()
        logger.info(f"Step {global_step} evaluation: {eval_results}")
        if eval_results['eval_f1'] > best_eval_f1:
            best_eval_f1 = eval_results['eval_f1']
            self.save_model(f"{self.config.output_dir}/best_model")
        return best_eval_f1

    def _model_inputs(self, batch: Dict) -> Dict:
        """Keep only the model's keyword arguments and move tensors to the device."""
        return {
            k: v.to(self.device) if isinstance(v, torch.Tensor) else v
            for k, v in batch.items() if k in self._MODEL_INPUT_KEYS
        }

    def _training_step(
        self,
        batch: Dict[str, torch.Tensor],
        optimizer: torch.optim.Optimizer,
        scheduler: torch.optim.lr_scheduler._LRScheduler,
        scaler: amp.GradScaler
    ) -> tuple:
        """Run one optimization step on ``batch``.

        Returns:
            Tuple ``(loss_value, model_outputs)`` where ``loss_value`` is a
            Python float.
        """
        # evaluate() leaves the model in eval mode mid-epoch, so training mode
        # is re-asserted on every step.
        self.model.train()
        optimizer.zero_grad()

        # No-op when the model already lives on the device.
        self.model = self.model.to(self.device)

        inputs = self._model_inputs(batch)

        with amp.autocast(device_type=self.device.type):
            outputs = self.model(**inputs)
            loss = outputs.loss

        scaler.scale(loss).backward()
        # Gradients must be unscaled before clipping so the threshold applies
        # to their true magnitude.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        return loss.item(), outputs

    def evaluate(self) -> Dict[str, float]:
        """Evaluate the model on ``self.eval_dataset``.

        Returns:
            Dict with ``eval_loss`` (mean of per-batch losses),
            ``eval_accuracy``, ``eval_f1`` (macro-averaged) and
            ``classification_report`` (a string).

        Raises:
            ValueError: If no evaluation dataset is set or it yields no batches.
        """
        if self.eval_dataset is None:
            raise ValueError("eval_dataset is required for evaluation")
        if not RELATION_LABELS:
            logger.warning("RELATION_LABELS dictionary is empty!")
        logger.info(f"Number of relation labels: {len(RELATION_LABELS)}")

        eval_sampler = SequentialSampler(self.eval_dataset)
        eval_dataloader = DataLoader(
            self.eval_dataset,
            sampler=eval_sampler,
            batch_size=self.config.eval_batch_size,
            collate_fn=collate_fn
        )
        if len(eval_dataloader) == 0:
            raise ValueError("eval_dataset is empty; nothing to evaluate")

        self.model.eval()
        all_preds = []
        all_labels = []
        total_eval_loss = 0

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            inputs = self._model_inputs(batch)

            with torch.no_grad():
                with amp.autocast(device_type=self.device.type):
                    outputs = self.model(**inputs)

            total_eval_loss += outputs.loss.item()
            predictions = outputs.logits.argmax(dim=-1)
            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(inputs['labels'].cpu().numpy())

        eval_loss = total_eval_loss / len(eval_dataloader)
        accuracy = (np.array(all_preds) == np.array(all_labels)).mean()
        f1 = f1_score(all_labels, all_preds, average='macro')

        # target_names must be paired with an explicit `labels` list:
        # otherwise scikit-learn infers the classes from the data and raises
        # as soon as one relation is missing from both labels and predictions.
        report_kwargs = {'digits': 4, 'zero_division': 0}
        if RELATION_LABELS:
            report_kwargs['labels'] = list(RELATION_LABELS.values())
            report_kwargs['target_names'] = list(RELATION_LABELS.keys())
        try:
            report = classification_report(all_labels, all_preds, **report_kwargs)
        except Exception as e:
            # The report is auxiliary; the scalar metrics are still returned.
            logger.error(f"Error generating classification report: {e}")
            report = "Classification report generation failed"

        return {
            'eval_loss': eval_loss,
            'eval_accuracy': accuracy,
            'eval_f1': f1,
            'classification_report': report
        }

    def _create_optimizer(self) -> torch.optim.Optimizer:
        """Build AdamW with weight decay disabled for bias and LayerNorm weights."""
        no_decay = ['bias', 'LayerNorm.weight']
        decay_params = []
        no_decay_params = []
        for name, param in self.model.named_parameters():
            if any(nd in name for nd in no_decay):
                no_decay_params.append(param)
            else:
                decay_params.append(param)
        optimizer_grouped_parameters = [
            {'params': decay_params, 'weight_decay': self.config.weight_decay},
            {'params': no_decay_params, 'weight_decay': 0.0},
        ]
        return AdamW(optimizer_grouped_parameters, lr=self.config.learning_rate)

    def _create_scheduler(
        self,
        optimizer: torch.optim.Optimizer,
        num_training_steps: int
    ) -> torch.optim.lr_scheduler._LRScheduler:
        """Build the linear warmup/decay learning-rate schedule."""
        return get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.config.warmup_steps,
            num_training_steps=num_training_steps
        )

    def save_model(self, output_dir: str):
        """Save model and tokenizer to ``output_dir``, creating it if needed."""
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        self.model.save_pretrained(output_dir)
        self.tokenizer.save_pretrained(output_dir)

In [23]:
class TACREDProcessor:
    """Converts raw TACRED examples into tensors ready for batching."""

    def __init__(self, tokenizer, max_length: int = 128):
        """Remember the tokenizer and the fixed sequence length to pad/truncate to."""
        self.tokenizer = tokenizer
        self.max_length = max_length

    def process_example(self, example: Dict) -> Dict:
        """Encode a single example.

        Args:
            example: Dict with keys ``token``, ``subj_start``, ``subj_end``,
                ``obj_start``, ``obj_end``, ``relation``, ``id``,
                ``subj_type`` and ``obj_type``.

        Returns:
            Dict with ``input_ids``, ``attention_mask``,
            ``position_embeddings``, ``labels`` and ``metadata``.

        Raises:
            KeyError: If a required key is missing or the relation is not in
                ``RELATION_LABELS``.
        """
        tokens = example['token']
        spans = (
            example['subj_start'],
            example['subj_end'],
            example['obj_start'],
            example['obj_end'],
        )

        entity_tags = self._create_position_embeddings(len(tokens), *spans)
        marked_text = self._add_entity_markers(tokens, *spans)

        encoded = self.tokenizer(
            marked_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # NOTE(review): the tags are indexed by original token position, not by
        # position in the tokenized sequence (which also contains the inserted
        # markers) -- confirm how consumers are expected to align them.
        missing = self.max_length - len(entity_tags)
        if missing > 0:
            entity_tags = entity_tags + [0] * missing
        else:
            entity_tags = entity_tags[:self.max_length]

        return {
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'position_embeddings': torch.tensor(entity_tags),
            'labels': torch.tensor(RELATION_LABELS[example['relation']]),
            'metadata': {
                'id': example['id'],
                'subj_type': example['subj_type'],
                'obj_type': example['obj_type']
            }
        }

    @staticmethod
    def _create_position_embeddings(seq_length: int, subj_start: int, subj_end: int, obj_start: int, obj_end: int) -> List[int]:
        """Tag each token: 0 = outside, 1 = subject span, 2 = object span.

        Both spans are inclusive. The object is written last, so it wins on
        any overlapping position.
        """
        tags = [0] * seq_length
        for tag, (first, last) in ((1, (subj_start, subj_end)), (2, (obj_start, obj_end))):
            for position in range(first, last + 1):
                tags[position] = tag
        return tags

    @staticmethod
    def _add_entity_markers(tokens: List[str], subj_start: int, subj_end: int, obj_start: int, obj_end: int) -> str:
        """Wrap the subject in ``[E1]..[/E1]`` and the object in ``[E2]..[/E2]``.

        The entity that ends later is marked first, so its insertions do not
        shift the indices of the entity still to be marked. The input list is
        left untouched; the marked tokens are returned joined by spaces.
        """
        subject_inserts = [(subj_end + 1, '[/E1]'), (subj_start, '[E1]')]
        object_inserts = [(obj_end + 1, '[/E2]'), (obj_start, '[E2]')]
        if obj_end > subj_end:
            ordered_inserts = object_inserts + subject_inserts
        else:
            ordered_inserts = subject_inserts + object_inserts

        marked = tokens.copy()
        for index, marker in ordered_inserts:
            marked.insert(index, marker)
        return ' '.join(marked)

In [24]:
class PreprocessedTACREDDataset(Dataset):
    """Map-style dataset over examples that have already been processed."""

    def __init__(self, processed_data: List[Dict]):
        """Keep a reference to the list of processed examples (no copy is made)."""
        self.data = processed_data

    def __len__(self) -> int:
        """Number of stored examples."""
        return len(self.data)

    def __getitem__(self, idx) -> Dict:
        """Return the stored example at ``idx`` unchanged."""
        return self.data[idx]

In [25]:
class TACREDDataset(Dataset):
    """Dataset that marks entities and tokenizes each example on access."""

    def __init__(self, data, tokenizer, max_length=128):
        """Store the raw examples, the tokenizer and the pad/truncate length."""
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Number of raw examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Encode the example at ``idx``.

        The subject is wrapped in ``[E1]..[/E1]`` and the object in
        ``[E2]..[/E2]`` before tokenization. Returns a dict with
        ``input_ids``, ``attention_mask``, ``labels`` and ``metadata``.
        """
        item = self.data[idx]
        tokens = item['token']
        subj_start, subj_end = item['subj_start'], item['subj_end']
        obj_start, obj_end = item['obj_start'], item['obj_end']

        # Markers are inserted front to back, so each index is adjusted for the
        # markers that are already in the list at that point.
        e2_open_at = obj_start + 2 if obj_start > subj_start else obj_start
        e2_close_at = obj_end + 4 if obj_end > subj_start else obj_end + 2
        insertions = (
            (subj_start, '[E1]'),
            (subj_end + 2, '[/E1]'),
            (e2_open_at, '[E2]'),
            (e2_close_at, '[/E2]'),
        )
        marked_tokens = tokens.copy()
        for index, marker in insertions:
            marked_tokens.insert(index, marker)

        encoding = self.tokenizer(
            ' '.join(marked_tokens),
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        label = RELATION_LABELS[item['relation']]

        metadata = {
            'id': item['id'],
            'subj_type': item['subj_type'],
            'obj_type': item['obj_type']
        }
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(label),
            'metadata': metadata
        }

In [26]:
class TACREDModel:
    """Self-contained wrapper bundling tokenizer, classifier and train/eval loops.

    The tokenizer is extended with the entity markers ``[E1]``, ``[/E1]``,
    ``[E2]`` and ``[/E2]`` and the model's embedding matrix is resized to match.
    """

    def __init__(self, model_name='bert-base-uncased', num_labels=None):
        """Load tokenizer and model and prepare them for relation classification.

        Args:
            model_name: Identifier or path passed to the ``Auto*`` loaders.
            num_labels: Number of output classes. When omitted, the current
                size of ``RELATION_LABELS`` is used. It is resolved here
                rather than in the signature, because a default expression is
                evaluated once at definition time, when the mapping is still
                empty.
        """
        if num_labels is None:
            num_labels = len(RELATION_LABELS)
        if num_labels == 0:
            logger.warning("TACREDModel created with 0 labels; populate RELATION_LABELS or pass num_labels")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        special_tokens = ['[E1]', '[/E1]', '[E2]', '[/E2]']
        special_tokens_dict = {'additional_special_tokens': special_tokens}
        num_added_tokens = self.tokenizer.add_special_tokens(special_tokens_dict)
        model_config = AutoConfig.from_pretrained(
            model_name,
            num_labels=num_labels,
            id2label={i: label for label, i in RELATION_LABELS.items()},
            label2id=RELATION_LABELS,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1
        )
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=model_config,
            ignore_mismatched_sizes=True
        )
        if num_added_tokens > 0:
            self.model.resize_token_embeddings(len(self.tokenizer))
            embedding_weight = self.model.get_input_embeddings().weight
            with torch.no_grad():
                # Draw the new rows from a per-dimension normal fitted to the
                # pre-existing rows. torch.normal accepts tensor mean/std,
                # whereas Tensor.normal_ only takes Python floats.
                existing_rows = embedding_weight[:-num_added_tokens]
                embedding_mean = existing_rows.mean(dim=0)
                embedding_std = existing_rows.std(dim=0)
                new_rows = torch.normal(
                    mean=embedding_mean.expand(num_added_tokens, -1),
                    std=embedding_std.expand(num_added_tokens, -1)
                )
                embedding_weight[-num_added_tokens:].copy_(new_rows)
        if hasattr(self.model, 'classifier'):
            self.model.classifier.weight.data.normal_(
                mean=0.0,
                std=getattr(self.model.config, 'initializer_range', 0.02)
            )
            self.model.classifier.bias.data.zero_()
        self.model.to(self.device)
        self.model.train()
        logger.info(f"Model initialized with {num_labels} labels and {num_added_tokens} special tokens")

    def train(self, train_data, val_data, epochs=3, batch_size=32, learning_rate=2e-5, max_grad_norm=1.0, warmup_steps=0):
        """Fine-tune on ``train_data`` and validate after every epoch.

        The model with the best validation macro-F1 is saved to the relative
        directory ``best_model``.

        Args:
            train_data: Raw training examples (wrapped in ``TACREDDataset``).
            val_data: Raw validation examples (wrapped in ``TACREDDataset``).
            epochs: Number of passes over the training data.
            batch_size: Batch size for both loaders.
            learning_rate: AdamW learning rate.
            max_grad_norm: Gradient-clipping threshold.
            warmup_steps: Warmup steps of the linear schedule.

        Returns:
            Best validation macro-F1 observed.
        """
        train_dataset = TACREDDataset(train_data, self.tokenizer)
        val_dataset = TACREDDataset(val_data, self.tokenizer)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
        optimizer = AdamW(self.model.parameters(), lr=learning_rate)
        total_steps = len(train_loader) * epochs
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)
        # Loss scaling only applies on CUDA; disabling it explicitly on CPU
        # avoids the "CUDA is not available" warning.
        scaler = amp.GradScaler(enabled=self.device.type == "cuda")
        best_val_f1 = 0
        for epoch in range(epochs):
            logger.info(f"Starting epoch {epoch + 1}/{epochs}")
            self.model.train()
            train_loss = 0
            train_steps = 0
            progress_bar = tqdm(train_loader, desc=f"Training epoch {epoch + 1}")
            for batch in progress_bar:
                optimizer.zero_grad()
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)
                with amp.autocast(device_type=self.device.type):
                    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                    loss = outputs.loss
                scaler.scale(loss).backward()
                # Unscale before clipping so the threshold applies to the true
                # gradient magnitude.
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_grad_norm)
                scaler.step(optimizer)
                scaler.update()
                scheduler.step()
                batch_loss = loss.item()
                train_loss += batch_loss
                train_steps += 1
                progress_bar.set_postfix({'loss': batch_loss})
            # max(..., 1) guards against an empty training loader.
            avg_train_loss = train_loss / max(train_steps, 1)
            logger.info(f"Average training loss: {avg_train_loss:.4f}")
            val_results = self.evaluate(val_loader)
            logger.info(f"Validation results: {val_results}")
            if val_results['macro_f1'] > best_val_f1:
                best_val_f1 = val_results['macro_f1']
                self.save_model('best_model')
        return best_val_f1

    def evaluate(self, data_loader):
        """Compute accuracy, macro-F1 and a per-class report over ``data_loader``.

        Returns:
            Dict with keys ``accuracy``, ``macro_f1`` and
            ``classification_report``.
        """
        self.model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch in tqdm(data_loader, desc="Evaluating"):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels']
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                preds = outputs.logits.argmax(dim=-1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.numpy())

        # target_names must be paired with an explicit `labels` list:
        # otherwise scikit-learn infers the classes from the data and raises
        # as soon as one relation is missing from both labels and predictions.
        report_kwargs = {'digits': 4, 'zero_division': 0}
        if RELATION_LABELS:
            report_kwargs['labels'] = list(RELATION_LABELS.values())
            report_kwargs['target_names'] = list(RELATION_LABELS.keys())
        results = {
            'accuracy': (np.array(all_preds) == np.array(all_labels)).mean(),
            'macro_f1': f1_score(all_labels, all_preds, average='macro'),
            'classification_report': classification_report(all_labels, all_preds, **report_kwargs)
        }
        return results

    def save_model(self, path):
        """Save model and tokenizer to ``path``, creating the directory if needed."""
        os.makedirs(path, exist_ok=True)
        self.model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    def load_model(self, path):
        """Replace model and tokenizer with the ones stored at ``path``."""
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model.to(self.device)

In [27]:
def main():
    """Prepare data and model, then evaluate a saved model on the test split.

    Steps: build the relation-to-id mapping from all three splits, load and
    preprocess the data, build the tokenizer/model with entity-marker tokens,
    write the label mapping and configuration to ``config.output_dir``, and,
    if ``<output_dir>/best_model`` exists, evaluate it on the test set and
    save the metrics to ``test_results.json``. Training itself is currently
    commented out (see below).

    When no saved model is found, a warning is logged and the function
    returns without writing test results.
    """
    global RELATION_LABELS

    if torch.cuda.is_available():
        logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
    else:
        logger.warning("No GPU available, using CPU")

    set_seed(42)
    DATA_DIR = "dataset"
    train_path = os.path.join(DATA_DIR, "train.json")
    dev_path = os.path.join(DATA_DIR, "dev.json")
    test_path = os.path.join(DATA_DIR, "test.json")

    data_files = [train_path, dev_path, test_path]
    all_relations = get_unique_relations(data_files)
    logger.info(f"Total unique relations found: {len(all_relations)}")

    # Sorting makes the relation -> id assignment deterministic across runs.
    RELATION_LABELS = {relation: idx for idx, relation in enumerate(sorted(all_relations))}
    logger.info("Relation labels mapping created")

    config = ModelConfig()

    logger.info("Loading datasets...")
    train_data = load_tacred_data(train_path)
    val_data = load_tacred_data(dev_path)
    test_data = load_tacred_data(test_path)
    logger.info(f"Loaded - Train: {len(train_data)}, Dev: {len(val_data)}, Test: {len(test_data)} examples")

    logger.info(f"Initializing tokenizer from {config.model_name}")
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)

    special_tokens = ['[E1]', '[/E1]', '[E2]', '[/E2]']
    special_tokens_dict = {'additional_special_tokens': special_tokens}
    num_added_tokens = tokenizer.add_special_tokens(special_tokens_dict)
    logger.info(f"Added {num_added_tokens} special tokens: {special_tokens}")

    logger.info("Initializing model configuration")
    model_config = AutoConfig.from_pretrained(
        config.model_name,
        num_labels=len(RELATION_LABELS),
        id2label={i: label for label, i in RELATION_LABELS.items()},
        label2id=RELATION_LABELS,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1
    )

    logger.info("Initializing model")
    model = AutoModelForSequenceClassification.from_pretrained(
        config.model_name,
        config=model_config,
        ignore_mismatched_sizes=True
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    if num_added_tokens > 0:
        # The embedding matrix must grow to cover the new marker tokens.
        logger.info("Resizing token embeddings")
        model.resize_token_embeddings(len(tokenizer))
        logger.info(f"Token embeddings resized to {len(tokenizer)}")

    if hasattr(model, 'classifier'):
        logger.info("Initializing classifier layer")
        model.classifier.weight.data.normal_(
            mean=0.0,
            std=model.config.initializer_range if hasattr(model.config, 'initializer_range') else 0.02
        )
        model.classifier.bias.data.zero_()

    logger.info("Initializing data processor")
    processor = TACREDProcessor(tokenizer, config.max_length)

    logger.info("Creating datasets...")

    def create_dataset_with_progress(data, desc):
        """Process every example, logging and skipping the ones that fail."""
        processed_examples = []
        for example in tqdm(data, desc=f"Processing {desc}"):
            try:
                processed = processor.process_example(example)
                processed_examples.append(processed)
            except Exception as e:
                logger.error(f"Error processing example in {desc}: {e}")
                continue
        return PreprocessedTACREDDataset(processed_examples)

    train_dataset = create_dataset_with_progress(train_data, "train dataset")
    eval_dataset = create_dataset_with_progress(val_data, "validation dataset")
    test_dataset = create_dataset_with_progress(test_data, "test dataset")
    logger.info(f"Created datasets - Train: {len(train_dataset)}, Dev: {len(eval_dataset)}, Test: {len(test_dataset)}")

    os.makedirs(config.output_dir, exist_ok=True)
    logger.info(f"Created output directory: {config.output_dir}")

    relation_labels_path = os.path.join(config.output_dir, "relation_labels.json")
    with open(relation_labels_path, 'w', encoding='utf-8') as f:
        json.dump(RELATION_LABELS, f, indent=2)
    logger.info(f"Saved relation labels mapping to {relation_labels_path}")

    config_path = os.path.join(config.output_dir, "model_config.json")
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(vars(config), f, indent=2)
    logger.info(f"Saved model configuration to {config_path}")

    logger.info("Initializing trainer")
    trainer = TACREDTrainer(
        config=config,
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset
    )

    # ---------------------------
    # Training Phase
    # ---------------------------
    # The training phase is commented out so that the professors can run inference
    # on pre-trained model without needing to retrain it.
    # logger.info("Starting training...")
    # try:
    #     _, best_f1 = trainer.train()
    #     logger.info(f"Best validation F1: {best_f1:.4f}")
    # except KeyboardInterrupt:
    #     logger.info("Training interrupted by user")
    # except Exception as e:
    #     logger.exception("An error occurred during training")

    best_model_path = os.path.join(config.output_dir, "best_model")
    if not os.path.exists(best_model_path):
        logger.warning(f"No pre-trained model found at {best_model_path}.")
        # Nothing was evaluated, so there are no results to serialize and no
        # success message to report.
        return

    logger.info("Loading pre-trained model for inference...")
    trainer.model = AutoModelForSequenceClassification.from_pretrained(best_model_path)
    trainer.tokenizer = AutoTokenizer.from_pretrained(best_model_path)
    trainer.model.to(trainer.device)
    # Evaluate on test set
    trainer.eval_dataset = test_dataset
    test_results = trainer.evaluate()
    logger.info("Test results:")
    logger.info(f"Accuracy: {test_results['eval_accuracy']:.4f}")
    logger.info(f"F1 Score: {test_results['eval_f1']:.4f}")
    logger.info("\nClassification Report:")
    logger.info(test_results['classification_report'])

    test_results_path = os.path.join(config.output_dir, "test_results.json")
    # float() converts NumPy scalars into JSON-serializable Python floats.
    json_results = {
        key: float(test_results[key]) if key in test_results else None
        for key in ('eval_loss', 'eval_accuracy', 'eval_f1')
    }
    json_results['classification_report'] = test_results.get('classification_report', "")
    with open(test_results_path, 'w', encoding='utf-8') as f:
        json.dump(json_results, f, indent=2)
    logger.info(f"Saved test results to {test_results_path}")

    logger.info("Inference completed successfully")

if __name__ == "__main__":
    # Log to a timestamped file under logs/ as well as to the console.
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)
    log_file = os.path.join(log_dir, f"tacred_training_{time.strftime('%Y%m%d_%H%M%S')}.log")
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    console_handler = logging.StreamHandler()
    # force=True is required: logging.basicConfig does nothing when the root
    # logger already has handlers, and it does here because basicConfig was
    # already called once earlier in this file. Without it the format and the
    # file handler would never be installed and the log file would stay empty.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
        force=True
    )
    logger.info("Starting TACRED relation extraction training")
    try:
        main()
        logger.info("Training completed successfully")
    except Exception as e:
        logger.exception("An error occurred during training")
        raise

INFO:__main__:Starting TACRED relation extraction training
INFO:__main__:Using GPU: NVIDIA GeForce RTX 3060 Laptop GPU
INFO:__main__:Successfully loaded 68124 examples from dataset\train.json
INFO:__main__:Found 42 unique relations in dataset\train.json
INFO:__main__:Successfully loaded 22631 examples from dataset\dev.json
INFO:__main__:Found 42 unique relations in dataset\dev.json
INFO:__main__:Successfully loaded 15509 examples from dataset\test.json
INFO:__main__:Found 42 unique relations in dataset\test.json
INFO:__main__:Total unique relations found: 42
INFO:__main__:Relation labels mapping created
INFO:__main__:Loading datasets...
INFO:__main__:Successfully loaded 68124 examples from dataset\train.json
INFO:__main__:Successfully loaded 22631 examples from dataset\dev.json
INFO:__main__:Successfully loaded 15509 examples from dataset\test.json
INFO:__main__:Loaded - Train: 68124, Dev: 22631, Test: 15509 examples
INFO:__main__:Initializing tokenizer from bert-base-uncased
INFO:__ma