In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Flatten the directory walk into one stream of full paths, then print each on its own line.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ee-pretrain-biobert/trigger-detection-biobert.pt
/kaggle/input/ee-pretrain-biobert/event-cls-biobert.pt
/kaggle/input/ee-pretrained/event-classififcation-pretrained.pt
/kaggle/input/ee-pretrained/trigger-detection-pretrained.pt
/kaggle/input/genia-biomedical-event-dataset/train_data.csv
/kaggle/input/genia-biomedical-event-dataset/test_data.csv
/kaggle/input/genia-biomedical-event-dataset/dev_data.csv
/kaggle/input/genia-biomedical-event-dataset/GE11-LICENSE


In [2]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
from transformers import BertTokenizerFast, BertForTokenClassification, BertTokenizer, BertModel
import torch
from torch import nn
from collections import OrderedDict

In [3]:
# Define the BERT-based classifier
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    Args:
        num_classes: Number of output units of the linear head (one logit per class).
        pretrained_name: Model id or local path passed to ``BertModel.from_pretrained``.
            Defaults to ``"bert-base-uncased"``, the encoder that used to be hard-coded,
            so existing callers are unaffected.
        dropout: Dropout probability applied to the pooled encoder output.
            Defaults to 0.3, the previously hard-coded value.

    The submodule attribute names (``bert``, ``dropout``, ``classifier``) are kept
    unchanged because they determine the keys of the model's state dict.
    """

    def __init__(self, num_classes, pretrained_name="bert-base-uncased", dropout=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained(pretrained_name)
        self.dropout = nn.Dropout(dropout)
        # The head width follows the encoder's configured hidden size.
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and return the raw (unnormalised) class logits.

        The logits are computed from ``pooler_output`` of the encoder, passed
        through dropout and then the linear head.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        return self.classifier(self.dropout(pooled_output))

In [4]:
# Load trigger detection model
def load_trigger_model(model_path, device='cuda' if torch.cuda.is_available() else 'cpu',
                       pretrained_name='bert-base-uncased'):
    """Build the token-classification trigger detector and load fine-tuned weights.

    Args:
        model_path: Path to a checkpoint file holding a state dict for a
            ``BertForTokenClassification`` model with two labels.
        device: Device the weights are mapped to and the model is moved to.
            Note the default is evaluated once, when the function is defined.
        pretrained_name: Model id or local path used for both the tokenizer and
            the backbone. Defaults to ``'bert-base-uncased'`` (the previously
            hard-coded value).

    Returns:
        ``(tokenizer, model, device)`` with the model in evaluation mode.

    Raises:
        RuntimeError: from ``load_state_dict`` if the checkpoint keys or shapes
            do not match the model.
    """
    tokenizer = BertTokenizerFast.from_pretrained(pretrained_name)
    model = BertForTokenClassification.from_pretrained(pretrained_name, num_labels=2)

    # weights_only=True restricts unpickling to tensors and plain containers, which is
    # all a state dict needs; it avoids executing arbitrary pickled code and silences
    # the FutureWarning torch emits for the implicit default.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    # Accept checkpoints whose keys carry a 'module.' prefix (as written when the saved
    # model was wrapped, e.g. in nn.DataParallel), mirroring load_event_model.
    prefix = 'module.'
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }

    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return tokenizer, model, device

In [5]:
# Load event classification model
def load_event_model(model_path, num_classes, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """Build the event-type classifier and load fine-tuned weights.

    Args:
        model_path: Path to a checkpoint file holding a state dict for ``BertClassifier``.
        num_classes: Number of event classes; must match the checkpoint's head size.
        device: Device the weights are mapped to and the model is moved to.
            Note the default is evaluated once, when the function is defined.

    Returns:
        ``(tokenizer, model, device)`` with the model in evaluation mode.

    Raises:
        RuntimeError: from ``load_state_dict`` if the checkpoint keys or shapes
            do not match the model.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertClassifier(num_classes=num_classes)

    # weights_only=True restricts unpickling to tensors and plain containers, which is
    # all a state dict needs; it avoids executing arbitrary pickled code and silences
    # the FutureWarning torch emits for the implicit default.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    # Strip a leading 'module.' from parameter names (present when the saved model was
    # wrapped, e.g. in nn.DataParallel) so the keys match the bare BertClassifier.
    prefix = 'module.'
    new_state_dict = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )

    model.load_state_dict(new_state_dict)
    model.to(device)
    model.eval()
    return tokenizer, model, device

In [6]:
# Trigger prediction function
def trigger_predict(sentence, tokenizer, model, device, max_len=512):
    """Predict a trigger label for every whitespace-separated word of ``sentence``.

    Args:
        sentence: Raw sentence; it is split on whitespace into words.
        tokenizer: Fast tokenizer; its output must support ``word_ids()``.
        model: Token-classification model whose output exposes ``.logits``.
        device: Device the input tensors are moved to.
        max_len: Maximum number of tokens; longer inputs are truncated.

    Returns:
        A list of ``(word, label)`` pairs with exactly one pair per word, in order.
        ``label`` is a Python int: the argmax class of the word's first sub-token.
        Words that received no token (cut off by truncation, or reduced to nothing
        by the tokenizer) get label 0, so the result always stays aligned with
        ``sentence.split()``. An empty or whitespace-only sentence yields ``[]``.
    """
    words = sentence.split()
    if not words:
        return []

    # No padding is requested: this is a single-sequence batch, so padding to
    # max_len would only add masked positions and extra compute.
    encoding = tokenizer(
        words,
        is_split_into_words=True,
        return_offsets_mapping=False,
        truncation=True,
        max_length=max_len,
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Select the single batch element explicitly; squeeze() would also collapse the
    # sequence axis if it happened to have length 1.
    preds = torch.argmax(outputs.logits, dim=-1)[0].tolist()

    # Write labels by word index rather than appending, so a word without tokens
    # cannot shift the labels of the words that follow it.
    labels = [0] * len(words)
    previous_word_idx = None
    for token_pos, word_idx in enumerate(encoding.word_ids(batch_index=0)):
        # None marks special tokens; a repeated index is a continuation sub-token.
        if word_idx is None or word_idx == previous_word_idx:
            continue
        labels[word_idx] = preds[token_pos]
        previous_word_idx = word_idx
    return list(zip(words, labels))

In [7]:
# Event prediction function
def predict_event(word, tokenizer, model, device, max_len=32):
    """Classify one trigger word and return the index of its highest-scoring class.

    The word is tokenized (truncated to ``max_len`` tokens), moved to ``device``
    and scored by ``model`` without gradient tracking; the result is a plain int.
    """
    encoded = tokenizer(
        word,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_len,
    )
    ids = encoded["input_ids"].to(device)
    mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        scores = model(ids, mask)

    best_class = scores.argmax(dim=1)
    return best_class.cpu().item()

In [8]:
# Evaluate dataset and compute metrics
def _split_annotations(cell):
    """Split one annotation cell on ';' and drop the final piece; NaN yields []."""
    if pd.isna(cell):
        return []
    # The piece after the last ';' is discarded, matching cells written as "a;b;"
    # (presumably every item is ';'-terminated in the CSV -- confirm against the data).
    return str(cell).split(';')[:-1]


def evaluate_and_compute_metrics(csv_file, trigger_model_path, event_model_path, index_to_label, label_to_index, output_file):
    """Run the trigger-detection + event-classification pipeline over a CSV and report metrics.

    For every row, each whitespace-separated word of the sentence gets a gold label
    (the annotated event type at its position, otherwise 'No Event') and a predicted
    label ('No Event' unless the trigger model flags the word, in which case the event
    model classifies that word). Accuracy, a confusion matrix and a classification
    report are printed, and the per-word label pairs are written to ``output_file``.

    Accuracy is computed over all words, including the 'No Event' ones.

    Args:
        csv_file: CSV with columns "Sentence", "TriggerWordLoc" and "EventType".
            Locations are read as 1-based word positions.
        trigger_model_path: Checkpoint passed to ``load_trigger_model``.
        event_model_path: Checkpoint passed to ``load_event_model``.
        index_to_label: Maps event-model class index to label name; its size is
            used as the number of event classes.
        label_to_index: Its keys, in order, define the rows/columns of the
            confusion matrix and the classes in the report.
        output_file: Destination CSV for the true/predicted label columns.

    Returns:
        None.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Size the classifier head from the label map instead of a hard-coded 9,
    # so the two cannot drift apart.
    num_classes = len(index_to_label)

    trigger_tokenizer, trigger_model, _ = load_trigger_model(trigger_model_path, device)
    event_tokenizer, event_model, _ = load_event_model(event_model_path, num_classes, device)

    data = pd.read_csv(csv_file)
    true_labels = []
    pred_labels = []

    for _, row in data.iterrows():
        sentence = row["Sentence"]
        num_words = len(sentence.split())
        # Convert the 1-based annotated positions to 0-based list indices.
        trigger_locs = [int(x) - 1 for x in _split_annotations(row["TriggerWordLoc"])]
        event_types = _split_annotations(row["EventType"])

        # Gold labels: one per word, 'No Event' unless annotated.
        true_events = ['No Event'] * num_words
        for trigger_loc, event in zip(trigger_locs, event_types):
            true_events[trigger_loc] = event
        true_labels.extend(true_events)

        # Predicted labels: only words flagged as triggers are sent to the event model.
        trigger_predictions = trigger_predict(sentence, trigger_tokenizer, trigger_model, device)
        predict_events = [
            'No Event' if label == 0
            else index_to_label[predict_event(word, event_tokenizer, event_model, device)]
            for word, label in trigger_predictions
        ]
        # Keep exactly one prediction per word: if the trigger model returned fewer
        # entries than there are words (e.g. after truncation), fill the tail with
        # 'No Event' so the gold and predicted lists never differ in length.
        predict_events = predict_events[:num_words]
        predict_events.extend(['No Event'] * (num_words - len(predict_events)))
        pred_labels.extend(predict_events)

    # Calculate metrics
    class_names = list(label_to_index.keys())
    accuracy = accuracy_score(true_labels, pred_labels)
    conf_matrix = confusion_matrix(true_labels, pred_labels, labels=class_names)
    report = classification_report(true_labels, pred_labels, labels=class_names, digits = 4)

    print("Accuracy:", accuracy)
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(report)

    # Save results to file
    results = pd.DataFrame({
        "True_Labels": true_labels,
        "Predicted_Labels": pred_labels
    })
    results.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}.")

# Main

In [11]:
# Paths and parameters
csv_file = '/kaggle/input/genia-biomedical-event-dataset/dev_data.csv'
trigger_model_path = '/kaggle/input/ee-pretrained/trigger-detection-pretrained.pt'
event_model_path = '/kaggle/input/ee-pretrained/event-classififcation-pretrained.pt'

# Event classes, listed in the order of the class indices they are mapped from.
index_to_label = dict(enumerate([
    'Negative_regulation',
    'Gene_expression',
    'Regulation',
    'Transcription',
    'Positive_regulation',
    'Binding',
    'Localization',
    'Phosphorylation',
    'Protein_catabolism',
]))

# Inverse mapping, plus one extra class for words that carry no event.
label_to_index = {name: idx for idx, name in index_to_label.items()}
label_to_index["No Event"] = len(index_to_label)

output_file = '/kaggle/working/evaluation_results.csv'

In [12]:
# Evaluate the pipeline on the configured CSV; metrics are printed and the
# per-word labels are written to output_file.
evaluate_and_compute_metrics(
    csv_file=csv_file,
    trigger_model_path=trigger_model_path,
    event_model_path=event_model_path,
    index_to_label=index_to_label,
    label_to_index=label_to_index,
    output_file=output_file,
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  state_dict = torch.load(model_path, map_location=device)


Accuracy: 0.982575576611726
Confusion Matrix:
[[  190     0     4     0     9     0     0     0     0   122]
 [    1   416     0     2    35     0     5     0     0    96]
 [    0     0    90     0    10     1     0     0     0    93]
 [    0    20     0    42     6     0     0     0     0    47]
 [    1     3     3     1   409     0     0     2     0   263]
 [    1     0     0     0     4   142     0     0     0   108]
 [    0     1     0     0     1     0    30     0     0    11]
 [    0     0     0     0     0     0     0    48     0    25]
 [    0     0     0     0     0     0     0     0    14     4]
 [   65    90    46    26   143    47     5    10     2 72660]]
Classification Report:
                     precision    recall  f1-score   support

Negative_regulation     0.7364    0.5846    0.6518       325
    Gene_expression     0.7849    0.7495    0.7668       555
         Regulation     0.6294    0.4639    0.5341       194
      Transcription     0.5915    0.3652    0.4516     

dmis-lab/biobert-base-cased-v1.2