In [None]:
import pandas as pd
import pickle as pkl
from collections import defaultdict, Counter
from itertools import permutations, combinations
from functools import reduce
import numpy as np
import os,sys, io
from io import FileIO
import fnmatch
import re, string
import csv
from utils.helpers import *
from config import *
from preprocessing import *
from dataset import *
from model import *
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
import wandb
# Authenticate with Weights & Biases up front, before any logger is created.
# NOTE(review): WANDB_key is not imported by name in this cell — presumably it
# is provided by one of the star imports (e.g. `from config import *`); confirm.
wandb.login(key=WANDB_key)

In [None]:
# Run the BRAT converter once per split and keep the results keyed by split name.
dataset = {
    split_name: BRATtoDFconvert(split_dir)
    for split_name, split_dir in (('train', TRAIN_DIR), ('test', TEST_DIR))
}


In [None]:
# Short alias for the training split; this is the same object as
# dataset['train'], not a copy.
train_df = dataset['train']

In [None]:
# Print the .info() summary of the training split.
dataset['train'].info()

In [None]:
# Report the shortest and longest string length for the sentence and match
# columns of the training split.
for field_title, field_values in (
    ('sentences', dataset['train'].sentences),
    ('matches', dataset['train'].match),
):
    field_lengths = field_values.str.len()
    print(field_title + ':', 'min =', str(field_lengths.min()) + ',', 'max =', str(field_lengths.max()))

In [None]:
# Map consecutive integer ids to relation labels. The labels are taken in the
# order produced by value_counts() on the training split's string_id column.
id2label = dict(enumerate(dataset['train'].string_id.value_counts().index))

In [None]:
# Display the id -> label mapping.
id2label

In [None]:
# Reverse lookup (label -> integer id), built by inverting id2label.
label2id = dict(zip(id2label.values(), id2label.keys()))
label2id

In [None]:
# Show the .shape of the training split.
dataset['train'].shape

In [None]:
# Check whether every value in the training split's `sentences` column is
# distinct (True means no sentence text occurs more than once).
dataset['train'].sentences.is_unique

In [None]:
# Load the pretrained "studio-ousia/luke-base" tokenizer, configured for the
# entity_pair_classification task.
tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base", task="entity_pair_classification")

In [None]:
# Hold out 20% of the training split for validation.
# The split is taken from dataset['train'] rather than from train_df itself so
# that the cell is idempotent: re-running it no longer shrinks train_df by a
# further 20% each time. RANDOM_STATE keeps the partition reproducible.
train_df, val_df = train_test_split(
    dataset['train'],
    test_size=0.2,
    random_state=RANDOM_STATE,
    shuffle=True,
)

In [None]:
# Wrap each split in a RelationExtractionDataset sharing one tokenizer and one
# label mapping.
# train_df / val_df are produced by the train/validation split cell above. The
# split is intentionally NOT repeated here: splitting train_df a second time
# would silently drop another 20% of the training rows and replace val_df with
# a different subset.
train_dataset = RelationExtractionDataset(data=train_df, tokenizer=tokenizer, label2id=label2id)
valid_dataset = RelationExtractionDataset(data=val_df, tokenizer=tokenizer, label2id=label2id)
test_dataset = RelationExtractionDataset(data=dataset['test'], tokenizer=tokenizer, label2id=label2id)

In [None]:
# Inspect which fields a single training example exposes.
train_dataset[0].keys()

In [None]:
# Training batches of 4 are reshuffled; validation and test batches of 2 keep
# the loader's default ordering.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=4)
valid_dataloader, test_dataloader = (
    DataLoader(eval_split, batch_size=2)
    for eval_split in (valid_dataset, test_dataset)
)

In [None]:
# Pull one training batch and decode its second example back to text as a
# sanity check on the tokenization.
batch = next(iter(train_dataloader))
second_example_ids = batch["input_ids"][1]
tokenizer.decode(second_example_ids)

In [None]:
# Look up the label name of the second example in the current batch.
id2label[batch["label"][1].item()]

In [None]:
# Take the first validation batch. `labels` is kept in its own variable because
# later cells remove the "label" entry from `batch` and compute a loss against
# `labels` separately.
batch = next(iter(valid_dataloader))
labels = batch["label"]
batch.keys()

In [None]:
# Show the shape of the batch's input_ids tensor.
batch["input_ids"].shape

In [None]:
# Build the LUKE module with one output class per known relation label.
model = LUKE(
    num_labels=len(label2id),
    learning_rate=LEARNING_RATE,
    dataset=dataset,
    tokenizer=tokenizer,
    label2id=label2id,
)

# Forward the current batch without its labels as a smoke test.
# The inputs are filtered into a new dict instead of `del batch["label"]`:
# deleting the key mutated the shared `batch`, so running this cell a second
# time raised KeyError.
model_inputs = {key: value for key, value in batch.items() if key != "label"}
outputs = model(**model_inputs)

In [None]:
# Cross-entropy of the model's first forward pass against the labels saved
# from the same batch — a quick sanity check before training starts.
criterion = torch.nn.CrossEntropyLoss()
batch_logits = outputs.logits
initial_loss = criterion(batch_logits, labels)
print("Initial loss:", initial_loss)

In [None]:
# Log the run to the 'LUKE' Weights & Biases project.
wandb_logger = WandbLogger(name='LUKE-N2C2-RE', project='LUKE')

# for early stopping, see https://pytorch-lightning.readthedocs.io/en/1.0.0/early_stopping.html?highlight=early%20stopping
# Stop once val_loss has failed to decrease for `patience` validation checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=2,
    strict=False,
    verbose=False,
    mode='min',
)

# Pass the callback configured above. Previously a second, default-configured
# EarlyStopping(monitor='val_loss') was created inline, so the patience/mode
# settings chosen here were silently ignored.
trainer = Trainer(
    logger=wandb_logger,
    callbacks=[early_stop_callback],
)
trainer.fit(model)

In [None]:
# Run the test loop of the trainer that was just fitted.
# NOTE(review): called without arguments, so which weights and which test
# dataloader are used depends on the Lightning version and on the LUKE
# module's own hooks — confirm.
trainer.test()

In [None]:
# Restore a trained model from a saved checkpoint.
# NOTE(review): the checkpoint path is hard-coded to one specific epoch/step
# file, so this cell fails on a fresh run that produces a differently named
# checkpoint — update the path after retraining.
loaded_model = LUKE.load_from_checkpoint(checkpoint_path="LUKE/checkpoints/epoch=3-step=7699.ckpt")

In [None]:
# Put the wrapped network into evaluation mode.
loaded_model.model.eval()

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_model.to(device)

predictions_total = []
labels_total = []

# Inference only: disabling autograd avoids building (and holding in memory) a
# computation graph for every test batch.
with torch.no_grad():
    for batch in tqdm(test_dataloader):
        # separate the gold labels from the model inputs
        labels = batch.pop("label")

        # move every remaining input tensor to the model's device
        for k, v in batch.items():
            batch[k] = v.to(device)

        # forward pass; the predicted class is the arg-max over the last logits axis
        outputs = loaded_model.model(**batch)
        logits = outputs.logits
        predictions = logits.argmax(-1)

        predictions_total.extend(predictions.tolist())
        labels_total.extend(labels.tolist())

In [None]:
# Overall accuracy of the collected predictions against the gold labels.
print("Accuracy on test set:", accuracy_score(labels_total, predictions_total))

In [None]:
# Show the first test sentence.
# `test_df` was never defined in this notebook (NameError); the test split lives
# in dataset['test'], and the text column is named `sentences` everywhere else.
print(dataset['test'].iloc[0].sentences)

In [None]:
# Classify a single test example end-to-end and compare with its gold label.
idx = 2
# `test_df` was previously used without being defined; bind it to the test split.
test_df = dataset['test']
example = test_df.iloc[idx]
# The text column is `sentences`, as used for the training split earlier on.
text = example.sentences
# NOTE(review): assumes the converted frame has an `entity_spans` column holding
# character-based (start, end) pairs — confirm against BRATtoDFconvert.
entity_spans = [tuple(span) for span in example.entity_spans]

inputs = tokenizer(text, entity_spans=entity_spans, return_tensors="pt")
# The tokenizer returns CPU tensors; move them to wherever the model's weights
# live so the forward pass does not fail with a device mismatch on GPU.
model_device = next(loaded_model.model.parameters()).device
inputs = inputs.to(model_device)

# Evaluation mode and no autograd: this is inference only.
loaded_model.model.eval()
with torch.no_grad():
    outputs = loaded_model.model(**inputs)
logits = outputs.logits
predicted_class_idx = logits.argmax(-1).item()
print("Sentence:", text)
print("Ground truth label:", example.string_id)
# The value printed is the label name, not the raw class index.
print("Predicted label:", id2label[predicted_class_idx])
print("Confidence:", F.softmax(logits, -1).max().item())