In [17]:
# Attach Google Drive to the Colab filesystem at a fixed mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/Drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/Drive; to attempt to forcibly remount, call drive.mount("/content/Drive", force_remount=True).


In [18]:
!pip install torch torchvision torchaudio
!pip install transformers
!pip install tqdm



In [19]:
import json
import random
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional

from tqdm import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from transformers import BertForSequenceClassification, BertTokenizer, BertModel, AdamW
# from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaModel, AdamW

In [20]:
# Relation labels, one per line; a label's position in this list is its class index.
keys = [
    'no_relation',
    'per:title',
    'org:top_members/employees',
    'org:country_of_headquarters',
    'per:parents',
    'per:age',
    'per:countries_of_residence',
    'per:children',
    'org:alternate_names',
    'per:charges',
    'per:cities_of_residence',
    'per:origin',
    'org:founded_by',
    'per:employee_of',
    'per:siblings',
    'per:alternate_names',
    'org:website',
    'per:religion',
    'per:stateorprovince_of_death',
    'org:parents',
    'org:subsidiaries',
    'per:other_family',
    'per:stateorprovinces_of_residence',
    'org:members',
    'per:cause_of_death',
    'org:member_of',
    'org:number_of_employees/members',
    'per:country_of_birth',
    'org:shareholders',
    'org:stateorprovince_of_headquarters',
    'per:city_of_death',
    'per:date_of_birth',
    'per:spouse',
    'org:city_of_headquarters',
    'per:date_of_death',
    'per:schools_attended',
    'org:political/religious_affiliation',
    'per:country_of_death',
    'org:founded',
    'per:stateorprovince_of_birth',
    'per:city_of_birth',
    'org:dissolved',
]

# Forward lookup (label -> index) and reverse lookup (index -> label).
rel2id = dict(zip(keys, range(len(keys))))
id2rel = dict(enumerate(keys))

In [22]:

# Example input: a tokenised sentence plus the inclusive token offsets of the
# two entity mentions (subject and object).
token = [
    "He", "has", "served", "as", "a", "policy", "aide", "to", "the", "late",
    "U.S.", "Senator", "Alan", "Cranston", ",", "as", "National", "Issues",
    "Director", "for", "the", "2004", "presidential", "campaign", "of",
    "Congressman", "Dennis", "Kucinich", ",", "as", "a", "co-founder", "of",
    "Progressive", "Democrats", "of", "America", "and", "as", "a", "member",
    "of", "the", "international", "policy", "department", "at", "the", "RAND",
    "Corporation", "think", "tank", "before", "all", "that", ".",
]
subj_start, subj_end = 33, 36
obj_start, obj_end = 43, 45

# The sentence as a single space-separated string.
text = ' '.join(token)

# End offsets are inclusive, so each slice runs one position past them.
subj = token[subj_start:subj_end + 1]
obj = token[obj_start:obj_end + 1]

ent1, ent2 = " ".join(subj), " ".join(obj)

# Show the sentence followed by the two entity strings, one per line.
print(text, ent1, ent2, sep="\n")


He has served as a policy aide to the late U.S. Senator Alan Cranston , as National Issues Director for the 2004 presidential campaign of Congressman Dennis Kucinich , as a co-founder of Progressive Democrats of America and as a member of the international policy department at the RAND Corporation think tank before all that .
Progressive Democrats of America
international policy department


In [23]:
def predict_relation(ent1, ent2, text, model_path, rel2id, id2rel, max_length=128):
    """Predict the relation label for a pair of entity mentions in a sentence.

    Loads a pickled model object from ``model_path``, encodes the string
    ``ent1 + ent2 + text`` with the ``bert-base-uncased`` tokenizer, runs a
    single forward pass without gradients and maps the highest-scoring class
    index through ``id2rel``.

    Args:
        ent1: Text of the first entity mention (the subject span in the
            notebook's example).
        ent2: Text of the second entity mention (the object span in the
            notebook's example).
        text: The sentence containing both mentions.
        model_path: Path to a checkpoint that unpickles to a ready-to-call
            model object (not a bare ``state_dict``).
        rel2id: Mapping from relation name to class index. Not used during
            inference; kept so existing callers keep working.
        id2rel: Mapping from class index to relation name.
        max_length: Maximum number of token ids passed to the model; longer
            inputs are truncated by the tokenizer. Defaults to 128.

    Returns:
        The relation name ``id2rel[i]`` where ``i`` is the arg-max class index.

    Raises:
        KeyError: If the predicted index is not a key of ``id2rel``.
    """
    import inspect

    # Define the device based on CUDA availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): the tokenizer is uncased BERT while the checkpoint file in
    # the example is named "span-bert" — confirm this matches the tokenizer
    # that was used when the model was trained.
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

    # SECURITY: torch.load unpickles arbitrary Python objects, so only load
    # checkpoints from a trusted source.
    # map_location lets a checkpoint saved on GPU be opened on a CPU-only
    # runtime. Newer PyTorch releases default to weights_only=True, which
    # rejects whole-model pickles, so opt out where the argument exists.
    load_kwargs = {"map_location": device}
    if "weights_only" in inspect.signature(torch.load).parameters:
        load_kwargs["weights_only"] = False
    model = torch.load(model_path, **load_kwargs)

    # Inputs are created on `device`, so the model must live there as well.
    model.to(device)
    model.eval()

    # NOTE(review): the three strings are concatenated with no separator, so
    # the last word of one fuses with the first word of the next. This must
    # mirror the input format used at training time — confirm before changing.
    sentence = ent1 + ent2 + text
    indexed_tokens = tokenizer.encode(
        sentence, add_special_tokens=True, max_length=max_length, truncation=True
    )
    tokens_tensor = torch.tensor([indexed_tokens], device=device)

    # Single unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(tokens_tensor)

    # Performing the inference
    with torch.no_grad():
        outputs = model(tokens_tensor, attention_mask=attention_mask)

    if isinstance(outputs, torch.Tensor):
        # The model returned the logits tensor directly.
        logits = outputs
    elif hasattr(outputs, "logits"):
        # Output object exposing the logits as a named field.
        logits = outputs.logits
    else:
        # Plain sequence: a lone element is the logits, otherwise they are
        # expected in second position (original behaviour).
        logits = outputs[0] if len(outputs) == 1 else outputs[1]

    relation_id = torch.argmax(logits, dim=1).item()

    # Output the predicted relation
    return id2rel[relation_id]

# Example usage: classify the sample sentence with the checkpoint stored on the mounted Drive.
model_path = "/content/Drive/MyDrive/COMP61332_text_mining/RE/Tacred/Bert_RE/inference/span-bert_best_model.pth"
predicted_relation = predict_relation(
    ent1=ent1,
    ent2=ent2,
    text=text,
    model_path=model_path,
    rel2id=rel2id,
    id2rel=id2rel,
)
print(f"Predicted Relation: {predicted_relation}")

Predicted Relation: no_relation
