In [34]:
%pip install sentence_transformers



In [35]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
import os
os.chdir("/content/drive/MyDrive/Pratham")

In [37]:
import math
import random
import numpy as np
import json
import torch
from torch import nn
from collections import defaultdict
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses, models, evaluation
from sentence_transformers.evaluation import TripletEvaluator
from sklearn.metrics import f1_score

In [38]:
if torch.backends.mps.is_available():
    device = torch.device('mps')
    print("Device: mps")
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("Device: cuda")
else:
    device = torch.device('cpu')
    print("Device: cpu")

Device: cuda


In [39]:
file_path_train = 'scicite/train.jsonl'
file_path_dev = 'scicite/dev.jsonl'
file_path_test = 'scicite/test.jsonl'
train_data = []
dev_data = []
test_data = []
with open(file_path_train, 'r', encoding='utf-8') as file:
    for line in file:
        train_data.append(json.loads(line))
with open(file_path_dev, 'r', encoding='utf-8') as file:
    for line in file:
        dev_data.append(json.loads(line))
with open(file_path_test, 'r', encoding='utf-8') as file:
    for line in file:
        test_data.append(json.loads(line))
print("Sample data points:")
print(train_data[0])
print(train_data[1])
print(train_data[2])
print("Keys:", list(train_data[0].keys()))
print("Number of training data points:", len(train_data))
print("Label distribution:", {x['label']: sum([1 for y in train_data if y['label'] == x['label']]) for x in train_data[:100]})

Sample data points:
{'source': 'explicit', 'citeEnd': 175, 'sectionName': 'Introduction', 'citeStart': 168, 'string': 'However, how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [12,22], IscU/Isu1 [6,11,16] or ISD11/Isd11 [14,15]).', 'label': 'background', 'label_confidence': 1.0, 'citingPaperId': '1872080baa7d30ec8fb87be9a65358cd3a7fb649', 'citedPaperId': '894be9b4ea46a5c422e81ef3c241072d4c73fdc0', 'isKeyCitation': True, 'id': '1872080baa7d30ec8fb87be9a65358cd3a7fb649>894be9b4ea46a5c422e81ef3c241072d4c73fdc0', 'unique_id': '1872080baa7d30ec8fb87be9a65358cd3a7fb649>894be9b4ea46a5c422e81ef3c241072d4c73fdc0_11', 'excerpt_index': 11}
{'source': 'explicit', 'citeStart': 16, 'sectionName': 'Novel Quantitative Trait Loci for Seminal Root Traits in Barley', 'string': 'In the study by Hickey et al. (2012), spikes were sampled from the field at the point of physiological\nrobinson et al.

In [40]:
# Parameters
train_batch_size = 16
dev_batch_size = train_batch_size
test_batch_size = train_batch_size
num_epochs = 5

In [41]:
class JointInputLabelSpaceCitationIntentClassifier(nn.Module):
    # This model does a non-linear transformations of the sentence embeddings and the label embeddings into a joint space
    # of dimension joint_space_dimension. For each label, we define the joint representation of the sentence and the label
    # as the element-wise product of the sentence embedding and the label embedding in this joint space. We then apply a
    # linear transformation to this joint representation to get the compatibility score for each label. The label with the
    # highest compatibility score is the predicted label. This model is trained with a cross-entropy loss.
    def __init__(self, model_path, num_labels, joint_space_dimension=256):
        super(JointInputLabelSpaceCitationIntentClassifier, self).__init__()
        self.sentence_transformer = SentenceTransformer(model_path)
        self.num_labels = num_labels
        self.joint_space_dimension = joint_space_dimension
        self.label_texts = ["background", "method", "result"]
        self.label_to_joint_space = nn.Linear(self.sentence_transformer.get_sentence_embedding_dimension(), joint_space_dimension)
        self.sentence_to_joint_space = nn.Linear(self.sentence_transformer.get_sentence_embedding_dimension(), joint_space_dimension)
        self.compatibility_score = nn.Linear(joint_space_dimension, 1)
        self.relu = nn.ReLU()
    def forward(self, input_texts):
        sentence_embeddings = self.sentence_transformer.encode(input_texts, convert_to_tensor=True)
        sentence_embeddings = self.relu(self.sentence_to_joint_space(sentence_embeddings))
        label_embeddings = self.sentence_transformer.encode(self.label_texts, convert_to_tensor=True)
        label_embeddings = self.relu(self.label_to_joint_space(label_embeddings))
        joint_space_embeddings = sentence_embeddings.unsqueeze(1) * label_embeddings.unsqueeze(0)
        compatibility_scores = self.compatibility_score(joint_space_embeddings).squeeze(2)
        return compatibility_scores

In [42]:
class CitationsDatasetWithoutInputExample():
    label_to_id = {'background': 0, 'method': 1, 'result': 2}
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        return self.data[item]['string'], CitationsDatasetWithoutInputExample.label_to_id[self.data[item]['label']]

In [43]:
model_path = 'output/pre_trained_scibert-scicite'
num_labels = len(CitationsDatasetWithoutInputExample.label_to_id)
citation_intent_classifier_joint_input_label = JointInputLabelSpaceCitationIntentClassifier(model_path, num_labels).to(device)

In [44]:
# Parameters
learning_rate = 2e-5
num_epochs = 6

In [45]:
optimizer = torch.optim.Adam(citation_intent_classifier_joint_input_label.parameters(), lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()

In [46]:
train_dataset = CitationsDatasetWithoutInputExample(train_data)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)

In [47]:
dev_dataset = CitationsDatasetWithoutInputExample(dev_data)
dev_dataloader = DataLoader(dev_dataset, shuffle=False, batch_size=dev_batch_size)

In [None]:
def train_epoch(model, dataloader, loss_func, optimizer):
    model.train()
    total_loss = 0
    for input_texts, labels in dataloader:
        labels = labels.to(device)
        optimizer.zero_grad()
        output = model(input_texts)
        loss = loss_func(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Training loss: {total_loss / len(dataloader)}")

def evaluate(model, dataloader, loss_func):
    model.eval()
    total_loss = 0
    total_correct = 0
    with torch.no_grad():
        for input_texts, labels in dataloader:
            labels = labels.to(device)
            output = model(input_texts)
            loss = loss_func(output, labels)
            total_loss += loss.item()
            total_correct += (output.argmax(1) == labels).sum().item()
    print(f"Evaluation loss: {total_loss / len(dataloader)}")
    print(f"Evaluation accuracy: {total_correct / len(dataloader.dataset)}")

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_epoch(citation_intent_classifier_joint_input_label, train_dataloader, loss_func, optimizer)
    evaluate(citation_intent_classifier_joint_input_label, dev_dataloader, loss_func)
    predictions_128_6, true_labels_128_6 = test(citation_intent_classifier_joint_input_label, test_dataloader, device)
    f1_128_6 = f1_score(true_labels_128_6, predictions_128_6, average='macro')
    print(f"F1 Score: {f1_128_6}")

Epoch 1/6
Training loss: 0.4712394453484074
Evaluation loss: 0.4319787157763695
Evaluation accuracy: 0.8482532751091703
F1 Score: 0.8786662397255031
Epoch 2/6
Training loss: 0.2876845239336754
Evaluation loss: 0.43278660220575743
Evaluation accuracy: 0.8449781659388647
F1 Score: 0.8728828596573841
Epoch 3/6
Training loss: 0.2775215146009081
Evaluation loss: 0.4266696148391428
Evaluation accuracy: 0.8471615720524017
F1 Score: 0.8736917149492655
Epoch 4/6
Training loss: 0.2723651743284607
Evaluation loss: 0.4294319671161216
Evaluation accuracy: 0.8482532751091703
F1 Score: 0.8747344717970176
Epoch 5/6
Training loss: 0.2691958535664765
Evaluation loss: 0.42474906782394856
Evaluation accuracy: 0.8493449781659389
F1 Score: 0.8753616597669988
Epoch 6/6


In [16]:
test_dataset = CitationsDatasetWithoutInputExample(test_data)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=test_batch_size)

In [17]:
def test(model, dataloader, device):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for input_texts, labels in dataloader:
            labels = labels.to(device)
            output = model(input_texts)
            _, predicted_labels = torch.max(output, dim=1)
            predictions.extend(predicted_labels.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    return predictions, true_labels

In [18]:
predictions_128_3, true_labels_128_3 = test(citation_intent_classifier_joint_input_label, test_dataloader, device)
f1_128_3 = f1_score(true_labels_128_3, predictions_128_3, average='macro')
print(f"F1 Score: {f1_128_3}")

F1 Score: 0.8698521531570188


In [None]:
predictions, true_labels = test(citation_intent_classifier_joint_input_label, test_dataloader, device)

In [None]:
f1_512 = f1_score(true_labels, predictions, average='macro')
print(f"F1 Score: {f1_512}")

F1 Score: 0.8703296104322225


In [None]:
torch.save(citation_intent_classifier_joint_input_label.state_dict(), 'output/citation_intent_classifier_joint_input_label.pth')

In [None]:
predictions_128, true_labels_128 = test(citation_intent_classifier_joint_input_label, test_dataloader, device)
f1_128 = f1_score(true_labels_128, predictions_128, average='macro')
print(f"F1 Score: {f1_128}")

F1 Score: 0.8715763062959508


In [None]:
predictions_128_5, true_labels_128_5 = test(citation_intent_classifier_joint_input_label, test_dataloader, device)
f1_128_5 = f1_score(true_labels_128_5, predictions_128_5, average='macro')
print(f"F1 Score: {f1_128_5}")

F1 Score: 0.8683467971369844
