In [18]:
!pip install transformers datasets accelerate evaluate





[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from datasets import load_dataset

In [20]:
# Read the train and validation splits from local JSON Lines files.
data = load_dataset(
    "json",
    data_files=dict(train="train.jsonl", validation="validation.jsonl"),
)

In [5]:
# Position of each option letter inside the multi-hot label vector.
label_map = {"A": 0, "B": 1, "C": 2, "D": 3}

def convert_multi_label(example):
    """Attach a multi-hot ``labels`` vector (order A, B, C, D) to ``example``.

    ``example["golden_answer"]`` is a comma-separated string of option
    letters such as ``"A,C"``; whitespace around each letter is ignored.
    An empty string, or empty tokens produced by stray commas, contribute
    nothing, so an example with no answer gets an all-zero vector.

    Args:
        example: mapping with a ``"golden_answer"`` string.

    Returns:
        The same ``example`` object, with ``example["labels"]`` set to a list
        of floats (``1.0`` for every listed option, ``0.0`` otherwise).

    Raises:
        KeyError: if a non-empty token is not a key of ``label_map``.
    """
    # Use floats for every entry so all rows share one element type
    # (the original mixed int zeros with float ones).
    labels = [0.0] * len(label_map)

    for ans in example["golden_answer"].split(","):
        ans = ans.strip()
        if not ans:
            # "".split(",") yields [""], and "A," yields a trailing "";
            # neither names an option, so skip instead of raising KeyError('').
            continue
        labels[label_map[ans]] = 1.0

    example["labels"] = labels
    return example




In [22]:
# Add the multi-hot "labels" column to every example of every split.
data = data.map(convert_multi_label)

Map:   0%|          | 0/1819 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [23]:
def build_input(example):
    """Render one example as the text prompt fed to the classifier.

    Reads ``target_event`` and ``option_A`` .. ``option_D`` from ``example``
    and returns ``{"text": prompt}``: one line per field, followed by the
    closing question.
    """
    prompt_parts = [
        f"Target event: {example['target_event']}\n",
        f"A: {example['option_A']}\n",
        f"B: {example['option_B']}\n",
        f"C: {example['option_C']}\n",
        f"D: {example['option_D']}\n",
        "Which options are the plausible causes?",
    ]
    return dict(text="".join(prompt_parts))

# Add the rendered prompt as a "text" column on every split.
data = data.map(build_input)


Map:   0%|          | 0/1819 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [24]:
from transformers import AutoTokenizer

# Checkpoint name; reused further down when the classification model is loaded.
model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Tokenize the ``"text"`` field of ``batch`` with the global ``tokenizer``.

    Every sequence is truncated and padded to exactly 256 tokens.
    """
    fixed_length_options = dict(
        truncation=True,
        padding="max_length",
        max_length=256,
    )
    return tokenizer(batch["text"], **fixed_length_options)

# batched=True passes batches of examples to tokenize() instead of one example at a time.
tokenized = data.map(tokenize, batched=True)


Map:   0%|          | 0/1819 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [25]:
# Drop the raw text/metadata columns; only the tokenizer outputs and "labels" are
# used from here on.
# NOTE(review): re-running this cell on the already-trimmed dataset will presumably
# fail because the columns no longer exist — confirm, or rebuild `tokenized` first.
tokenized = tokenized.remove_columns([
    "text", "topic_id", "uuid",
    "target_event", "option_A", "option_B",
    "option_C", "option_D", "golden_answer"
])


In [26]:
# Return the three listed columns as torch tensors when rows are accessed.
tokenized.set_format(type="torch", columns=["input_ids","attention_mask","labels"])


In [27]:
from transformers import AutoModelForSequenceClassification

# Sequence classifier with one output per option (A, B, C, D).
# NOTE(review): problem_type="multi_label_classification" presumably selects an
# independent per-label (sigmoid/BCE) loss — confirm against the transformers docs.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels = 4,
    problem_type = "multi_label_classification"
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
from transformers import TrainingArguments,Trainer
# Training configuration: evaluate and checkpoint once per epoch for 4 epochs.
training_args = TrainingArguments(
    output_dir="./aer_multilabled_model",
    eval_strategy="epoch",
    save_strategy="epoch",  # same cadence as eval_strategy
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=4,
    weight_decay=0.01,
    # NOTE(review): no metric_for_best_model is given, so "best" presumably
    # falls back to the library default (evaluation loss) — confirm.
    load_best_model_at_end=True

)

In [29]:
# Fine-tune on the train split and evaluate on the validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    # `tokenizer=` is deprecated on Trainer (it triggers a FutureWarning and is
    # scheduled for removal); `processing_class=` is its replacement.
    processing_class=tokenizer,
)

  trainer = Trainer(


In [None]:
# Long-running cell: training takes at least 250 minutes.
trainer.train() 

In [None]:
# Evaluate on the validation split that was passed to the Trainer as eval_dataset.
# NOTE(review): no compute_metrics was supplied, so this presumably reports
# loss/runtime figures only — confirm.
trainer.evaluate()

In [None]:
# Save the model and tokenizer into the same directory; the evaluation section
# below reloads both from "aer_multilabel_roberta".
trainer.save_model("aer_multilabel_roberta")
tokenizer.save_pretrained("aer_multilabel_roberta")


In [None]:
import torch
import torch.nn.functional as F

def predict_multilabel(target_event, A, B, C, D, threshold=0.5):
    """Predict which of the options A-D are plausible causes of ``target_event``.

    Uses the module-level ``tokenizer`` and ``model``. The prompt has the same
    layout as the one built for training.

    Args:
        target_event: text of the event to explain.
        A, B, C, D: text of the four candidate causes.
        threshold: an option is selected when its sigmoid probability is
            greater than or equal to this value.

    Returns:
        ``(labels, probs)``: ``labels`` is the list of selected option letters
        in A-D order; ``probs`` is the list of four probabilities.
    """
    text = (
        f"Target event: {target_event}\n"
        f"A: {A}\n"
        f"B: {B}\n"
        f"C: {C}\n"
        f"D: {D}\n"
        "Which options are the plausible causes?"
    )

    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # The model may live on a GPU after training; the tokenizer output is on
    # the CPU, so move it to the model's device before the forward pass.
    device = next(model.parameters()).device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference must run in eval mode (dropout off) and without autograd
    # bookkeeping. Restore the previous mode afterwards so calling this
    # function mid-training does not change the model's state.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        model.train(was_training)

    # Independent per-option probabilities; [0] selects the single example.
    probs = torch.sigmoid(logits)[0]

    options = ["A", "B", "C", "D"]
    labels = [options[i] for i, p in enumerate(probs) if p >= threshold]

    return labels, probs.tolist()


In [None]:
# Smoke test on one hand-written example: the first argument is the target
# event, the next four are options A-D (default threshold of 0.5).
answers, probs = predict_multilabel(
    "South Korea’s parliament voted to impeach President Yoon Suk Yeol.",
    "Yoon's senior aides and defense minister offered to resign",
    "Hundreds of soldiers stormed the National Assembly",
    "Tens of thousands of protesters gathered outside the National Assembly in Seoul",
    "President Yoon Suk Yeol declared martial law on December 3 and sent soldiers to parliament"
)

print("Predicted:", answers)
print("Probabilities:", probs)


In [None]:
# The evaluation

In [2]:
# Import model and make input 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Directory written by the save cell of the training section.
model_path = "aer_multilabel_roberta"

# Reload both pieces from disk so evaluation does not depend on the training session.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()  # switch to inference mode before predicting


def predict_multilabel1(target_event, A, B, C, D, threshold=0.5):
    """Score options A-D as plausible causes of ``target_event``.

    Relies on the module-level ``tokenizer`` and ``model``.

    Returns a 3-tuple:
        * the selected option letters, in A-D order,
        * a 0/1 list with one entry per option (1 = selected),
        * the per-option sigmoid probabilities as a plain list.

    An option is selected when its probability is >= ``threshold``.
    """
    prompt = "".join([
        f"Target event: {target_event}\n",
        f"A: {A}\n",
        f"B: {B}\n",
        f"C: {C}\n",
        f"D: {D}\n",
        "Which options are the plausible causes?",
    ])

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True)

    # Forward pass only; no gradients are needed for prediction.
    with torch.no_grad():
        raw_scores = model(**encoded).logits

    # Sigmoid turns each logit into an independent probability; [0] picks the
    # single example in the batch.
    scores = torch.sigmoid(raw_scores)[0]

    letters = ["A", "B", "C", "D"]
    selected = [bool(score >= threshold) for score in scores]

    chosen = [letters[idx] for idx, hit in enumerate(selected) if hit]
    flags = [1 if hit else 0 for hit in selected]

    return chosen, flags, scores.tolist()

In [3]:
# Separate evaluation file; a single data file is loaded, so its examples are
# exposed under the "train" split name used in data_files.
data2 = load_dataset("json", data_files = {"train":"validation_for_accuracy.jsonl"})

Generating train split: 0 examples [00:00, ? examples/s]

In [6]:
# Add the gold multi-hot "labels" vector to each evaluation example.
data2 = data2.map(convert_multi_label)

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [7]:
# Run the model on every evaluation example and keep its 0/1 prediction vector.
ansPredicted = []
# Iterate over rows directly: each row is fetched once, instead of indexing
# data2["train"][i] five separate times per example.
for row in data2["train"]:
    ans, ans_label, prob = predict_multilabel1(
        row["target_event"],
        row["option_A"],
        row["option_B"],
        row["option_C"],
        row["option_D"],
    )
    ansPredicted.append(ans_label)

In [8]:
# Exact-match comparison: a prediction counts as correct only when its whole
# 0/1 vector equals the gold label vector for that example.
# Read the label column once up front; indexing data2["train"]["labels"] inside
# the loop re-reads the column on every iteration.
gold_labels = list(data2["train"]["labels"])
count = sum(
    1 for gold, predicted in zip(gold_labels, ansPredicted) if gold == predicted
)



In [9]:

# Exact-match accuracy in percent: the share of examples whose full predicted
# vector equals the gold vector.
accuracy = count / len(ansPredicted) * 100
print(accuracy,"%")

82.75 %
