In [56]:
# pytorch
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import numpy as np

# pytorch lightning
from lightning import LightningModule
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torchmetrics.classification import MultilabelF1Score

import pandas as pd
import utils

In [57]:

# The corpus ships as tab-separated files: one "arguments" file and one
# "labels" file per split. Read the three argument splits first ...
df_arg_train, df_arg_test, df_arg_val = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (
        './data/arguments-training.tsv',
        './data/arguments-test.tsv',
        './data/arguments-validation.tsv',
    )
)

# ... then the matching label splits, in the same train / test / validation order.
df_labels_train, df_labels_test, df_labels_val = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (
        './data/labels-training.tsv',
        './data/labels-test.tsv',
        './data/labels-validation.tsv',
    )
)

# Preview the raw (level-2) test labels.
df_labels_test.head()

Unnamed: 0,Argument ID,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,...,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,A26004,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,1,0,1,0
1,A26010,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,1,1
2,A26016,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,1,1,0,0,0
3,A26024,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,A26026,0,0,0,0,1,0,0,0,1,...,0,0,0,0,1,1,0,0,0,0


In [58]:
# Each level-3 category and the level-2 label columns that belong to it.
# Some level-2 columns ("Hedonism", "Face", "Humility") are listed under two
# level-3 categories.
level_3_to_2_mapping = {
    "Openness to change": [
        "Self-direction: thought",
        "Self-direction: action",
        "Stimulation",
        "Hedonism",
    ],
    "Self-enhancement": [
        "Hedonism",
        "Achievement",
        "Power: dominance",
        "Power: resources",
        "Face",
    ],
    "Conservation": [
        "Security: personal",
        "Security: societal",
        "Conformity: rules",
        "Conformity: interpersonal",
        "Tradition",
        "Face",
        "Humility",
    ],
    "Self-transcendence": [
        "Benevolence: caring",
        "Benevolence: dependability",
        "Universalism: concern",
        "Universalism: nature",
        "Universalism: tolerance",
        "Universalism: objectivity",
        "Humility",
    ]
}

# The level-3 names, in the insertion order of the mapping above.
level_3_categories = list(level_3_to_2_mapping)

# Flat list of every level-2 column named in the mapping (shared ones repeat).
column_to_drop = [
    level_2_name
    for level_2_group in level_3_to_2_mapping.values()
    for level_2_name in level_2_group
]


def _add_level_3_labels(labels_df):
    """Add one 0/1 column per level-3 category, then drop the level-2 columns.

    A level-3 label is 1 when at least one of its level-2 columns is truthy
    for that row (logical OR across the group). The new columns are written
    onto ``labels_df`` itself; the returned frame is the result of ``drop``.
    """
    for level_3_name in level_3_categories:
        level_2_names = level_3_to_2_mapping[level_3_name]
        any_member_set = labels_df[level_2_names].any(axis=1)
        labels_df[level_3_name] = any_member_set.map({True: 1, False: 0})
    return labels_df.drop(columns=column_to_drop)


df_labels_test = _add_level_3_labels(df_labels_test)
df_labels_val = _add_level_3_labels(df_labels_val)
df_labels_train = _add_level_3_labels(df_labels_train)

df_labels_test.head()

Unnamed: 0,Argument ID,Openness to change,Self-enhancement,Conservation,Self-transcendence
0,A26004,0,1,1,1
1,A26010,0,1,0,1
2,A26016,0,1,1,1
3,A26024,0,1,0,0
4,A26026,0,1,1,1


In [59]:
# Attach the level-3 labels to each argument by joining on the shared
# 'Argument ID' column (default join type of DataFrame.merge).
df_train = df_arg_train.merge(df_labels_train, on='Argument ID')
df_test = df_arg_test.merge(df_labels_test, on='Argument ID')
df_val = df_arg_val.merge(df_labels_val, on='Argument ID')

df_train.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness to change,Self-enhancement,Conservation,Self-transcendence
0,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,0,0,1,0
1,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,0,0,1,0
2,A01006,We should end the use of economic sanctions,against,sometimes economic sanctions are the only thin...,0,1,1,0
3,A01007,We should abolish capital punishment,against,capital punishment is sometimes the only optio...,0,0,1,1
4,A01008,We should ban factory farming,against,factory farming allows for the production of c...,0,0,1,1


### Task 1.5 Encoding

In [60]:
# Binary-encode the textual stance: "in favor of" -> 1, "against" -> 0.
# The column is overwritten in place on each split. Note that running this
# cell a second time would map the already-encoded 0/1 values to NaN, because
# they are not keys of the mapping.
for split_df in (df_train, df_test, df_val):
    split_df["Stance"] = split_df["Stance"].map({"in favor of": 1, "against": 0})

df_train.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness to change,Self-enhancement,Conservation,Self-transcendence
0,A01002,We should ban human cloning,1,we should ban human cloning as it will only ca...,0,0,1,0
1,A01005,We should ban fast food,1,fast food should be banned because it is reall...,0,0,1,0
2,A01006,We should end the use of economic sanctions,0,sometimes economic sanctions are the only thin...,0,1,1,0
3,A01007,We should abolish capital punishment,0,capital punishment is sometimes the only optio...,0,0,1,1
4,A01008,We should ban factory farming,0,factory farming allows for the production of c...,0,0,1,1


## Dataset definition

In [61]:
class ArgumentDataset(Dataset):
    """Map-style dataset that serves one argument (one DataFrame row) per index.

    Args:
        df: DataFrame with "Premise", "Conclusion" and "Stance" columns plus
            one column per label.
        label_columns: Optional list of label column names. When omitted, the
            notebook-level ``level_3_categories`` list is used, which is the
            original behaviour.
    """

    def __init__(self, df, label_columns=None):
        self.df = df
        # Kept as given; the default is resolved at item-access time so the
        # notebook-level list is only needed when no columns were passed.
        self.label_columns = label_columns

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the row at position ``idx`` as a dict.

        Returns:
            dict with keys:
              "Premise", "Conclusion": the raw cell values of the row;
              "labels": float32 tensor with one entry per label column;
              "Stance": float32 scalar tensor built from the "Stance" cell.
        """
        columns = level_3_categories if self.label_columns is None else self.label_columns
        # Positional lookup, so the DataFrame index labels do not matter.
        row = self.df.iloc[idx]
        return {
            "Premise": row["Premise"],
            "Conclusion": row["Conclusion"],
            "labels": torch.tensor(row[columns].values.tolist(), dtype=torch.float32),
            "Stance": torch.tensor(row["Stance"], dtype=torch.float32)
        }

In [62]:
# Wrap every split in an ArgumentDataset.
train_dataset, test_dataset, val_dataset = map(ArgumentDataset, (df_train, df_test, df_val))

# Batch the splits. Only the training loader shuffles; the evaluation loaders
# keep the row order of their DataFrames.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=32)

# Sanity check: show the first training sample.
print(train_dataset[0])

{'Premise': 'we should ban human cloning as it will only cause huge issues when you have a bunch of the same humans running around all acting the same.', 'Conclusion': 'We should ban human cloning', 'labels': tensor([0., 0., 1., 0.]), 'Stance': tensor(1.)}


## Task 3 Metric definition

## Task 2 Model definition

### Random and Majority Classifier

In [63]:
class RandomUniformClassifier(LightningModule):
    """Baseline that predicts every label independently at random.

    Args:
        num_classes: Number of label columns to emit per sample (default 4,
            the value that was previously hard-coded).
        seed: Optional seed for the private NumPy generator. ``None`` (the
            default) keeps the original behaviour of an unseeded generator,
            so predictions are not reproducible unless a seed is given.
    """

    def __init__(self, num_classes=4, seed=None):
        # Initialise the underlying module before setting any attribute.
        super().__init__()
        self.num_classes = num_classes
        self._random_state = np.random.RandomState(seed)

    def predict(self, X):
        """Return a float32 tensor of 0/1 predictions, one row per sample.

        Only ``X.shape[0]`` (the batch size) is read from ``X``; its content
        is ignored. A label is predicted as 1 when its uniform draw from
        [0, 1) is greater than 0.5.
        """
        batch_size = X.shape[0]
        draws = self._random_state.uniform(size=(batch_size, self.num_classes))
        return torch.tensor(draws > 0.5, dtype=torch.float32)


class MajorityClassifier(LightningModule):
    """Baseline that takes a majority vote over several random classifiers.

    Despite the name, this does not predict the most frequent label of the
    training data: it builds ``n_random_classifiers`` independent
    ``RandomUniformClassifier`` instances and predicts a label as 1 when
    strictly more than half of them voted 1.

    Args:
        n_random_classifiers: Number of random voters (default 10).
    """

    def __init__(self, n_random_classifiers=10):
        # Initialise the underlying module before setting any attribute.
        super().__init__()
        self.n_random_classifiers = n_random_classifiers
        self.random_classifiers = [RandomUniformClassifier() for _ in range(n_random_classifiers)]

    def predict(self, X):
        """Return a float32 tensor of shape (X.shape[0], 4) with 0/1 votes.

        Only the batch size is read from ``X``. A tie (vote share exactly
        0.5, possible with an even number of voters) resolves to 0.
        """
        batch_size = X.shape[0]
        votes = torch.zeros((batch_size, 4))
        for clf in self.random_classifiers:
            votes += clf.predict(X)
        vote_share = votes / self.n_random_classifiers
        # Convert the boolean mask directly instead of re-wrapping an existing
        # tensor in torch.tensor(), which emits a copy-construct UserWarning.
        return (vote_share > 0.5).to(torch.float32)

### BERT models

In [64]:
from transformers import BertModel, BertTokenizer


class BertConclusion(LightningModule):
    """Frozen BERT encoder plus a trainable linear head, fed the Conclusion only.

    The classifier reads the hidden state at the first token position (the
    [CLS] position for BERT tokenizers) and emits one logit per class.

    Args:
        bert_model_name: Name or path passed to ``from_pretrained`` for both
            the tokenizer and the encoder.
        num_classes: Number of output logits / labels.
    """

    def __init__(self, bert_model_name, num_classes):
        super().__init__()
        self.save_hyperparameters()

        self.tokenizer = BertTokenizer.from_pretrained(bert_model_name)
        self.bert = BertModel.from_pretrained(bert_model_name)
        # Freeze the encoder: only the linear head below is trained.
        for param in self.bert.parameters():
            param.requires_grad = False

        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

        # Per-class F1 (average=None); sized from num_classes rather than a literal 4.
        self.f1_metric = MultilabelF1Score(num_classes, average=None)

    def forward(self, encoded):
        """Return logits of shape (batch, num_classes) for a tokenized batch."""
        outputs = self.bert(**encoded)
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])
        return logits

    def _encode(self, texts):
        """Tokenize a batch of strings and move the tensors to the module's device.

        The tokenizer builds its tensors on the CPU; without the move, the
        forward pass fails with a device mismatch when the model runs on an
        accelerator.
        """
        encoded = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return encoded.to(self.device)

    def _shared_step(self, batch, stage):
        """Run one forward pass, log loss and F1 under the ``stage`` prefix, return the loss.

        ``stage`` is one of "train", "val" or "test" and only affects the
        names of the logged values.
        """
        y = batch["labels"]
        logits = self(self._encode(batch["Conclusion"]))

        loss = nn.BCEWithLogitsLoss()(logits, y)
        self.log(f"{stage}_loss", loss, on_epoch=True, prog_bar=True, logger=True)

        # The metric call returns the F1 of this batch only.
        # NOTE(review): with on_epoch=True the logged epoch value is therefore an
        # aggregate of per-batch scores, not F1 over the whole epoch - confirm
        # this is intended.
        f1_score_per_class = self.f1_metric(logits, y)
        f1_score_mean = torch.mean(f1_score_per_class)

        self.log(f"{stage}_f1_score", f1_score_mean, on_epoch=True, prog_bar=True, logger=True)

        for category, class_score in zip(level_3_categories, f1_score_per_class):
            self.log(f"{stage}_f1_score_{category}", class_score, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss / F1 for one batch."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss / F1 for one batch."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss / F1 for one batch."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer (lr=1e-5) over the module's parameters."""
        return torch.optim.Adam(self.parameters(), lr=1e-5)

In [65]:
class BertPremiseConclusion(LightningModule):
    """Frozen BERT encoder plus a trainable linear head over Premise and Conclusion.

    Premise and Conclusion are encoded separately by the same frozen BERT;
    the hidden states at their first token position (the [CLS] position for
    BERT tokenizers) are concatenated and passed to the linear classifier.

    Args:
        bert_model_name: Name or path passed to ``from_pretrained`` for both
            the tokenizer and the encoder.
        num_classes: Number of output logits / labels.
    """

    def __init__(self, bert_model_name, num_classes):
        super().__init__()
        self.save_hyperparameters()

        self.tokenizer = BertTokenizer.from_pretrained(bert_model_name)
        self.bert = BertModel.from_pretrained(bert_model_name)
        # Freeze the encoder: only the linear head below is trained.
        for param in self.bert.parameters():
            param.requires_grad = False

        # Two sentence embeddings are concatenated, hence hidden_size * 2.
        self.classifier = nn.Linear(self.bert.config.hidden_size * 2, num_classes)

        # Per-class F1 (average=None); sized from num_classes rather than a literal 4.
        self.f1_metric = MultilabelF1Score(num_classes, average=None)

    def forward(self, encoded_1, encoded_2):
        """Return logits of shape (batch, num_classes) for two tokenized batches."""
        output_1 = self.bert(**encoded_1)
        output_2 = self.bert(**encoded_2)

        output = torch.cat((output_1.last_hidden_state[:, 0, :], output_2.last_hidden_state[:, 0, :]), dim=1)

        logits = self.classifier(output)
        return logits

    def _encode(self, texts):
        """Tokenize a batch of strings and move the tensors to the module's device.

        The tokenizer builds its tensors on the CPU; without the move, the
        forward pass fails with a device mismatch when the model runs on an
        accelerator.
        """
        encoded = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return encoded.to(self.device)

    def _shared_step(self, batch, stage):
        """Run one forward pass, log loss and F1 under the ``stage`` prefix, return the loss.

        ``stage`` is one of "train", "val" or "test" and only affects the
        names of the logged values.
        """
        y = batch["labels"]

        # Fix: the first input used to be batch["Conclusion"] as well, so the
        # premise was never seen by the model. Models trained with the old
        # code need retraining to benefit from the premise.
        encoded_premise = self._encode(batch["Premise"])
        encoded_conclusion = self._encode(batch["Conclusion"])

        logits = self(encoded_premise, encoded_conclusion)

        loss = nn.BCEWithLogitsLoss()(logits, y)
        self.log(f"{stage}_loss", loss, on_epoch=True, prog_bar=True, logger=True)

        # The metric call returns the F1 of this batch only.
        # NOTE(review): with on_epoch=True the logged epoch value is therefore an
        # aggregate of per-batch scores, not F1 over the whole epoch - confirm
        # this is intended.
        f1_score_per_class = self.f1_metric(logits, y)
        f1_score_mean = torch.mean(f1_score_per_class)

        self.log(f"{stage}_f1_score", f1_score_mean, on_epoch=True, prog_bar=True, logger=True)

        for category, class_score in zip(level_3_categories, f1_score_per_class):
            self.log(f"{stage}_f1_score_{category}", class_score, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss / F1 for one batch."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss / F1 for one batch."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss / F1 for one batch."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer (lr=1e-5) over the module's parameters."""
        return torch.optim.Adam(self.parameters(), lr=1e-5)

In [66]:
class BertPremiseConclusionStance(LightningModule):
    """Frozen BERT encoder plus a linear head over Premise, Conclusion and Stance.

    Premise and Conclusion are encoded separately by the same frozen BERT;
    the hidden states at their first token position (the [CLS] position for
    BERT tokenizers) are concatenated with the scalar stance and passed to
    the linear classifier.

    Args:
        bert_model_name: Name or path passed to ``from_pretrained`` for both
            the tokenizer and the encoder.
        num_classes: Number of output logits / labels.
    """

    def __init__(self, bert_model_name, num_classes):
        super().__init__()
        self.save_hyperparameters()

        self.tokenizer = BertTokenizer.from_pretrained(bert_model_name)
        self.bert = BertModel.from_pretrained(bert_model_name)
        # Freeze the encoder: only the linear head below is trained.
        for param in self.bert.parameters():
            param.requires_grad = False

        # Two sentence embeddings plus one stance feature.
        self.classifier = nn.Linear(self.bert.config.hidden_size * 2 + 1, num_classes)

        # Per-class F1 (average=None); sized from num_classes rather than a literal 4.
        self.f1_metric = MultilabelF1Score(num_classes, average=None)

    def forward(self, encoded_1, encoded_2, stance):
        """Return logits of shape (batch, num_classes).

        ``stance`` is given one value per sample and is expanded to a column
        before being concatenated with the two sentence embeddings.
        """
        output_1 = self.bert(**encoded_1).last_hidden_state[:, 0, :]
        output_2 = self.bert(**encoded_2).last_hidden_state[:, 0, :]
        stance = stance.unsqueeze(1)
        output = torch.cat((output_1, output_2, stance), dim=1)
        logits = self.classifier(output)
        return logits

    def _encode(self, texts):
        """Tokenize a batch of strings and move the tensors to the module's device.

        The tokenizer builds its tensors on the CPU; without the move, the
        forward pass fails with a device mismatch when the model runs on an
        accelerator.
        """
        encoded = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return encoded.to(self.device)

    def _shared_step(self, batch, stage):
        """Run one forward pass, log loss and F1 under the ``stage`` prefix, return the loss.

        ``stage`` is one of "train", "val" or "test" and only affects the
        names of the logged values.
        """
        y = batch["labels"]
        stance = batch["Stance"]

        encoded_premise = self._encode(batch["Premise"])
        encoded_conclusion = self._encode(batch["Conclusion"])

        logits = self(encoded_premise, encoded_conclusion, stance)

        loss = nn.BCEWithLogitsLoss()(logits, y)
        self.log(f"{stage}_loss", loss, on_epoch=True, prog_bar=True, logger=True)

        # The metric call returns the F1 of this batch only.
        # NOTE(review): with on_epoch=True the logged epoch value is therefore an
        # aggregate of per-batch scores, not F1 over the whole epoch - confirm
        # this is intended.
        f1_score_per_class = self.f1_metric(logits, y)
        f1_score_mean = torch.mean(f1_score_per_class)

        self.log(f"{stage}_f1_score", f1_score_mean, on_epoch=True, prog_bar=True, logger=True)

        for category, class_score in zip(level_3_categories, f1_score_per_class):
            self.log(f"{stage}_f1_score_{category}", class_score, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss / F1 for one batch."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss / F1 for one batch."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss / F1 for one batch."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer (lr=1e-5) over the module's parameters."""
        return torch.optim.Adam(self.parameters(), lr=1e-5)

In [67]:
# Reproducibility switches, set once before any model is built.

# cuDNN: turn off the benchmark auto-tuner and request deterministic kernels.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# PyTorch-wide: ask for deterministic algorithm implementations.
torch.use_deterministic_algorithms(mode=True)

In [68]:
from pathlib import Path

# TensorBoard logs are written under <cwd>/logs/lightning_logs.
logs_path = Path.cwd() / "logs" / "lightning_logs"
# Set to True to train; when False the cell only prints a notice.
train = False

# Each model is trained once per seed.
seeds = [6, 90, 157]

epochs = 1
output_dim = len(level_3_categories)  # one logit per level-3 category

# The three lists below are parallel: class, short name, constructor kwargs.
model_classes = [BertConclusion, BertPremiseConclusion, BertPremiseConclusionStance]
model_names = ["bert_w_c", "bert_w_cp", "bert_w_cps"]
hyperparameters = [
    {'bert_model_name': 'bert-base-uncased', 'num_classes': output_dim},
    {'bert_model_name': 'bert-base-uncased', 'num_classes': output_dim},
    {'bert_model_name': 'bert-base-uncased', 'num_classes': output_dim}
]

if train:
    for model_class, model_name, hyperparameter in zip(model_classes, model_names, hyperparameters):
        for seed in seeds:
            print(f"Training model {model_name} with seed {seed}...")
            seed_everything(seed, workers=True)

            model = model_class(**hyperparameter)

            logger = TensorBoardLogger(logs_path, name=f"{model_name}_seed{seed}")
            checkpoint_callback = ModelCheckpoint(
                monitor='val_loss',
                dirpath=None,
                # Fix: the placeholder must match a logged metric name. The
                # models log "val_f1_score"; the previous "{val_f1}" was never
                # logged, so it could not show the real score in the filename.
                filename=f'{model_name}-seed={seed}' + '-{epoch:02d}-{val_loss:.2f}-{val_f1_score:.2f}',
                save_top_k=1,
            )
            # With epochs = 1 a patience of 3 cannot trigger; it only matters
            # once max_epochs is raised.
            early_stop_callback = EarlyStopping(
                monitor='val_loss',
                patience=3,
                verbose=True,
                mode='min'
            )

            trainer = Trainer(
                max_epochs=epochs,
                logger=logger,
                log_every_n_steps=1,
                callbacks=[checkpoint_callback, early_stop_callback],
                deterministic=True
            )

            trainer.fit(model, train_dataloader, val_dataloader)
else:
    print("Skipping training...")

Skipping training...


In [None]:
def evaluate_model(model, loader, model_type):
    """Print per-class F1 and return the unweighted mean F1 of ``model`` on ``loader``.

    Args:
        model: Model handed to the matching ``utils.model_bert_*_predict`` helper.
        loader: Iterable of batches; each batch must expose ``batch["labels"]``.
            It is iterated here to collect the targets, separately from the
            prediction call.
        model_type: One of "bert_w_c", "bert_w_cp" or "bert_w_cps"; selects the
            prediction helper.

    Returns:
        The mean of the per-class F1 scores (a 0-dim tensor).

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type == "bert_w_c":
        prediction = utils.model_bert_c_predict(model, loader)
    elif model_type == "bert_w_cp":
        prediction = utils.model_bert_cp_predict(model, loader)
    elif model_type == "bert_w_cps":
        prediction = utils.model_bert_cps_predict(model, loader)
    else:
        raise ValueError("Invalid model type")

    # Derive the label count from the category list instead of a literal 4, so
    # the metric size, the divisor and the printed names cannot drift apart.
    num_labels = len(level_3_categories)
    f1_metric = MultilabelF1Score(num_labels=num_labels, average=None, multidim_average='global')

    # Take the target from the loader.
    # NOTE(review): this assumes the loader yields batches in the same order as
    # the prediction helper saw them (i.e. no shuffling) - confirm against utils.
    target = torch.cat([data["labels"] for data in loader], dim=0)

    results = f1_metric(prediction, target)
    average = sum(results) / num_labels

    print("F1 Score for each class:")
    for i, category in enumerate(level_3_categories):
        print(f"{category}: {results[i]:.4f}")

    print(f"Overall F1 Score: {average:.4f}")

    return average
