In [17]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/valuecategories/value-categories.json
/kaggle/input/projectdata/Project_Data/labels-test.tsv
/kaggle/input/projectdata/Project_Data/arguments-validation.tsv
/kaggle/input/projectdata/Project_Data/arguments-training.tsv
/kaggle/input/projectdata/Project_Data/README.md
/kaggle/input/projectdata/Project_Data/arguments-test.tsv
/kaggle/input/projectdata/Project_Data/labels-validation.tsv
/kaggle/input/projectdata/Project_Data/labels-training.tsv


In [18]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/arguments-training.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/labels-training.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/valuecategories/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)
    
labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')
combined_df['Argument'] = combined_df.apply(
    lambda row: f"{row['Stance']} {row['Conclusion']} by saying {row['Premise']}",
    axis=1
)

In [19]:
final_df=combined_df
df_majority = final_df[final_df.label == 0]
df_minority = final_df[final_df.label == 1]

# Determine the number of instances you want to keep from the majority class
# For example, you might want to have a 1:1 ratio
number_of_instances = len(df_minority)

# Downsample the majority class
df_majority_downsampled = df_majority.sample(n=number_of_instances)

# Combine the downsampled majority class with the minority class to get a balanced dataset
balanced_df = pd.concat([df_majority_downsampled, df_minority])

# Shuffle the dataset to mix the two classes well
balanced_df = balanced_df.sample(frac=1).reset_index(drop=True)

In [20]:
balanced_df

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,value_category,label,value_description,Argument
0,A22325,We should adopt a multi-party system,against,this would result in inaccurate representation...,Tradition,1,"Maintain traditional beliefs and values, follo...",against We should adopt a multi-party system b...
1,A07038,We should abandon the use of school uniform,in favor of,Allowing students to make their own choices re...,Stimulation,1,"Always looking for something new to do, doing ...",in favor of We should abandon the use of schoo...
2,A09011,We should abolish the three-strikes laws,in favor of,Three strikes laws often punish those who have...,Conformity: interpersonal,0,"Avoid upsetting or annoying others, being tact...",in favor of We should abolish the three-strike...
3,A18105,We should adopt an austerity regime,in favor of,we should adopt an austerity regime so that fu...,Achievement,1,"Being ambitious, successful and being admired ...",in favor of We should adopt an austerity regim...
4,A18215,We should ban targeted killing,against,"we should not ban targeted killing, it preven...",Self-direction: action,0,It is important to make own decisions about li...,against We should ban targeted killing by sayi...
...,...,...,...,...,...,...,...,...
34627,A24033,We should subsidize journalism,in favor of,journalism needs diversitiy desperately becaus...,Self-direction: thought,1,"It is important to be creative, forming own op...",in favor of We should subsidize journalism by ...
34628,E02069,"We need a common migration and asylum policy, ...",against,"We should not bring the problem to Europe, but...",Power: resources,0,"Having lots of money for the power it brings, ...",against We need a common migration and asylum ...
34629,A25324,We should abolish the right to keep and bear arms,against,if the right to keep and bear arms was abolish...,Tradition,0,"Maintain traditional beliefs and values, follo...",against We should abolish the right to keep an...
34630,A26071,We should abolish the three-strikes laws,against,we should not abolish the three-strikes laws b...,Humility,0,"Try not to draw attention, be humble and satis...",against We should abolish the three-strikes la...


In [21]:
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.nn.functional import cross_entropy
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from sklearn.metrics import accuracy_score,f1_score
model_name = 'pepa/roberta-base-snli'
config = AutoModelForSequenceClassification.from_pretrained(model_name, 
                                                             return_dict=True,
                                                             output_hidden_states=False,
                                                             hidden_dropout_prob=0.3,  # Set dropout probability for hidden layers
                                                             attention_probs_dropout_prob=0.3)  # Set dropout probability for attention layers


# Verify that we are only optimizing biases

model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
classifier_parameters = []
bias_parameters = []
for name, param in model.named_parameters():
    if 'classifier' in name or 'bias' in name:
        classifier_parameters.append(param)
    else:
        param.requires_grad = False
print(f"Total trainable parameters: {len(classifier_parameters)}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=model.to(device)
inputs = tokenizer(list(balanced_df['Argument']), list(balanced_df['value_description']), padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
labels = torch.tensor(balanced_df['label'].values)

Total trainable parameters: 101


In [22]:
train_dataset = TensorDataset(input_ids, attention_mask, labels)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=8)
optimizer = AdamW(classifier_parameters, lr=2e-5)
epochs=3
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
import numpy as np
# Define a training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    predictions = []
    true_labels = []
    steps=1
    i=0
    for batch in train_dataloader:
        b_input_ids, b_attention_mask, b_labels = batch
        b_input_ids = b_input_ids.to(device)
        b_attention_mask = b_attention_mask.to(device)
        b_labels = b_labels.to(device)
        
        # Clear any previously calculated gradients
        optimizer.zero_grad()
        
        # Perform a forward pass. This will return logits.
        outputs = model(b_input_ids, attention_mask=b_attention_mask, labels=b_labels)
        
        # Calculate loss using the outputs and the labels
        loss = outputs[0]
        total_loss += loss.item()
        logits = outputs.logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions.extend(np.argmax(logits, axis=1).flatten())
        true_labels.extend(label_ids.flatten())
        
        # Perform a backward pass to calculate gradients
        loss.backward()
        
        # Update parameters and take a step using the computed gradient
        optimizer.step()
        
        # Update the learning rate
        scheduler.step()
        if steps % 200 == 0:
            interim_f1 = f1_score(true_labels, predictions, average='macro')
            print(f"Epoch {epoch}, Step {steps}, Loss: {total_loss / steps:.4f}, Interim F1 Score: {interim_f1:.4f}")
            predictions = []  # Reset predictions
            true_labels = []  # Reset true labels
        steps+=1
    
    # Calculate the average loss over the training data
    avg_train_loss = total_loss / len(train_dataloader)


print("Training complete")



Epoch 0, Step 200, Loss: 0.9024, Interim F1 Score: 0.2858
Epoch 0, Step 400, Loss: 0.7875, Interim F1 Score: 0.6120
Epoch 0, Step 600, Loss: 0.7390, Interim F1 Score: 0.6499
Epoch 0, Step 800, Loss: 0.7104, Interim F1 Score: 0.6531
Epoch 0, Step 1000, Loss: 0.6941, Interim F1 Score: 0.6594
Epoch 0, Step 1200, Loss: 0.6844, Interim F1 Score: 0.6548
Epoch 0, Step 1400, Loss: 0.6751, Interim F1 Score: 0.6662
Epoch 0, Step 1600, Loss: 0.6674, Interim F1 Score: 0.6598
Epoch 0, Step 1800, Loss: 0.6612, Interim F1 Score: 0.6794
Epoch 0, Step 2000, Loss: 0.6570, Interim F1 Score: 0.6623
Epoch 0, Step 2200, Loss: 0.6536, Interim F1 Score: 0.6813
Epoch 0, Step 2400, Loss: 0.6503, Interim F1 Score: 0.6680
Epoch 0, Step 2600, Loss: 0.6483, Interim F1 Score: 0.6431
Epoch 0, Step 2800, Loss: 0.6443, Interim F1 Score: 0.6854
Epoch 0, Step 3000, Loss: 0.6412, Interim F1 Score: 0.6818
Epoch 0, Step 3200, Loss: 0.6378, Interim F1 Score: 0.6979
Epoch 0, Step 3400, Loss: 0.6355, Interim F1 Score: 0.6883
E

In [23]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/arguments-validation.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/labels-validation.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/valuecategories/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)
    
labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df_val = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')
combined_df_val['Argument'] = combined_df_val.apply(
    lambda row: f"{row['Stance']} {row['Conclusion']} by saying {row['Premise']}",
    axis=1
)

In [24]:
test_inputs = tokenizer(list(combined_df_val['Argument']), list(combined_df_val['value_description']), padding=True, truncation=True, return_tensors="pt")
test_input_ids = test_inputs['input_ids'].to(device)
test_attention_mask = test_inputs['attention_mask'].to(device)
test_labels = torch.tensor(combined_df_val['label'].values).to(device)

test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Function to evaluate the model on the test set
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            b_input_ids, b_attention_mask, b_labels = [b.to(device) for b in batch]

            outputs = model(b_input_ids, attention_mask=b_attention_mask)
            logits = outputs.logits

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            batch_predictions = np.argmax(logits, axis=1)
            predictions.extend(batch_predictions)
            true_labels.extend(label_ids)

    return predictions, true_labels

# Evaluate the model
# predictions, true_labels = evaluate_model(model, test_dataloader)

# # Calculate accuracy and F1 score
# accuracy = accuracy_score(true_labels, predictions)
# f1 = f1_score(true_labels, predictions, average='binary')

# print(f"Test Accuracy: {accuracy:.4f}")
# print(f"Test F1 Score: {f1:.4f}")

In [25]:
from torch.utils.data import Dataset,DataLoader
print(1)
class CategoryDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        premise = item['Argument']
        description = item['value_description']
        label = item['label']
        category = item['value_category']
        
        # Tokenize the text pair
        encoding = self.tokenizer(premise, description, add_special_tokens=True, 
                                  max_length=self.max_len, padding='max_length', 
                                  truncation=True, return_tensors="pt")
        
        input_ids = encoding['input_ids'].squeeze(0)  # Remove the batch dimension
        attention_mask = encoding['attention_mask'].squeeze(0)
        
        return input_ids, attention_mask, torch.tensor(label, dtype=torch.float), category
    
dataset = CategoryDataset(combined_df_val,tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    all_logits = []
    all_labels = []
    all_categories = []

    with torch.no_grad():
        for input_ids, attention_mask, labels, categories in dataloader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits  # Using sigmoid for binary classification
            logits = logits.cpu().numpy()
            labels = labels.cpu().numpy()
            all_logits.extend(np.argmax(logits, axis=1))
            all_labels.extend(labels)
            all_categories.extend(categories)


    # Convert lists to numpy arrays
    all_logits = np.array(all_logits)
    all_labels = np.array(all_labels)
    all_categories = np.array(all_categories)

    return all_logits, all_labels, all_categories

# Get the predictions, true labels, and categories from the evaluation
logits, labels, categories = evaluate_model(model, dataloader)


1


In [26]:
f1_scores = {}
unique_categories = np.unique(categories)

for category in unique_categories:
    category_mask = (categories == category)
    cat_labels = labels[category_mask]
    cat_logits = logits[category_mask]
#     print(cat_labels.shape)
#     print(cat_logits.shape)
    # Ensure that the shape of cat_labels and cat_logits is one-dimensional
    cat_labels = np.squeeze(cat_labels)
    cat_logits = np.squeeze(cat_logits)

    # Calculate F1 score for the category
    f1 = f1_score(cat_labels, cat_logits, average='macro')
    f1_scores[category] = f1

# Print F1 scores for each category
avg_score=0
for category, score in f1_scores.items():
    print(f"F1 Score for {category}: {score:.4f}")
    avg_score+=score
avg_score=avg_score/19
print(avg_score)

F1 Score for Achievement: 0.5364
F1 Score for Benevolence: caring: 0.5144
F1 Score for Benevolence: dependability: 0.5046
F1 Score for Conformity: interpersonal: 0.4894
F1 Score for Conformity: rules: 0.5350
F1 Score for Face: 0.5152
F1 Score for Hedonism: 0.5954
F1 Score for Humility: 0.5041
F1 Score for Power: dominance: 0.5395
F1 Score for Power: resources: 0.6682
F1 Score for Security: personal: 0.2997
F1 Score for Security: societal: 0.3534
F1 Score for Self-direction: action: 0.5035
F1 Score for Self-direction: thought: 0.5372
F1 Score for Stimulation: 0.4953
F1 Score for Tradition: 0.6512
F1 Score for Universalism: concern: 0.2719
F1 Score for Universalism: nature: 0.7447
F1 Score for Universalism: tolerance: 0.5559
0.5165775782438357


In [27]:
import pandas as pd
import json

# Load the arguments data (assuming it has columns 'id', 'premise')
arguments_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/arguments-test.tsv', delimiter='\t')

# Load the value labels data (assuming it has columns 'id', 'value_label')
labels_df = pd.read_csv('/kaggle/input/projectdata/Project_Data/labels-test.tsv', delimiter='\t')

# Load the value descriptions from a JSON file
with open('/kaggle/input/valuecategories/value-categories.json', 'r') as file:
    value_descriptions = json.load(file)
    
labels_long_df = labels_df.melt(id_vars='Argument ID', var_name='value_category', value_name='label')
labels_long_df = labels_long_df[labels_long_df['value_category'] != 'Universalism: objectivity']
labels_long_df['value_description'] = labels_long_df['value_category'].apply(lambda x: value_descriptions[x.lower().replace(": ","-")]['personal-motivation'])
combined_df_test = pd.merge(arguments_df, labels_long_df, left_on='Argument ID', right_on='Argument ID')
combined_df_test['Argument'] = combined_df_test.apply(
    lambda row: f"{row['Stance']} {row['Conclusion']} by saying {row['Premise']}",
    axis=1
)

In [28]:
test_inputs = tokenizer(list(combined_df_test['Argument']), list(combined_df_test['value_description']), padding=True, truncation=True, return_tensors="pt")
test_input_ids = test_inputs['input_ids'].to(device)
test_attention_mask = test_inputs['attention_mask'].to(device)
test_labels = torch.tensor(combined_df_test['label'].values).to(device)

test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Function to evaluate the model on the test set
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            b_input_ids, b_attention_mask, b_labels = [b.to(device) for b in batch]

            outputs = model(b_input_ids, attention_mask=b_attention_mask)
            logits = outputs.logits

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            batch_predictions = np.argmax(logits, axis=1)
            predictions.extend(batch_predictions)
            true_labels.extend(label_ids)

    return predictions, true_labels

# Evaluate the model
# predictions, true_labels = evaluate_model(model, test_dataloader)

# # Calculate accuracy and F1 score
# accuracy = accuracy_score(true_labels, predictions)
# f1 = f1_score(true_labels, predictions, average='binary')

# print(f"Test Accuracy: {accuracy:.4f}")
# print(f"Test F1 Score: {f1:.4f}")

In [29]:
from torch.utils.data import Dataset,DataLoader

class CategoryDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        premise = item['Argument']
        description = item['value_description']
        label = item['label']
        category = item['value_category']
        
        # Tokenize the text pair
        encoding = self.tokenizer(premise, description, add_special_tokens=True, 
                                  max_length=self.max_len, padding='max_length', 
                                  truncation=True, return_tensors="pt")
        
        input_ids = encoding['input_ids'].squeeze(0)  # Remove the batch dimension
        attention_mask = encoding['attention_mask'].squeeze(0)
        
        return input_ids, attention_mask, torch.tensor(label, dtype=torch.float), category
    
dataset = CategoryDataset(combined_df_test,tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    all_logits = []
    all_labels = []
    all_categories = []

    with torch.no_grad():
        for input_ids, attention_mask, labels, categories in dataloader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits  # Using sigmoid for binary classification
            logits = logits.cpu().numpy()
            labels = labels.cpu().numpy()
            all_logits.extend(np.argmax(logits, axis=1))
            all_labels.extend(labels)
            all_categories.extend(categories)


    # Convert lists to numpy arrays
    all_logits = np.array(all_logits)
    all_labels = np.array(all_labels)
    all_categories = np.array(all_categories)

    return all_logits, all_labels, all_categories

# Get the predictions, true labels, and categories from the evaluation
logits, labels, categories = evaluate_model(model, dataloader)


In [30]:
f1_scores = {}
unique_categories = np.unique(categories)

for category in unique_categories:
    category_mask = (categories == category)
    cat_labels = labels[category_mask]
    cat_logits = logits[category_mask]
#     print(cat_labels.shape)
#     print(cat_logits.shape)
    # Ensure that the shape of cat_labels and cat_logits is one-dimensional
    cat_labels = np.squeeze(cat_labels)
    cat_logits = np.squeeze(cat_logits)

    # Calculate F1 score for the category
    f1 = f1_score(cat_labels, cat_logits, average='macro')
    f1_scores[category] = f1

# Print F1 scores for each category
avg_score=0
for category, score in f1_scores.items():
    print(f"F1 Score for {category}: {score:.4f}")
    avg_score+=score
avg_score=avg_score/19
print(avg_score)

F1 Score for Achievement: 0.4560
F1 Score for Benevolence: caring: 0.4287
F1 Score for Benevolence: dependability: 0.5323
F1 Score for Conformity: interpersonal: 0.5203
F1 Score for Conformity: rules: 0.5212
F1 Score for Face: 0.5221
F1 Score for Hedonism: 0.5853
F1 Score for Humility: 0.5202
F1 Score for Power: dominance: 0.5516
F1 Score for Power: resources: 0.6412
F1 Score for Security: personal: 0.2727
F1 Score for Security: societal: 0.2583
F1 Score for Self-direction: action: 0.4764
F1 Score for Self-direction: thought: 0.5095
F1 Score for Stimulation: 0.5050
F1 Score for Tradition: 0.6872
F1 Score for Universalism: concern: 0.2750
F1 Score for Universalism: nature: 0.7714
F1 Score for Universalism: tolerance: 0.5665
0.505310909511586


In [31]:
torch.save(model, 'model_entailment.pth')