In [None]:
import pandas as pd

In [None]:
def ibm_argumentative():
    """Read the IBM argumentative CSV and return it with five renamed columns.

    The columns listed in ``unused_columns`` are dropped. The columns that
    remain are relabelled *by position* with ``renamed_columns``, so the CSV
    must contain exactly five other columns.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``id``, ``argument``, ``tweet``, ``topic`` and
        ``argumentative``.
    """
    unused_columns = ['#positive', '#negative', 'sentence_internal_id', 'val', 'test']
    renamed_columns = ['id', 'argument', 'tweet', 'topic', 'argumentative']

    frame = pd.read_csv('Ibm-datasets/argumentative/set.csv').drop(columns=unused_columns)
    # Positional rename: order of the remaining CSV columns decides which name each one gets.
    frame.columns = renamed_columns
    return frame

In [None]:
def ibm_evidence():
    """Load the IBM evidence test and train CSVs as one combined frame.

    The rows of ``test.csv`` come first, followed by the rows of
    ``train.csv``. Four columns are dropped and the three that remain are
    relabelled *by position* as ``topic``, ``tweet`` and ``evidence``.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``topic``, ``tweet`` and ``evidence`` and a fresh
        ``0..n-1`` index.
    """
    parts = [
        pd.read_csv('Ibm-datasets/evidence/test.csv'),
        pd.read_csv('Ibm-datasets/evidence/train.csv'),
    ]
    # ignore_index avoids repeating each file's own row numbers, which would
    # leave duplicate index labels in the combined frame.
    df = pd.concat(parts, ignore_index=True)
    df = df.drop(columns=['the concept of the topic', 'candidate masked', 'wikipedia article name', 'wikipedia url'])
    # Positional rename: order of the remaining CSV columns decides which name each one gets.
    df.columns = ['topic', 'tweet', 'evidence']
    return df

In [None]:
def ibm_procon():
    """Read the IBM pro/con CSV and return it with a signed stance column.

    The columns listed in ``dropped`` are removed and the four that remain are
    relabelled *by position* as ``id``, ``topic``, ``procon`` and ``tweet``.
    The ``procon`` values are then replaced by ``1`` where the value equals
    ``'PRO'`` and ``-1`` for anything else.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``id``, ``topic``, ``procon`` and ``tweet``.
    """
    dropped = [
        'split',
        'topicTarget',
        'topicSentiment',
        'claims.claimId',
        'claims.claimCorrectedText',
        'claims.article.rawFile',
        'claims.article.cleanFile',
        'claims.article.rawSpan.start',
        'claims.article.rawSpan.end',
        'claims.article.cleanSpan.start',
        'claims.article.cleanSpan.end',
        'claims.Compatible',
        'claims.claimSentiment',
        'claims.targetsRelation',
        'claims.claimTarget.span.end',
        'claims.claimTarget.span.start',
        'claims.claimTarget.text',
    ]

    def stance_to_sign(stance):
        # Only the exact string 'PRO' counts as positive; every other value maps to -1.
        if stance == 'PRO':
            return 1
        return -1

    frame = pd.read_csv('Ibm-datasets/procon/set.csv').drop(columns=dropped)
    # Positional rename: order of the remaining CSV columns decides which name each one gets.
    frame.columns = ['id', 'topic', 'procon', 'tweet']
    frame['procon'] = frame['procon'].apply(stance_to_sign)
    return frame

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import pandas as pd
import numpy as np
import torch
from datasets import load_dataset, Dataset, load_metric
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score as f1, precision_score as ps, recall_score as rs
from transformers import pipeline

In [None]:
possible_tasks = ['argumentative', 'claim', 'evidence', 'procon']


def bert_baseline(df_train, df_test, model_name='bert-base-cased', tasks = possible_tasks, use_topic = True, seed = 42, batch_size = 5, epochs = 1):
    """Fine-tune one sequence classifier per task and score it on ``df_test``.

    For every requested task a fresh ``model_name`` checkpoint is loaded,
    fine-tuned on ``df_train`` and used to predict ``df_test``.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames with a ``tweet`` column, a ``topic`` column (read only when
        ``use_topic`` is true) and one label column per requested task,
        named after the task.
    model_name : str
        Checkpoint name handed to ``AutoTokenizer`` and
        ``AutoModelForSequenceClassification``; also the prefix of the
        training output directory (``model_name + '_' + task``).
    tasks : str or list of str
        One task name or a list of them; each must be in ``possible_tasks``.
    use_topic : bool
        When true the model input is ``"<topic> <separator> <tweet>"``,
        otherwise just the tweet.
    seed : int
        Seed for shuffling the training set and for ``TrainingArguments``.
    batch_size : int
        Per-device training batch size.
    epochs : int
        Number of training epochs.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styled table with one row per task and the columns ``Tasks``, ``F1``,
        ``Precision`` and ``Recall`` (all micro-averaged).

    Raises
    ------
    ValueError
        If ``tasks`` is neither a str nor a list, is empty, or contains a
        name that is not in ``possible_tasks``.
    """
    # Make sure tasks is correctly formatted before any model is loaded.
    if isinstance(tasks, str):
        tasks = [tasks]
    elif not isinstance(tasks, list):
        raise ValueError("task must be list or str")

    if not tasks:
        raise ValueError("task must contain at least one task name")

    if not all(elem in possible_tasks for elem in tasks):
        raise ValueError(
            f"task must only contain any of the following strings: {possible_tasks}, but found: {tasks}"
        )

    print('Loading tokenizer')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Use the checkpoint's own separator; fall back to the BERT literal when none is defined.
    separator = tokenizer.sep_token or '[SEP]'

    def tokenize_function(examples):
        if use_topic:
            tweet = f"{examples['topic']} {separator} {examples['tweet']}"
        else:
            tweet = examples['tweet']

        return tokenizer(tweet, padding="max_length", truncation=True, add_special_tokens=True)

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        # Plain numpy accuracy: same {'accuracy': value} shape, no metric script to fetch.
        return {'accuracy': float(np.mean(predictions == labels))}

    def build_dataset(frame, task, label_to_id):
        # Rename the task column to the name the Trainer expects and encode
        # the raw label values as integer class ids.
        renamed = frame.rename(columns={task: "labels"})
        renamed = renamed.assign(labels=renamed["labels"].map(label_to_id).astype("int64"))
        source_columns = [col for col in renamed.columns if col != "labels"]
        # preserve_index=False keeps a non-default pandas index from becoming an extra column.
        data = Dataset.from_pandas(renamed, preserve_index=False)
        return data.map(tokenize_function).remove_columns(source_columns)

    res = []  # one (task, f1, precision, recall) row per task

    for task in tasks:
        # Map raw label values to 0..k-1 so labels such as -1/1 are valid class ids;
        # labels that are already 0/1 map to themselves.
        label_values = sorted(pd.concat([df_train[task], df_test[task]]).dropna().unique())
        label_to_id = {value: idx for idx, value in enumerate(label_values)}

        print('Loading language model')
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=max(2, len(label_values))
        )

        print('Setting up dataset for', task)
        train_full = build_dataset(df_train, task, label_to_id).shuffle(seed=seed)
        # The evaluation set is left unshuffled; row order does not affect the scores.
        test_full = build_dataset(df_test, task, label_to_id)

        print('Setting up training')

        train_args = TrainingArguments(
            output_dir = model_name + '_' + task,
            per_device_train_batch_size = batch_size,
            num_train_epochs = epochs,
            evaluation_strategy='epoch',
            seed = seed,
        )

        trainer = Trainer(
            model = model,
            args = train_args,
            train_dataset = train_full,
            eval_dataset = test_full,
            compute_metrics = compute_metrics,
        )

        print('Training')
        trainer.train()

        print('Inferring')
        predictions = trainer.predict(test_full)
        preds = predictions.predictions.argmax(-1)
        # Take the references from the prediction output so they are in the
        # same row order as the predictions.
        label = predictions.label_ids

        res.append((task, f1(label, preds, average='micro'), ps(label, preds, average='micro'), rs(label, preds, average='micro')))

    res = pd.DataFrame(res, columns=['Tasks', 'F1', 'Precision', 'Recall'])
    # Name every task in the caption, not just the last one the loop touched.
    caption = 'Results finetuning bert-model ' + model_name + ' on ' + ', '.join(tasks) + ' dataset and applying to test set'
    return res.style.set_caption(caption)

In [None]:
# Load the combined evidence data once; the baseline cell below reads it from `df`.
df = ibm_evidence()

In [None]:
# Hold out a fifth of the rows so the reported scores are measured on data
# the model was not fine-tuned on.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
bert_baseline(df_train, df_test, tasks = 'evidence', epochs = 5)