# Library Imports

In [1]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

import numpy as np
import pandas as pd

from tabulate import tabulate
from tqdm import trange
import random

# Dataframe load and Treatment

In this settings, I transform the csv file dataset from IBM Project Debater into an pandas dataframe df. Different slices of this pandas dataframe will be used in the different experiments in this analysis.

Also, in this section I define some global variables, such as api_Key, that will be necessary to run some models.

In [2]:
df = pd.read_csv('')  

In [3]:
'''
This API key is given by the early access program of the IBM Debater, for academic use only, and it is requested 
via e-mail to the IBM Debater Team. It is important to notice that, in order to make the IBM Debater API work,
you need to clone the IBM Debater API repository.
'''

api_key = '' 

In [4]:
tokens = ['bert-base-uncased', 'bert-large-uncased', 'bert-base-cased-finetuned-mrpc']

In [5]:
df.dropna(inplace = True)

In [6]:
df.reset_index(drop = True, inplace = True)

In [7]:
#Making adjustments for model comprehensions
df = df.replace('PRO', int(1))
df = df.replace('CON', int(0))
df= df.replace(-1, int(0))

# Data Frame Separation

In this section, I select the important parts of the pandas dataframe, which are: the topic and claim text (str), the topic and claim sentiment classification (int, [-1,1]), the claim and target relation classification (int, [-1,1]), the stance (str, ['PRO', 'CON']) and the split (str, ['train','test']).    

The texts will be used in the evaluated models, the classifications will be used to reproduce the formula of the refered article, the stance will be used to evaluate those models and the separation will be used to separate the inputs of the model

In [8]:
# Dividing the orifginal data frame into four dataframes
df_topicSentiment = df[['topicText', 'topicSentiment', 'split']] 
df_claimSentiment = df[['claims.claimOriginalText', 'claims.claimSentiment', 'split']] 
df_targetsRelation = df[['topicText', 'claims.claimOriginalText', 'claims.targetsRelation', 'split']]
df_resposta = df[['claims.stance', 'split']]

In [9]:
#Renbaming colums 
df_topicSentiment.set_axis(['text', 'labels', 'split'], axis='columns', inplace=True)
df_claimSentiment.set_axis(['text', 'labels', 'split'], axis='columns', inplace=True)
df_targetsRelation.set_axis(['textTopic', 'textClaim', 'labels', 'split'], axis='columns', inplace=True)
df_resposta.set_axis(['resuls', 'split'], axis='columns', inplace=True)

In [10]:
#Adjusting df_targestRelation to have an single entry of two sentences (topic and claim) with an separation encoding
df_targetsRelation['text'] = df_targetsRelation['textTopic'] + ' [SEP] [CLS] ' + df_targetsRelation['textClaim']
#df_targetsRelation['text'] = df_targetsRelation.apply(lambda row: row.textTopic + ' </s> ' + row.textClaim, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_targetsRelation['text'] = df_targetsRelation['textTopic'] + ' [SEP] [CLS] ' + df_targetsRelation['textClaim']


In [11]:
split_type = int(input("1-IBM, 2-Random: "))
input_type = int(input("Are You going to train using the refered article's formula [type 1] or with just an topic and claim input [type 2]? "))
token_choice = int(input('Wich tokenizer do you want to use? bert-base-uncased [type 1], bert-large-uncased [type 2] or bert-base-cased-finetuned-mrpc [type 3]? '))

1-IBM, 2-Random: 2
Are You going to train using the refered article's formula [type 1] or with just an topic and claim input [type 2]? 1
Wich tokenizer do you want to use? bert-base-uncased [type 1], bert-large-uncased [type 2] or bert-base-cased-finetuned-mrpc [type 3]? 2


In [12]:
if split_type == 1:
    df_topicSentiment_train = df_topicSentiment.loc[df['split'] == 'train']
    df_claimSentiment_train = df_claimSentiment.loc[df['split'] == 'train']
    df_targetsRelation_train = df_targetsRelation.loc[df['split'] == 'train']
    df_resposta_train = df_resposta.loc[df['split'] == 'train']

    df_topicSentiment_val = df_topicSentiment.loc[df['split'] == 'test']
    df_claimSentiment_val = df_claimSentiment.loc[df['split'] == 'test']
    df_targetsRelation_val = df_targetsRelation.loc[df['split'] == 'test']
    df_resposta_val = df_resposta.loc[df['split'] == 'test']
    
    #Making the the entry of the models
    text = [df_topicSentiment_train['text'].values, df_claimSentiment_train['text'].values, df_targetsRelation_train['text'].values,
       df_topicSentiment_val['text'].values, df_claimSentiment_val['text'].values, df_targetsRelation_val['text'].values]
    labels = [df_topicSentiment_train['labels'].values, df_claimSentiment_train['labels'].values, df_targetsRelation_train['labels'].values,
         df_topicSentiment_val['labels'].values, df_claimSentiment_val['labels'].values, df_targetsRelation_val['labels'].values,
         df_resposta_val['resuls'].values]

if split_type == 2:
    
    #Making the the entry of the models
    text = [df_topicSentiment['text'].values, df_claimSentiment['text'].values, df_targetsRelation['text'].values]
    labels = [df_topicSentiment['labels'].values, df_claimSentiment['labels'].values, df_targetsRelation['labels'].values,
             df_resposta['resuls'].values]

# Functions Definitions

In [13]:
#Function for tokening

'''
  Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
    - input_ids: list of token ids
    - token_type_ids: list of token type ids
    - attention_mask: list of indices (0,1) specifying which tokens should considered by the model (return_attention_mask = True).
'''

def preprocessing(input_text, tokenizer):
        return tokenizer.encode_plus(
                            input_text
                            ,add_special_tokens = True
                            ,max_length = 32
                            ,pad_to_max_length = True
                            ,return_attention_mask = True
                            ,return_tensors = 'pt'
                       )

In [14]:
#Function for model evaluation

def b_tp(preds, labels):
  '''Returns True Positives (TP): count of correct predictions of actual class 1'''
  return sum([preds == labels and preds == 1 for preds, labels in zip(preds, labels)])

def b_fp(preds, labels):
  '''Returns False Positives (FP): count of wrong predictions of actual class 1'''
  return sum([preds != labels and preds == 1 for preds, labels in zip(preds, labels)])

def b_tn(preds, labels):
  '''Returns True Negatives (TN): count of correct predictions of actual class 0'''
  return sum([preds == labels and preds == 0 for preds, labels in zip(preds, labels)])

def b_fn(preds, labels):
  '''Returns False Negatives (FN): count of wrong predictions of actual class 0'''
  return sum([preds != labels and preds == 0 for preds, labels in zip(preds, labels)])

def b_metrics(preds, labels):
  '''
  Returns the following metrics:
    - accuracy    = (TP + TN) / N
    - precision   = TP / (TP + FP)
    - recall      = TP / (TP + FN)
    - specificity = TN / (TN + FP)
  '''
  preds = np.argmax(preds, axis = 1).flatten()
  labels = labels.flatten()
  tp = b_tp(preds, labels)
  tn = b_tn(preds, labels)
  fp = b_fp(preds, labels)
  fn = b_fn(preds, labels)
  b_accuracy = (tp + tn) / len(labels)
  b_precision = tp / (tp + fp) if (tp + fp) > 0 else 'nan'
  b_recall = tp / (tp + fn) if (tp + fn) > 0 else 'nan'
  b_specificity = tn / (tn + fp) if (tn + fp) > 0 else 'nan'
  return b_accuracy, b_precision, b_recall, b_specificity

# Preparing Data for Training

In [15]:
token = tokens[token_choice-1]
tokenizer = BertTokenizer.from_pretrained(token, do_lower_case = True)

In [16]:
if split_type == 1:
    
    #TopicSentiment Train
    token_id_topic_train = []
    attention_masks_topic_train = []

    for sample in text[0]:
      encoding_dict_topic_train = preprocessing(sample, tokenizer)
      token_id_topic_train.append(encoding_dict_topic_train['input_ids']) 
      attention_masks_topic_train.append(encoding_dict_topic_train['attention_mask'])

    token_id_topic_train = torch.cat(token_id_topic_train, dim = 0)
    attention_masks_topic_train = torch.cat(attention_masks_topic_train, dim = 0)
    labels_topic_train = torch.tensor(labels[0])
    labels_topic_train =  torch.nan_to_num(labels_topic_train)
    labels_topic_train = labels_topic_train.type(torch.int64)

    #TopicSentiment Val
    token_id_topic_val = []
    attention_masks_topic_val = []

    for sample in text[3]:
      encoding_dict_topic_val = preprocessing(sample, tokenizer)
      token_id_topic_val.append(encoding_dict_topic_val['input_ids']) 
      attention_masks_topic_val.append(encoding_dict_topic_val['attention_mask'])

    token_id_topic_val = torch.cat(token_id_topic_val, dim = 0)
    attention_masks_topic_val = torch.cat(attention_masks_topic_val, dim = 0)
    labels_topic_val = torch.tensor(labels[3])
    labels_topic_val =  torch.nan_to_num(labels_topic_val)
    labels_topic_val = labels_topic_val.type(torch.int64)

    #ClaimSentiment Train
    token_id_claim_train = []
    attention_masks_claim_train = []

    for sample in text[1]:
      encoding_dict_claim_train = preprocessing(sample, tokenizer)
      token_id_claim_train.append(encoding_dict_claim_train['input_ids']) 
      attention_masks_claim_train.append(encoding_dict_claim_train['attention_mask'])

    token_id_claim_train = torch.cat(token_id_claim_train, dim = 0)
    attention_masks_claim_train = torch.cat(attention_masks_claim_train, dim = 0)
    labels_claim_train = torch.tensor(labels[1])
    labels_claim_train =  torch.nan_to_num(labels_claim_train)
    labels_claim_train = labels_claim_train.type(torch.int64)

    #ClaimSentiment Val
    token_id_claim_val = []
    attention_masks_claim_val = []

    for sample in text[4]:
      encoding_dict_claim_val = preprocessing(sample, tokenizer)
      token_id_claim_val.append(encoding_dict_claim_val['input_ids']) 
      attention_masks_claim_val.append(encoding_dict_claim_val['attention_mask'])

    token_id_claim_val = torch.cat(token_id_claim_val, dim = 0)
    attention_masks_claim_val = torch.cat(attention_masks_claim_val, dim = 0)
    labels_claim_val = torch.tensor(labels[4])
    labels_claim_val =  torch.nan_to_num(labels_claim_val)
    labels_claim_val = labels_claim_val.type(torch.int64)

    #RelationSentiment Train
    token_id_relation_train = []
    attention_masks_relation_train = []

    for sample in text[2]:
      encoding_dict_relation_train = preprocessing(sample, tokenizer)
      token_id_relation_train.append(encoding_dict_relation_train['input_ids']) 
      attention_masks_relation_train.append(encoding_dict_relation_train['attention_mask'])

    token_id_relation_train = torch.cat(token_id_relation_train, dim = 0)
    attention_masks_relation_train = torch.cat(attention_masks_relation_train, dim = 0)
    labels_relation_train = torch.tensor(labels[2])
    labels_relation_train =  torch.nan_to_num(labels_relation_train)
    labels_relation_train = labels_relation_train.type(torch.int64)

    #RelationSentiment Val_1
    token_id_relation_val = []
    attention_masks_relation_val = []

    for sample in text[5]:
      encoding_dict_relation_val = preprocessing(sample, tokenizer)
      token_id_relation_val.append(encoding_dict_relation_val['input_ids']) 
      attention_masks_relation_val.append(encoding_dict_relation_val['attention_mask'])

    token_id_relation_val = torch.cat(token_id_relation_val, dim = 0)
    attention_masks_relation_val = torch.cat(attention_masks_relation_val, dim = 0)
    labels_relation_val = torch.tensor(labels[5])
    labels_relation_val =  torch.nan_to_num(labels_relation_val)
    labels_relation_val = labels_relation_val.type(torch.int64)

    #RelationSentiment Val_2
    token_id_relation_val_general = []
    attention_masks_relation_val_general = []

    for sample in text[5]:
      encoding_dict_relation_val_general = preprocessing(sample, tokenizer)
      token_id_relation_val_general.append(encoding_dict_relation_val_general['input_ids']) 
      attention_masks_relation_val_general.append(encoding_dict_relation_val_general['attention_mask'])

    token_id_relation_val_general = torch.cat(token_id_relation_val_general, dim = 0)
    attention_masks_relation_val_general = torch.cat(attention_masks_relation_val_general, dim = 0)
    labels_relation_val_general = torch.tensor(labels[6])
    labels_relation_val_general =  torch.nan_to_num(labels_relation_val_general)
    labels_relation_val_general = labels_relation_val_general.type(torch.int64)
    
if split_type == 2:
    
    #Topic
    token_id_topic = []
    attention_masks_topic = []

    for sample in text[0]:
      encoding_dict_topic = preprocessing(sample, tokenizer)
      token_id_topic.append(encoding_dict_topic['input_ids']) 
      attention_masks_topic.append(encoding_dict_topic['attention_mask'])

    token_id_topic = torch.cat(token_id_topic, dim = 0)
    attention_masks_topic = torch.cat(attention_masks_topic, dim = 0)
    labels_topic = torch.tensor(labels[0])
    labels_topic =  torch.nan_to_num(labels_topic)
    labels_topic = labels_topic.type(torch.int64)
    
    #Claim
    token_id_claim = []
    attention_masks_claim = []
    
    for sample in text[1]:
      encoding_dict_claim = preprocessing(sample, tokenizer)
      token_id_claim.append(encoding_dict_claim['input_ids']) 
      attention_masks_claim.append(encoding_dict_claim['attention_mask'])

    token_id_claim = torch.cat(token_id_claim, dim = 0)
    attention_masks_claim = torch.cat(attention_masks_claim, dim = 0)
    labels_claim = torch.tensor(labels[1])
    labels_claim =  torch.nan_to_num(labels_claim)
    labels_claim = labels_claim.type(torch.int64)
    
    #Relation
    token_id_relation = []
    attention_masks_relation = []
    
    for sample in text[2]:
      encoding_dict_relation = preprocessing(sample, tokenizer)
      token_id_relation.append(encoding_dict_relation['input_ids']) 
      attention_masks_relation.append(encoding_dict_relation['attention_mask'])

    token_id_relation = torch.cat(token_id_relation, dim = 0)
    attention_masks_relation = torch.cat(attention_masks_relation, dim = 0)
    labels_relation = torch.tensor(labels[2])
    labels_relation =  torch.nan_to_num(labels_relation)
    labels_relation = labels_relation.type(torch.int64)
    
    token_id_relation_pure = []
    attention_masks_relation_pure = []
    
    for sample in text[2]:
      encoding_dict_relation_pure = preprocessing(sample, tokenizer)
      token_id_relation_pure.append(encoding_dict_relation_pure['input_ids']) 
      attention_masks_relation_pure.append(encoding_dict_relation_pure['attention_mask'])

    token_id_relation_pure = torch.cat(token_id_relation_pure, dim = 0)
    attention_masks_relation_pure = torch.cat(attention_masks_relation_pure, dim = 0)
    labels_relation_pure = torch.tensor(labels[3])
    labels_relation_pure =  torch.nan_to_num(labels_relation_pure)
    labels_relation_pure = labels_relation_pure.type(torch.int64)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [17]:
if split_type == 1: 
    if input_type == 1:
        # Train and validation sets for claimclaim
        train_set_claimSentiment = TensorDataset(token_id_claim_train, 
                                  attention_masks_claim_train, 
                                  labels_claim_train)

        val_set_claimSentiment = TensorDataset(token_id_claim_val, 
                                  attention_masks_claim_val, 
                                  labels_claim_val)

        # Train and validation sets for topic
        train_set_topicSentiment = TensorDataset(token_id_topic_train, 
                                  attention_masks_topic_train, 
                                  labels_topic_train)

        val_set_topicSentiment = TensorDataset(token_id_topic_val, 
                                  attention_masks_topic_val, 
                                  labels_topic_val)

        # Train and validation sets for target relation
        train_set_targetRelation = TensorDataset(token_id_relation_train, 
                                  attention_masks_relation_train, 
                                  labels_relation_train)

        val_set_targetRelation = TensorDataset(token_id_relation_val, 
                                  attention_masks_relation_val, 
                                  labels_relation_val)

    if input_type == 2:

        train_set = TensorDataset(token_id_relation_train_general, 
                                  attention_masks_relation_train_general, 
                                  labels_relation_train_general)

        val_set = TensorDataset(token_id_relation_val_general, 
                                  attention_masks_relation_val_general, 
                                  labels_relation_val_general)
        
if split_type == 2:
    
    val_ratio = 0.2
    
    train_idx, val_idx = train_test_split(
    np.arange(len(labels_topic)),
    test_size = val_ratio,
    shuffle = True)
    
    if input_type == 1:
        # Train and validation sets for claim
        train_set_claimSentiment = TensorDataset(token_id_claim[train_idx], 
                                  attention_masks_claim[train_idx], 
                                  labels_claim[train_idx])

        val_set_claimSentiment = TensorDataset(token_id_claim[val_idx], 
                                  attention_masks_claim[val_idx], 
                                  labels_claim[val_idx])

        # Train and validation sets for topic
        train_set_topicSentiment = TensorDataset(token_id_topic[train_idx], 
                                  attention_masks_topic[train_idx], 
                                  labels_topic[train_idx])

        val_set_topicSentiment = TensorDataset(token_id_topic[val_idx], 
                                  attention_masks_topic[val_idx], 
                                  labels_topic[val_idx])

        # Train and validation sets for target relation
        train_set_targetRelation = TensorDataset(token_id_relation[train_idx], 
                                  attention_masks_relation[train_idx], 
                                  labels_relation[train_idx])

        val_set_targetRelation = TensorDataset(token_id_relation[val_idx], 
                                  attention_masks_relation[val_idx], 
                                  labels_relation[val_idx])

    if input_type == 2:

        train_set = TensorDataset(token_id_relation_pure[train_idx], 
                                  attention_masks_relation_pure[train_idx], 
                                  labels_relation_pure[train_idx])

        val_set = TensorDataset(token_id_relation_pure[val_idx], 
                                  attention_masks_relation_pure[val_idx], 
                                  labels_relation_pure[val_idx])

# Preparing Model

In [18]:
model = BertForSequenceClassification.from_pretrained(token,num_labels = 2,output_attentions = False,
                                                          output_hidden_states = False)

optimizer = torch.optim.AdamW(model.parameters(), 
                              lr = 5e-5, # 5e-5, 3e-5, 2e-5
                              eps = 1e-08
                              )

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

In [19]:
batch_size = 32
epochs = 4

# Model Runing and Evaluation

In [20]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [21]:
predictions_list = [[],[],[]]

In [38]:
option = int(input('Wich step are you in: topic[1], claim[2] or relation[3]? '))

Wich step are you in: topic[1], claim[2] or relation[3]? 3


In [39]:
if option-1 == 0:
    train_set = train_set_claimSentiment
    val_set = val_set_claimSentiment

elif option-1 == 1:
    train_set = train_set_topicSentiment
    val_set = val_set_topicSentiment

elif option-1 == 2:
    train_set = train_set_targetRelation
    val_set = val_set_targetRelation

train_dataloader = DataLoader(
                train_set,
                sampler = RandomSampler(train_set),
                batch_size = batch_size
            )

validation_dataloader = DataLoader(
                val_set,
                sampler = SequentialSampler(val_set),
                batch_size = batch_size
            )

In [40]:
if input_type == 1:
    for k in range(0,1):

        for _ in trange(epochs, desc = 'Epoch'):

            # ========== Training ==========

            # Set model to training mode
            model.train()

            # Tracking variables
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0

            for step, batch in enumerate(train_dataloader):
                batch = tuple(t.to(device) for t in batch)
                b_input_ids, b_input_mask, b_labels = batch
                optimizer.zero_grad()
                # Forward pass
                train_output = model(b_input_ids, 
                                     token_type_ids = None, 
                                     attention_mask = b_input_mask, 
                                     labels = b_labels)
                # Backward pass
                train_output.loss.backward()
                optimizer.step()
                # Update tracking variables
                tr_loss += train_output.loss.item()
                nb_tr_examples += b_input_ids.size(0)
                nb_tr_steps += 1

           # ========== Validation ==========

            # Set model to evaluation mode
            model.eval()

            # Tracking variables 
            val_accuracy = []
            val_precision = []
            val_recall = []
            val_specificity = []

            for batch in validation_dataloader:
                batch = tuple(t.to(device) for t in batch)
                b_input_ids, b_input_mask, b_labels = batch
                with torch.no_grad():
                  # Forward pass
                  eval_output = model(b_input_ids, 
                                      token_type_ids = None, 
                                      attention_mask = b_input_mask)

                logits = eval_output.logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                # Calculate validation metrics
                b_accuracy, b_precision, b_recall, b_specificity = b_metrics(logits, label_ids)
                val_accuracy.append(b_accuracy)
                # Update precision only when (tp + fp) !=0; ignore nan
                if b_precision != 'nan': val_precision.append(b_precision)
                # Update recall only when (tp + fn) !=0; ignore nan
                if b_recall != 'nan': val_recall.append(b_recall)
                # Update specificity only when (tn + fp) !=0; ignore nan
                if b_specificity != 'nan': val_specificity.append(b_specificity)


            print('\n\t - Train loss: {:.4f}'.format(tr_loss / nb_tr_steps))
            print('\t - Validation Accuracy: {:.4f}'.format(sum(val_accuracy)/len(val_accuracy)))
            print('\t - Validation Precision: {:.4f}'.format(sum(val_precision)/len(val_precision)) if len(val_precision)>0 else '\t - Validation Precision: NaN')
            print('\t - Validation Recall: {:.4f}'.format(sum(val_recall)/len(val_recall)) if len(val_recall)>0 else '\t - Validation Recall: NaN')
            print('\t - Validation Specificity: {:.4f}\n'.format(sum(val_specificity)/len(val_specificity)) if len(val_specificity)>0 else '\t - Validation Specificity: NaN')

Epoch:  25%|██████████████████▎                                                      | 1/4 [20:43<1:02:11, 1243.88s/it]


	 - Train loss: 0.5143
	 - Validation Accuracy: 0.8021
	 - Validation Precision: 0.8021
	 - Validation Recall: 1.0000
	 - Validation Specificity: 0.0000



Epoch:  50%|█████████████████████████████████████▌                                     | 2/4 [40:35<40:26, 1213.02s/it]


	 - Train loss: 0.5036
	 - Validation Accuracy: 0.8021
	 - Validation Precision: 0.8021
	 - Validation Recall: 1.0000
	 - Validation Specificity: 0.0000



Epoch:  75%|██████████████████████████████████████████████████████▊                  | 3/4 [1:01:34<20:34, 1234.27s/it]


	 - Train loss: 0.5062
	 - Validation Accuracy: 0.8021
	 - Validation Precision: 0.8021
	 - Validation Recall: 1.0000
	 - Validation Specificity: 0.0000



Epoch: 100%|█████████████████████████████████████████████████████████████████████████| 4/4 [1:21:04<00:00, 1216.09s/it]


	 - Train loss: 0.5079
	 - Validation Accuracy: 0.8021
	 - Validation Precision: 0.8021
	 - Validation Recall: 1.0000
	 - Validation Specificity: 0.0000






# Model Evaluation

In [41]:
if input_type == 1:
    train_idx = np.array(df.index[df['split'] == 'train'].tolist())
    val_idx = np.array(df.index[df['split'] == 'train'].tolist())

In [42]:
#Selecionar qual base usar
if option  == 3 or option == 0:
    frases = df_targetsRelation['text'][val_idx].values.tolist()
elif option  == 1:
    frases = df['topicText'][val_idx].values.tolist()
elif option == 2:
    frases = df['claims.claimCorrectedText'][val_idx].values.tolist()

In [43]:
prediction_list = []
for i in frases:
    new_sentence = i

    # We need Token IDs and Attention Mask for inference on the new sentence
    test_ids = []
    test_attention_mask = []

    # Apply the tokenizer
    encoding = preprocessing(new_sentence, tokenizer)

    # Extract IDs and Attention Mask
    test_ids.append(encoding['input_ids'])
    test_attention_mask.append(encoding['attention_mask'])
    test_ids = torch.cat(test_ids, dim = 0)
    test_attention_mask = torch.cat(test_attention_mask, dim = 0)

    # Forward pass, calculate logit predictions
    with torch.no_grad():
      output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))

    prediction = 1 if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 0
    
    prediction_list.append(prediction)



In [44]:
if option == 3 or option== 0:
    predictions_list[2] = prediction_list
elif option == 1:
    predictions_list[0] = prediction_list
elif option == 2:
    predictions_list[1] = prediction_list

In [45]:
if input_type== 2:
    print('Accuracy: ', accuracy_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Precision: ', precision_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Recall: ', recall_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Specificity: ',recall_score(df['claims.stance'][val_idx], predictions_list[2], pos_label = 0))

if (input_type == 1 and option == 1):
    print('Accuracy: ', accuracy_score(df['claims.stance'][val_idx], predictions_list[0]))
    print('Precision: ', precision_score(df['claims.stance'][val_idx], predictions_list[0]))
    print('Recall: ', recall_score(df['claims.stance'][val_idx], predictions_list[0]))
    print('Specificity: ', recall_score(df['claims.stance'][val_idx], predictions_list[0], pos_label = 0))
          
if (input_type == 1 and option == 2):
    print('Accuracy: ', accuracy_score(df['claims.stance'][val_idx], predictions_list[1]))
    print('Precision: ', precision_score(df['claims.stance'][val_idx], predictions_list[1]))
    print('Recall: ', recall_score(df['claims.stance'][val_idx], predictions_list[1]))
    print('Specificity: ', recall_score(df['claims.stance'][val_idx], predictions_list[1], pos_label = 0))
          
if (input_type == 1 and option == 3):
    print('Accuracy: ', accuracy_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Precision: ', precision_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Recall: ', recall_score(df['claims.stance'][val_idx], predictions_list[2]))
    print('Specificity: ', recall_score(df['claims.stance'][val_idx], predictions_list[2], pos_label = 0))

Accuracy:  0.6036960985626283
Precision:  0.6036960985626283
Recall:  1.0
Specificity:  0.0


In [46]:
resp_menosum = df['claims.stance'][val_idx].copy()
df= df.replace(-1, int(-1))

resul_ibm_method = []
for i in range(len(predictions_list[0])):
    if predictions_list[0][i] == 0:
        predictions_list[0][i] == -1
    if predictions_list[1][i] == 0:
        predictions_list[1][i] == -1
    if predictions_list[2][i] == 0:
        predictions_list[2][i] == -1
    resul_ibm_method.append(predictions_list[0][i]*predictions_list[1][i]*predictions_list[2][i])

In [51]:
print('Accuracy: ', accuracy_score(resp_menosum, resul_ibm_method))
print('Precision: ', precision_score(resp_menosum, resul_ibm_method))
print('Recall: ', recall_score(resp_menosum, resul_ibm_method))
print('Specificity: ',recall_score(resp_menosum, resul_ibm_method, pos_label = 0))

Accuracy:  0.39630390143737165
Precision:  0.0
Recall:  0.0
Specificity:  1.0


  _warn_prf(average, modifier, msg_start, len(result))


# Benchmark - IBM API

In [47]:
api_key = '' 

In [48]:

    
#df_ibm_all = df.loc[[val_idx.tolist()],['topicTarget', 'claims.claimCorrectedText', 'claims.stance']]
#df_ibm = df_ibm_all[val_idx]

df_ibm_topic = df['topicTarget'][val_idx]
df_ibm_claim = df['claims.claimCorrectedText'][val_idx]
df_ibm_target = df['claims.stance'][val_idx]

df_ibm_topic.reset_index(drop = True, inplace = True)
df_ibm_claim.reset_index(drop = True, inplace = True)

sentence_topic_dicts = []
for i in range(len(df_ibm_topic)):
    dicti = {'topic': df_ibm_topic[i], 'sentence': df_ibm_claim[i]}
    sentence_topic_dicts.append(dicti)
    

#sentence_topic_dicts = df_data.to_dict('records')
#list_target = df_target.values.tolist()
#list_target_corrected = []
#for i in range(len(list_target)):
 #   list_target_corrected.append(list_target[i][0])

In [49]:
from debater_python_api.api.debater_api import DebaterApi

debater_api = DebaterApi(api_key)
pro_con_client = debater_api.get_pro_con_client()

scores = pro_con_client.run(sentence_topic_dicts)

resp_ibm = []

for j in range(len(sentence_topic_dicts)):
    if scores[j] > 0:
        resp_ibm.append(1)
    elif scores[j] < 0:
        resp_ibm.append(0)

ProConClient: 100%|██████████████████████████████████████████████████████████████████| 974/974 [00:25<00:00, 38.12it/s]


In [50]:
print('Accuracy: ', accuracy_score(df['claims.stance'][val_idx], resp_ibm))
print('Precision: ', precision_score(df['claims.stance'][val_idx], resp_ibm))
print('Recall: ', recall_score(df['claims.stance'][val_idx], resp_ibm))
print('Specificity: ',recall_score(df['claims.stance'][val_idx], resp_ibm, pos_label = 0))

Accuracy:  0.6457905544147844
Precision:  0.7434869739478958
Recall:  0.6309523809523809
Specificity:  0.6683937823834197


# Save Model

In [None]:
tokenizer.save_pretrained(save_directory)
model.save_pretrained(save_directory)

# Reference for Code

https://towardsdatascience.com/fine-tuning-bert-for-text-classification-54e7df642894