In [None]:
!pip install transformers datasets
import json
import pandas as pd
import numpy as np
from sklearn import metrics

In [None]:
!pip install -U torchtext==0.10.0
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator

In [None]:
import pandas as pd

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

# Choose the compute device: fall back to the CPU unless PyTorch can see CUDA.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")

    # Report how many GPUs are visible and the name of the first one.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

In [None]:
# Integer class id -> human-readable name of the unfair-clause category.
id2label = {0 : "Limitation of liability",
           1 : "Unilateral termination",
           2: "Unilateral change",
           3: "Content removal",
           4: "Contract by using",
           5: "Choice of law",
           6: "Jurisdiction",
           7: "Arbitration", 
          }

# Category name -> natural-language description of that category.
# The keys are exactly the values of id2label, in the same order; modif_df
# iterates over these descriptions to build the second sentence of each pair.
# NOTE: the description strings are model inputs, so they must not be edited
# casually (changing them changes what the model is trained/evaluated on).
descrip_label = {"Limitation of liability" : "Limitation of liability stipulates that the duty to pay damages is limited or excluded, for certain kind of losses, under certain conditions.",
           "Unilateral termination" : "Unilateral termination gives provider the right to suspend and/or terminate the service and/or the contract, and sometimes details the circumstances under which the provider claims to have a right to do so. Unilateral termination clauses that specify reasons for termination were marked as potentially unfair, whereas clauses stipulating that the service provider may suspend or terminate the service at any time for any or no reasons and/or without notice were marked as clearly unfair.",
           "Unilateral change": "Unilateral change specifies the conditions under which the service provider could amend and modify the terms of service and/or the service itself.",
           "Content removal": "Content removal  gives the provider a right to modify/delete user’s content, including in-app purchases, and sometimes specifies the conditions under which the service provider may do so.",
           "Contract by using": "Contract by using stipulates that the consumer is bound by the terms of use of a specific service, simply by using the service, without even being required to mark that he or she has read and accepted them.",
           "Choice of law": "Choice of law  specifies what law will govern the contract, meaning also what law will be applied in potential adjudication of a dispute arising under the contract. ",
           "Jurisdiction": "Jurisdiction stipulates what courts will have the competence to adjudicate disputes under the contract. Clauses stating that any judicial proceeding takes a residence away (i.e. in a different city, different country) were marked as clearly unfair.",
           "Arbitration": "Arbitration requires or allows the parties to resolve their disputes through an arbitration process, before the case could go to court. It is therefore considered a kind of forum selection clause. Clauses stipulating that the arbitration should take place in a state other then the state of consumer’s residence and/or be based not on law but on arbiter’s discretion were marked as clearly unfair.", 
           }
# Reverse lookup: category name -> integer class id.
label2id = {v: k for k, v in id2label.items()}

In [None]:
from datasets import load_dataset, list_datasets
dataset = load_dataset("lex_glue", 'unfair_tos')

# Materialise each split once as a list of plain dicts. Building the frames
# from record lists avoids growing a DataFrame row by row (quadratic, and
# DataFrame.append no longer exists in pandas >= 2.0).
_split_rows = {
    split_name: [dict(row) for row in dataset[split_name]]
    for split_name in ('train', 'validation', 'test')
}


def _frame_with_label_names(rows):
    """Return a DataFrame of `rows` whose integer label ids are replaced by names."""
    return pd.DataFrame(
        [{**row, 'labels': [id2label[l] for l in row['labels']]} for row in rows]
    )


# df_all keeps the original integer label ids for every split, in the order
# train -> validation -> test.
df_all = pd.DataFrame(
    _split_rows['train'] + _split_rows['validation'] + _split_rows['test']
)

# The per-split frames carry human-readable label names instead of ids.
df_training = _frame_with_label_names(_split_rows['train'])
df_valid = _frame_with_label_names(_split_rows['validation'])
df_testing = _frame_with_label_names(_split_rows['test'])



In [None]:
def modif_df(df_test, test=False, label_descriptions=None):
  """Expand each clause into one (clause, label description) pair per category.

  Args:
      df_test: DataFrame with a 'text' column (the clause) and a 'labels'
          column holding the list of category *names* assigned to the clause.
      test: Unused; kept so existing calls keep working.
      label_descriptions: Mapping of category name -> description text.
          Defaults to the notebook-level `descrip_label`.

  Returns:
      DataFrame with columns 'gold_label', 'sentence1', 'sentence2'.
      'sentence1' is the clause, 'sentence2' is the category description and
      'gold_label' is '1' when the clause carries that category, else '0'.
      Rows come in input order, with one row per category in mapping order.
  """
  if label_descriptions is None:
      label_descriptions = descrip_label

  records = []
  for _, row in df_test.iterrows():
      assigned = set(row['labels'])
      for name, description in label_descriptions.items():
          # Membership must be tested with the category NAME: row['labels']
          # holds names, so testing the description would never match and
          # every pair would be labelled '0'.
          records.append({
              'gold_label': '1' if name in assigned else '0',
              'sentence1': row['text'],
              'sentence2': description,
          })

  # Build the frame once instead of appending row by row.
  return pd.DataFrame(records, columns=['gold_label', 'sentence1', 'sentence2'])

# Expand every split into (clause, label description) pairs with a '0'/'1' gold label.
df_modif_test = modif_df(df_testing)
df_modif_train = modif_df(df_training)
df_modif_val = modif_df(df_valid)

In [None]:
# Display the expanded test pairs for inspection.
df_modif_test

In [None]:
# Sanity check: number of expanded test pairs and the gold label of the first pair.
print(len(df_modif_test))
df_modif_test['gold_label'][0]

In [None]:
# Rebuild one multi-label gold row per clause from the expanded test pairs.
# df_modif_test holds one row per (clause, category) in a fixed category
# order, so each consecutive group of 8 gold labels belongs to one clause.
_true_category_columns = ['Limitation_of_liability', 'Unilateral_termination', 'Unilateral_change',
                          'Content_removal', 'Contract_by_using', 'Choice_of_law', 'Jurisdiction', 'Arbitration']
_true_group_size = len(_true_category_columns)

_gold_flags = df_modif_test['gold_label'].tolist()
assert len(_gold_flags) % _true_group_size == 0, "expected one gold label per category for every clause"

_true_rows = []
for _start in range(0, len(_gold_flags), _true_group_size):
    _flags = _gold_flags[_start:_start + _true_group_size]
    _record = dict(zip(_true_category_columns, _flags))
    # 'No_violation' is '1' only when no category applies. The negative case
    # must be '0' (not 'no') so the later pd.to_numeric conversion succeeds.
    _record['No_violation'] = '1' if all(flag == '0' for flag in _flags) else '0'
    _true_rows.append(_record)

# Build the frame once instead of appending row by row.
df_true_test = pd.DataFrame(_true_rows, columns=_true_category_columns + ['No_violation'])

df_true_test.head()

In [None]:
# Convert every column from string flags to numeric values (column by column).
df_true_test = df_true_test.apply(pd.to_numeric)
# df_true_test['all'] = df_true_test.apply(', '.join, axis=1)
df_true_test

In [None]:
import torch

# Seed PyTorch's RNG and request deterministic cuDNN behaviour for repeatable runs.
SEED = 1111
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
from transformers import BertTokenizer
# Load the pretrained tokenizer for the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Display the tokenizer length (its vocabulary size).
len(tokenizer)

In [None]:
# Special token strings taken from the tokenizer: classification, separator,
# padding and unknown tokens.
init_token = tokenizer.cls_token
eos_token = tokenizer.sep_token
pad_token = tokenizer.pad_token
unk_token = tokenizer.unk_token

print(init_token, eos_token, pad_token, unk_token)

In [None]:
# Vocabulary ids of the same special tokens; pad_token_idx and unk_token_idx
# are used later when the torchtext fields are defined.
init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id

print(init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx)

In [None]:
# Look up the maximum input size the tokenizer reports for this checkpoint and
# print it for reference only.
max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']

print(max_input_length)

# Override with a smaller limit; split_and_cut keeps max_input_length - 1 tokens.
max_input_length = 255

## Prepare Dataset

In [None]:
def tokenize_bert(sentence):
    """Split `sentence` into tokens using the notebook-level `tokenizer`."""
    return tokenizer.tokenize(sentence)

def split_and_cut(sentence):
    """Split a space-separated string and keep at most max_input_length - 1 items.

    Leading/trailing whitespace is stripped first; the split is on single
    spaces, so the items are returned exactly as they were joined.
    """
    limit = max_input_length - 1
    return sentence.strip().split(" ")[:limit]
    
def trim_sentence(sent, max_words=128):
    """Keep at most `max_words` whitespace-separated words of `sent`.

    Args:
        sent: The sentence to trim. Non-string values (e.g. None or NaN from a
            missing cell) are returned unchanged.
        max_words: Maximum number of words to keep (default 128).

    Returns:
        The first `max_words` words joined by single spaces, or `sent` itself
        when it is not a string.
    """
    # Explicit type check instead of a bare `except`, so unrelated errors
    # (including KeyboardInterrupt) are no longer silently swallowed.
    if not isinstance(sent, str):
        return sent
    return " ".join(sent.split()[:max_words])

In [None]:
def get_sent1_token_type(sent):
    """Return a list of 0s, one per element of `sent` (segment id of the first sentence).

    Values without a length (e.g. None or NaN) yield an empty list.
    """
    try:
        return [0] * len(sent)
    except TypeError:
        # len() is undefined for this value; only that case is treated as
        # "no tokens", other errors are no longer swallowed.
        return []

def get_sent2_token_type(sent):
    """Return a list of 1s, one per element of `sent`.

    Used both for the segment ids of the second sentence and for building an
    all-ones attention mask. Values without a length (e.g. None or NaN) yield
    an empty list.
    """
    try:
        return [1] * len(sent)
    except TypeError:
        # len() is undefined for this value; only that case is treated as
        # "no tokens", other errors are no longer swallowed.
        return []
    
def combine_seq(seq):
    """Join a sequence of string tokens into one space-separated string."""
    separator = " "
    return separator.join(seq)

def combine_mask(mask):
    """Render a sequence of mask/segment values as a space-separated string."""
    return " ".join(map(str, mask))

In [None]:
# Inspect the expanded training pairs with index labels 264 through 271.
df_modif_train.loc[264:271]

In [None]:
def _prepare_nli_frame(df_pairs):
    """Turn (gold_label, sentence1, sentence2) pairs into model-ready text columns.

    Works on a copy, so `df_pairs` is left untouched and no
    SettingWithCopyWarning is raised.

    Returns:
        DataFrame with columns 'gold_label', 'sequence', 'attention_mask' and
        'token_type'. The last three are space-separated strings of equal
        item count: BERT tokens, an all-ones mask, and 0/1 segment ids.
    """
    frame = df_pairs[['gold_label', 'sentence1', 'sentence2']].copy()

    # Limit both sentences to a fixed number of words before tokenising.
    frame['sentence1'] = frame['sentence1'].apply(trim_sentence)
    frame['sentence2'] = frame['sentence2'].apply(trim_sentence)

    # Wrap the pair in BERT's special tokens: [CLS] s1 [SEP] s2 [SEP].
    frame['sent1'] = '[CLS] ' + frame['sentence1'] + ' [SEP] '
    frame['sent2'] = frame['sentence2'] + ' [SEP]'

    frame['sent1_t'] = frame['sent1'].apply(tokenize_bert)
    frame['sent2_t'] = frame['sent2'].apply(tokenize_bert)

    # Segment ids: 0 for every token of the first part, 1 for the second.
    frame['sent1_token_type'] = frame['sent1_t'].apply(get_sent1_token_type)
    frame['sent2_token_type'] = frame['sent2_t'].apply(get_sent2_token_type)

    # Element-wise list concatenation of the two token lists.
    frame['sequence'] = frame['sent1_t'] + frame['sent2_t']

    # Attention mask: a 1 for every token (get_sent2_token_type returns all ones).
    frame['attention_mask'] = frame['sequence'].apply(get_sent2_token_type)

    frame['token_type'] = frame['sent1_token_type'] + frame['sent2_token_type']

    # Serialise the lists as space-separated strings so they survive the CSV round trip.
    frame['sequence'] = frame['sequence'].apply(combine_seq)
    frame['attention_mask'] = frame['attention_mask'].apply(combine_mask)
    frame['token_type'] = frame['token_type'].apply(combine_mask)

    return frame[['gold_label', 'sequence', 'attention_mask', 'token_type']]


df_train = _prepare_nli_frame(df_modif_train)
df_dev = _prepare_nli_frame(df_modif_val)
df_test = _prepare_nli_frame(df_modif_test)

In [None]:
# Keep only rows whose gold label is one of the two expected string classes.
binary_labels = ['1', '0']
df_train = df_train[df_train['gold_label'].isin(binary_labels)]
df_dev = df_dev[df_dev['gold_label'].isin(binary_labels)]
df_test = df_test[df_test['gold_label'].isin(binary_labels)]

# Write each split to CSV (without the index) for torchtext to read back.
for split_frame, csv_path in ((df_train, '/unfairtos_train.csv'),
                              (df_dev, '/unfairtos_dev.csv'),
                              (df_test, '/unfairtos_test.csv')):
    split_frame.to_csv(csv_path, index=False)

In [None]:
# Preview the first rows of the prepared training frame.
df_train.head()

In [None]:
def convert_to_int(tok_ids):
    """Convert every item of `tok_ids` (e.g. digit strings) to an int, as a new list."""
    return list(map(int, tok_ids))

from torchtext.legacy.data import Field, LabelField

# Field for the token sequence: the CSV string is split on spaces (and cut to
# max_input_length - 1 items) by split_and_cut, then mapped to vocabulary ids
# by the BERT tokenizer. No torchtext vocab is built (use_vocab = False).
TEXT = Field(batch_first = True,
                  use_vocab = False,
                  tokenize = split_and_cut,
                  preprocessing = tokenizer.convert_tokens_to_ids,
                  pad_token = pad_token_idx,
                  unk_token = unk_token_idx)

# Field for the gold label; its vocabulary is built later from the training data.
LABEL = LabelField()

# Field for the attention mask: digit strings are converted to ints and padded
# with pad_token_idx (presumably 0 for this tokenizer — see the printed ids).
ATTENTION = Field(batch_first = True,
                  use_vocab = False,
                  tokenize = split_and_cut,
                  preprocessing = convert_to_int,
                  pad_token = pad_token_idx)

# Field for the segment (token type) ids; padding positions are filled with 1.
TTYPE = Field(batch_first = True,
                  use_vocab = False,
                  tokenize = split_and_cut,
                  preprocessing = convert_to_int,
                  pad_token = 1)

In [None]:
# (attribute name, Field) pairs, listed in the column order of the exported
# CSV files: gold_label, sequence, attention_mask, token_type.
fields = [('label', LABEL), ('sequence', TEXT), ('attention_mask', ATTENTION), ('token_type', TTYPE)]

# Read the three CSV files written earlier; the header row is skipped because
# the columns are assigned through `fields`.
train_data, valid_data, test_data = TabularDataset.splits(
                                        path = '/',
                                        train = 'unfairtos_train.csv',
                                        validation = 'unfairtos_dev.csv',
                                        test = 'unfairtos_test.csv',
                                        format = 'csv',
                                        fields = fields,
                                        skip_header = True)

In [None]:
# Report the number of examples in each split.
print(f"Number of training data: {len(train_data)}")
print(f"Number of validation data: {len(valid_data)}")
print(f"Number of testing data: {len(test_data)}")

# Kept for computing the total number of optimisation steps later.
train_data_len = len(train_data)

In [None]:
# Build the label vocabulary from the training split and show its
# string->index mapping, label frequencies and index->string list.
LABEL.build_vocab(train_data)
print(LABEL.vocab.stoi)
print(LABEL.vocab.freqs.most_common())
print(LABEL.vocab.itos)

In [None]:
# Number of examples per batch for all three iterators.
BATCH_SIZE = 16

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One iterator per split; sort_key is the number of tokens in each example's
# sequence, and batches are placed on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.sequence),
    sort_within_batch = False, 
    device = device)

In [None]:
# Confirm which device the iterators and model will use.
print(device)

In [None]:
from transformers import BertModel

# Load the pretrained uncased BERT base encoder (same checkpoint as the tokenizer).
bert_model = BertModel.from_pretrained('bert-base-uncased')

In [None]:
import torch.nn as nn

class BERTNLIModel(nn.Module):
    """BERT encoder followed by a single linear classification layer.

    Args:
        bert_model: Encoder module. It must expose `config.to_dict()` with a
            'hidden_size' entry and accept `input_ids`, `attention_mask` and
            `token_type_ids` keyword arguments.
        hidden_dim: Accepted for interface compatibility; currently unused
            because no intermediate layer sits between encoder and output.
        output_dim: Number of output classes (size of the logit vector).
    """

    def __init__(self,
                 bert_model,
                 hidden_dim,
                 output_dim,
                ):
        super().__init__()

        self.bert = bert_model

        # Width of the encoder's output vectors, read from its configuration.
        encoder_width = bert_model.config.to_dict()['hidden_size']

        # Classification head applied directly to the encoder output.
        self.out = nn.Linear(encoder_width, output_dim)

    def forward(self, sequence, attn_mask, token_type):
        """Return class logits for a batch of encoded sentence pairs.

        The notebook builds its inputs batch-first, so all three tensors are
        expected as [batch size, seq len].

        Returns:
            Tensor of logits, [batch size, output_dim].
        """
        encoder_outputs = self.bert(input_ids=sequence,
                                    attention_mask=attn_mask,
                                    token_type_ids=token_type)

        # Element 1 of the encoder output is used as the sequence-level
        # representation (the pooled output for Hugging Face's BertModel),
        # giving one vector per example: [batch size, hidden size].
        pooled = encoder_outputs[1]

        return self.out(pooled)

In [None]:
# HIDDEN_DIM is passed through to the model, which currently does not use it.
HIDDEN_DIM = 512
# One output logit per entry in the label vocabulary.
OUTPUT_DIM = len(LABEL.vocab)

# Build the classifier on top of the pretrained encoder and move it to `device`.
model = BERTNLIModel(bert_model,
                         HIDDEN_DIM,
                         OUTPUT_DIM,
                        ).to(device)

In [None]:
def count_parameters(model):
    """Return the total number of elements in `model`'s trainable parameters."""
    trainable_total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad == False) are not counted.
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

# Report the trainable parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 109,483,009 trainable parameters


In [None]:
import torch.optim as optim
import transformers
#optimizer = optim.Adam(model.parameters())
# AdamW over all model parameters (encoder and classification head) with lr 2e-5.
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

def get_scheduler(optimizer, warmup_steps):
    """Create a constant learning-rate schedule with `warmup_steps` warmup steps.

    Thin wrapper around transformers.get_constant_schedule_with_warmup.
    """
    return transformers.get_constant_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
    )

# Cross-entropy loss over the class logits, moved to the compute device.
criterion = nn.CrossEntropyLoss().to(device)

def categorical_accuracy(preds, y):
    """Return the fraction of rows in `preds` whose arg-max equals the label in `y`.

    `preds` is indexed as [batch, classes]; the result is a scalar tensor.
    """
    predicted_classes = preds.argmax(dim=1)  # index of the highest score per row
    hits = (predicted_classes == y).float()
    return hits.sum() / len(y)

In [None]:
# Upper bound on the global gradient norm applied in train().
max_grad_norm = 1

def train(model, iterator, optimizer, criterion, scheduler):
    """Run one training epoch and return (mean batch loss, mean batch accuracy).

    Args:
        model: Called as model(sequence, attention_mask, token_type) -> logits.
        iterator: Sized iterable of batches exposing `sequence`,
            `attention_mask`, `token_type` and `label` attributes.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as criterion(logits, labels).
        scheduler: Learning-rate scheduler stepped once per batch.

    Returns:
        Tuple of floats: loss and accuracy averaged over the batches.
    """
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        optimizer.zero_grad() # clear gradients first
        torch.cuda.empty_cache() # releases all unoccupied cached memory

        sequence = batch.sequence
        attn_mask = batch.attention_mask
        token_type = batch.token_type
        label = batch.label

        # predictions = [batch size, number of classes]
        predictions = model(sequence, attn_mask, token_type)

        loss = criterion(predictions, label)

        acc = categorical_accuracy(predictions, label)

        loss.backward()

        # Clip the global gradient norm before the update. max_grad_norm was
        # previously defined but never applied.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()
        scheduler.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    """Score `model` on `iterator` without updating it.

    Puts the model in eval mode, disables gradient tracking, and returns a
    (mean batch loss, mean batch accuracy) tuple averaged over the batches.
    """
    model.eval()

    loss_total = 0
    acc_total = 0

    with torch.no_grad():
        for batch in iterator:
            targets = batch.label
            logits = model(batch.sequence, batch.attention_mask, batch.token_type)

            loss_total += criterion(logits, targets).item()
            acc_total += categorical_accuracy(logits, targets).item()

    n_batches = len(iterator)
    return loss_total / n_batches, acc_total / n_batches

In [None]:
import time

def epoch_time(start_time, end_time):
    """Split the duration between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        (minutes, seconds) as ints, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [None]:
import math
# Number of full passes over the training data.
N_EPOCHS = 3

# Warm the learning rate up over the first 20% of all optimisation steps.
warmup_percent = 0.2
# Total steps = epochs * examples / batch size, rounded up.
total_steps = math.ceil(N_EPOCHS*train_data_len*1./BATCH_SIZE)
warmup_steps = int(total_steps*warmup_percent)
scheduler = get_scheduler(optimizer, warmup_steps)

# Track the lowest validation loss seen so far to decide when to checkpoint.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, scheduler)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Save the weights only when the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'bert-nli.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

In [None]:
# Restore the checkpoint with the best validation loss before testing.
model.load_state_dict(torch.load('bert-nli.pt'))

# Pair-level loss and accuracy on the test split.
test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} |  Test Acc: {test_acc*100:.2f}%')

In [None]:
import joblib
# Pickle the whole model object (not just its state dict) to the working directory.
joblib.dump(model, 'text_descriplabel.pkl')

In [None]:
# Reload the pickled model from the same relative path joblib.dump wrote to,
# instead of a hard-coded absolute path that only exists when the working
# directory is /content.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files
# produced by this notebook.
model = joblib.load('text_descriplabel.pkl')

In [None]:
def predict_inference(premise, hypothesis, model, device):
    """Predict the label for one (premise, hypothesis) pair.

    The pair is preprocessed the same way as the training data: both texts
    are trimmed with trim_sentence, wrapped as "[CLS] premise [SEP]
    hypothesis [SEP]", tokenised, and cut to max_input_length - 1 tokens.

    Args:
        premise: The clause text (first sentence).
        hypothesis: The label description (second sentence).
        model: Called as model(token_ids, attention_mask, token_type) -> logits.
        device: Device on which the input tensors are created.

    Returns:
        The predicted label string, looked up in LABEL.vocab.itos.
    """
    model.eval()

    premise = '[CLS] ' + trim_sentence(premise) + ' [SEP]'
    hypothesis = trim_sentence(hypothesis) + ' [SEP]'

    prem_t = tokenize_bert(premise)
    hypo_t = tokenize_bert(hypothesis)

    # Same cut-off as split_and_cut, so over-long inputs cannot exceed the
    # length the model saw during training.
    max_tokens = max_input_length - 1
    tokens = (prem_t + hypo_t)[:max_tokens]

    # Segment ids: 0 for premise tokens, 1 for hypothesis tokens.
    indexes_type = (get_sent1_token_type(prem_t) + get_sent2_token_type(hypo_t))[:max_tokens]

    indexes = tokenizer.convert_tokens_to_ids(tokens)

    # Single unpadded example: every position is attended to.
    attn_mask = get_sent2_token_type(indexes)

    # Add the batch dimension: each tensor is [1, seq len].
    indexes = torch.LongTensor(indexes).unsqueeze(0).to(device)
    indexes_type = torch.LongTensor(indexes_type).unsqueeze(0).to(device)
    attn_mask = torch.LongTensor(attn_mask).unsqueeze(0).to(device)

    # No gradients are needed for inference; skipping them saves memory and time.
    with torch.no_grad():
        prediction = model(indexes, attn_mask, indexes_type)

    prediction = prediction.argmax(dim=-1).item()

    return LABEL.vocab.itos[prediction]

In [None]:
# premise = 'academia.edu reserves the right , at its sole discretion , to discontinue or terminate the site and services and to terminate these terms , at any time and without prior notice . '
# hypothesis = 'Unilateral termination' 
# print(predict_inference(premise, hypothesis, model, device))

# premise = 'by using amazon services , you agree to these conditions . '
# hypothesis = 'Contract by using'
# print(predict_inference(premise, hypothesis, model, device))

In [None]:
# Predict every (clause, label description) test pair. Rows are collected in
# a list and the frame is built once, instead of growing it row by row.
# Columns: predicted label, gold label, and the label description.
_prediction_rows = [
    [predict_inference(row['sentence1'], row['sentence2'], model, device),
     row['gold_label'],
     row['sentence2']]
    for _, row in df_modif_test.iterrows()
]
final_df = pd.DataFrame(_prediction_rows, columns=['pred', 'true', 'label'])

In [None]:
# Preview the first ten pair-level predictions next to their gold labels.
final_df.head(10)

In [None]:
# Rebuild one multi-label prediction row per clause from the pair-level
# predictions. final_df holds one row per (clause, category) in a fixed
# category order, so each consecutive group of 8 predictions is one clause.
_pred_category_columns = ['Limitation_of_liability', 'Unilateral_termination', 'Unilateral_change',
                          'Content_removal', 'Contract_by_using', 'Choice_of_law', 'Jurisdiction', 'Arbitration']
_pred_group_size = len(_pred_category_columns)

_pred_flags = final_df['pred'].tolist()
assert len(_pred_flags) % _pred_group_size == 0, "expected one prediction per category for every clause"

_pred_rows = []
for _start in range(0, len(_pred_flags), _pred_group_size):
    _flags = _pred_flags[_start:_start + _pred_group_size]
    _record = dict(zip(_pred_category_columns, _flags))
    # 'No_violation' is '1' only when no category was predicted for the clause.
    _record['No_violation'] = '1' if all(flag == '0' for flag in _flags) else '0'
    _pred_rows.append(_record)

# Build the frame once; DataFrame.append is quadratic in a loop and no longer
# exists in pandas >= 2.0.
df_pred_test = pd.DataFrame(_pred_rows, columns=_pred_category_columns + ['No_violation'])

df_pred_test

In [None]:
# Convert every column from string flags to numeric values (column by column).
df_pred_test = df_pred_test.apply(pd.to_numeric)
df_pred_test

In [None]:
# Evaluate performance
from sklearn import metrics
# Rows are clauses, columns are the eight categories plus 'No_violation'.
test_preds = df_pred_test.to_numpy()
test_targets = df_true_test.to_numpy()

# Multi-label F1 over all columns: micro pools every decision, macro averages
# the per-column scores.
f1_score_micro = metrics.f1_score(test_targets, test_preds, average='micro')
f1_score_macro = metrics.f1_score(test_targets, test_preds, average='macro')
print(f"F1 Score (Micro) = {f1_score_micro}")
print(f"F1 Score (Macro) = {f1_score_macro}")

In [None]:
# Per-category F1: score each column of the prediction frame against the
# matching column of the gold frame.
for c in ['Limitation_of_liability', 'Unilateral_termination', 'Unilateral_change',
                                      'Content_removal', 'Contract_by_using', 'Choice_of_law', 'Jurisdiction', 'Arbitration', 'No_violation']:

  test_preds = df_pred_test[c].to_numpy()
  test_true = df_true_test[c].to_numpy()

  # Compare against this category's gold column (test_true). The full
  # multi-label matrix `test_targets` was passed here before, which does not
  # match the 1-D per-category predictions.
  f1_score_micro = metrics.f1_score(test_true, test_preds, average='micro')
  f1_score_macro = metrics.f1_score(test_true, test_preds, average='macro')
  print(f"F1 Score (Micro) {c} = {f1_score_micro}")
  print(f"F1 Score (Macro) {c} = {f1_score_macro}")
  print()
  print()