# Task for the identification of argumentative sentence types with a multilabel approach

Files needed to run the notebook:

- The pickle file *dataset.pkl*; 

Import libraries:

In [1]:
import pickle as pkl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import json
import numpy as np

from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM
from tensorflow.keras.regularizers import l2

In [2]:
df = pd.read_pickle("./dataset.pkl")  

In [3]:
df.head()

Unnamed: 0,Document,Name,Id,Sentence,Type,Supported_by,Supported_from_failure,Attacked_by,Inhibited_by,Rephrased_by,Argumentation_scheme
2,2004,prem,A3,respect borne mind pursuant Article 58 Statute...,L,,,,,,Rule
3,2004,prem,A4,Advocate General states point 20 Opinion Commi...,F,,,,,,Aut
5,2004,prem,A6,Regarding Wam’s argument Commission’s appeal i...,L|F,,,,,,Rule
6,2004,prem,A7,Appeals judgments Court First Instance governe...,L,,,,,,Rule
7,2004,prem,A8,Next must noted obligation provide statement r...,L,,,,,,Prec


In [4]:
df["Argumentation_scheme"].values[0]

'Rule'

In [5]:
df["Argumentation_scheme"].value_counts()

Prec                  279
Itpr                   95
Princ                  64
Rule                   61
Prec|Rule              35
Itpr|Prec              22
Aut                    21
Class|Prec|Rule         9
Tele                    8
Class                   7
Class|Prec              5
Aut|Itpr                5
Prec|Tele               5
Itpr|Tele               4
Rule|Syst               3
Itpr|Rule               3
Prec|Syst               3
Syst                    3
Princ|Rule              2
Psy|Tele                2
Lit|Rule                2
Lit|Prec                2
Prec|Psy|Rule|Tele      1
Princ|Tele              1
Itpr|Syst               1
Itpr|Psy                1
Itpr|Lit                1
Aut|Prec|Princ          1
Princ|Psy|Syst          1
Itpr|Princ|Syst         1
Lit|Rule|Syst           1
Syst|Tele               1
Aut|Class               1
Lit|Rule|Tele           1
Aut|Itpr|Prec           1
Prec|Psy                1
Aut|Rule                1
Aut|Syst                1
Lit         

In [6]:
value_counts = df['Argumentation_scheme'].value_counts() # Specific column 
df_truncated60 = df[df['Argumentation_scheme'].isin(value_counts[value_counts > 60].index)]
df_truncated20 = df[df['Argumentation_scheme'].isin(value_counts[value_counts > 20].index)]

In [7]:
df_truncated60['Argumentation_scheme'].value_counts()

Prec     279
Itpr      95
Princ     64
Rule      61
Name: Argumentation_scheme, dtype: int64

In [8]:
df_truncated20['Argumentation_scheme'].value_counts()

Prec         279
Itpr          95
Princ         64
Rule          61
Prec|Rule     35
Itpr|Prec     22
Aut           21
Name: Argumentation_scheme, dtype: int64

In [9]:
df_balanced = pd.concat([df[df["Argumentation_scheme"] == "Prec"].sample(61),
                    df[df["Argumentation_scheme"] == "Itpr"].sample(61),
                    df[df["Argumentation_scheme"] == "Princ"].sample(61),
                    df[df["Argumentation_scheme"] == "Rule"]
                    ])

In [10]:
df_balanced["Argumentation_scheme"].value_counts()

Prec     61
Itpr     61
Princ    61
Rule     61
Name: Argumentation_scheme, dtype: int64

Choice of parameters:

In [11]:
# EMBEDDINGS
EMBEDDING = "legal_bert_sentence"

# MODELS
MODEL = "RNN"

# DATASET (uncomment the chosen one)
# DATASET = df
DATASET = df_truncated60
# DATASET = df_truncated20
# DATASET = df_balanced

TEST_SIZE = 0.3
VAL_SIZE = 0.2  # percentage taken from the train subset

# MODEL PARAMETERS

EPOCHS = 250
BATCH_SIZE = 64
LR = 0.0001
DROP = 0.1

L2_FACTOR = 0.001
LAYERS = [512, 256, 128] # 3 layers

In [12]:
aut_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Aut' in str(x) else 0)
class_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Class' in str(x) else 0)
itpr_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Itpr' in str(x) else 0)
lit_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Lit' in str(x) else 0)
prec_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Prec' in str(x) else 0)
princ_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Princ' in str(x) else 0)
psy_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Psy' in str(x) else 0)
rule_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Rule' in str(x) else 0)
syst_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Syst' in str(x) else 0)
tele_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Tele' in str(x) else 0)

df_new = pd.DataFrame(DATASET['Sentence'])
df_new['Aut'] = aut_col
df_new['Class'] = class_col
df_new['Itpr'] = itpr_col
df_new['Lit'] = lit_col
df_new['Prec'] = prec_col
df_new['Princ'] = princ_col
df_new['Psy'] = psy_col
df_new['Rule'] = rule_col
df_new['Syst'] = syst_col
df_new['Tele'] = tele_col

In [13]:
df_new.head()

Unnamed: 0,Sentence,Aut,Class,Itpr,Lit,Prec,Princ,Psy,Rule,Syst,Tele
2,respect borne mind pursuant Article 58 Statute...,0,0,0,0,0,0,0,1,0,0
5,Regarding Wam’s argument Commission’s appeal i...,0,0,0,0,0,0,0,1,0,0
6,Appeals judgments Court First Instance governe...,0,0,0,0,0,0,0,1,0,0
7,Next must noted obligation provide statement r...,0,0,0,0,1,0,0,0,0,0
11,According settled caselaw statement reasons re...,0,0,0,0,1,0,0,0,0,0


# Embedding

In [14]:
if EMBEDDING == "legal_bert_sentence":
  sbert_model = SentenceTransformer("nlpaueb/legal-bert-small-uncased")
  sentence_embeddings = sbert_model.encode(df_new["Sentence"].values)

  X = sentence_embeddings
  X = X.reshape(len(X), 1, X.shape[1])
  y = df_new.drop("Sentence", axis=1)

Some weights of the model checkpoint at /root/.cache/torch/sentence_transformers/nlpaueb_legal-bert-small-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
print("EMBEDDING: ", EMBEDDING)
print("SHAPE X: ", X.shape)
print("SHAPE y: ", y.shape)

EMBEDDING:  legal_bert_sentence
SHAPE X:  (499, 1, 512)
SHAPE y:  (499, 10)


# Training

In [16]:
if MODEL == "RNN":
  INPUTS = X.shape[-1]
  OUTPUTS = y.shape[-1]

  def create_deep_model(factor, rate):
      model = Sequential()      
      model.add(LSTM(units=LAYERS[0], return_sequences = True, input_dim=INPUTS,kernel_initializer='he_uniform', kernel_regularizer=l2(factor), 
        activation='relu')), Dropout(rate),
      model.add(LSTM(units=LAYERS[1], return_sequences = True, kernel_regularizer=l2(factor),
        activation='relu')), Dropout(rate),
      model.add(LSTM(units=LAYERS[2], kernel_regularizer=l2(factor),
        activation='relu')), Dropout(rate),
      #Output layer
      model.add(Dense(units=OUTPUTS, activation='softmax'))
      return model
  model= create_deep_model(factor=L2_FACTOR, rate=DROP)

  opt=tf.keras.optimizers.Adam(learning_rate=LR)
  model.compile(loss='kullback_leibler_divergence', optimizer=opt,   
  metrics=['accuracy'])

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, shuffle= True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=VAL_SIZE, shuffle= True)

#fit the model
history=model.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(X_val, y_val), verbose=1)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

# Evaluation

In [18]:
  from sklearn.metrics import accuracy_score
  yhat = model.predict(X_test)
  # round probabilities to class labels
  yhat = yhat.round()
  # calculate accuracy
  acc = accuracy_score(y_test, yhat)
  # store result
  print('Accuracy: %.3f' % acc)

Accuracy: 0.727


In [19]:
if MODEL == "RNN":
  y_test=y_test.values
  y_pred = model.predict(X_test).round()
  from sklearn.metrics import classification_report
  print(classification_report(y_test, y_pred, zero_division=0, target_names=df_new.columns[1:]))

              precision    recall  f1-score   support

         Aut       0.00      0.00      0.00         0
       Class       0.00      0.00      0.00         0
        Itpr       0.59      0.57      0.58        30
         Lit       0.00      0.00      0.00         0
        Prec       0.84      0.86      0.85        81
       Princ       0.46      0.32      0.37        19
         Psy       0.00      0.00      0.00         0
        Rule       0.67      0.80      0.73        20
        Syst       0.00      0.00      0.00         0
        Tele       0.00      0.00      0.00         0

   micro avg       0.73      0.73      0.73       150
   macro avg       0.26      0.25      0.25       150
weighted avg       0.72      0.73      0.72       150
 samples avg       0.73      0.73      0.73       150



Saving of test parameters and results on file:

In [20]:
with open("multilabel_argumentation_type_classification_results.txt", "a") as f:
    f.write("MODEL: " + MODEL + "\n")
    f.write("EMBEDDING: " + EMBEDDING  + "\n")
    f.write("\n")

    f.write("DATASET:\n")
    f.write(str(DATASET["Argumentation_scheme"].value_counts()))
    f.write("\n\n")
    f.write("NUMBER OF SAMPLES: " + str(len(X)) + "\n")
    f.write("TEST_SIZE: " + str(TEST_SIZE*100) + "%\n")
    f.write("\n")

    f.write("PARAMETERS:\n")

    if MODEL == "RNN":
      f.write("EPOCHS: " + str(EPOCHS) + "\n")
      f.write("BATCH_SIZE: " + str(BATCH_SIZE) + "\n")
      f.write("LAYERS: " + str(LAYERS) + "\n")
      f.write("LR: " + str(LR) + "\n")
      f.write("L2_FACTOR: " + str(L2_FACTOR) + "\n")
      f.write("DROP: " + str(DROP) + "\n")
      f.write("\n")

    if MODEL == "BERT":
      f.write("EPOCHS: " + str(EPOCHS) + "\n")
      f.write("BATCH_SIZE: " + str(BATCH_SIZE) + "\n")
      f.write("LR: " + str(LR) + "\n")
      f.write("DROP: " + str(DROP) + "\n")
      f.write("\n")

    if MODEL == "SVC":
      f.write("KERNEL: " + str(KERNEL) + "\n")
      f.write("GAMMA: " + str(GAMMA) + "\n")
      f.write("C: " + str(C) + "\n")
      f.write("DEGREE: " + str(DEGREE) + "\n")
      f.write("\n")

    f.write("RESULTS:\n")
    f.write(classification_report(y_test, y_pred, zero_division=0, target_names=df_new.columns[1:])+"\n")

    f.write("\n")
    f.write("#######################################################")
    f.write("\n\n")
    

