# Task for the identification of argumentative sentence types with a multilabel approach

Files needed to run the notebook:

- The pickle file *dataset.pkl*; 

Import libraries:

In [1]:
pip install sentence_transformers



In [2]:
import pickle as pkl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import json
import numpy as np

from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Dropout, LSTM, Input, GRU, BatchNormalization
from tensorflow.keras.regularizers import l2

In [3]:
df = pd.read_pickle("./dataset.pkl")  

In [4]:
df.head()

Unnamed: 0,Document,Name,Id,Sentence,Type,Supported_by,Supported_from_failure,Attacked_by,Inhibited_by,Rephrased_by,Argumentation_scheme
2,2004,prem,A3,respect borne mind pursuant Article 58 Statute...,L,,,,,,Rule
3,2004,prem,A4,Advocate General states point 20 Opinion Commi...,F,,,,,,Aut
5,2004,prem,A6,Regarding Wam’s argument Commission’s appeal i...,L|F,,,,,,Rule
6,2004,prem,A7,Appeals judgments Court First Instance governe...,L,,,,,,Rule
7,2004,prem,A8,Next must noted obligation provide statement r...,L,,,,,,Prec


In [5]:
df["Argumentation_scheme"].values[0]

'Rule'

In [6]:
df["Argumentation_scheme"].value_counts()

Prec                  279
Itpr                   95
Princ                  64
Rule                   61
Prec|Rule              35
Itpr|Prec              22
Aut                    21
Class|Prec|Rule         9
Tele                    8
Class                   7
Class|Prec              5
Aut|Itpr                5
Prec|Tele               5
Itpr|Tele               4
Rule|Syst               3
Itpr|Rule               3
Prec|Syst               3
Syst                    3
Princ|Rule              2
Psy|Tele                2
Lit|Rule                2
Lit|Prec                2
Prec|Psy|Rule|Tele      1
Princ|Tele              1
Itpr|Syst               1
Itpr|Psy                1
Itpr|Lit                1
Aut|Prec|Princ          1
Princ|Psy|Syst          1
Itpr|Princ|Syst         1
Lit|Rule|Syst           1
Syst|Tele               1
Aut|Class               1
Lit|Rule|Tele           1
Aut|Itpr|Prec           1
Prec|Psy                1
Aut|Rule                1
Aut|Syst                1
Lit         

In [7]:
value_counts = df['Argumentation_scheme'].value_counts() # Specific column 
df_truncated60 = df[df['Argumentation_scheme'].isin(value_counts[value_counts > 60].index)]
df_truncated20 = df[df['Argumentation_scheme'].isin(value_counts[value_counts > 20].index)]

In [8]:
df_truncated60['Argumentation_scheme'].value_counts()

Prec     279
Itpr      95
Princ     64
Rule      61
Name: Argumentation_scheme, dtype: int64

In [9]:
df_truncated20['Argumentation_scheme'].value_counts()

Prec         279
Itpr          95
Princ         64
Rule          61
Prec|Rule     35
Itpr|Prec     22
Aut           21
Name: Argumentation_scheme, dtype: int64

In [10]:
df_balanced = pd.concat([df[df["Argumentation_scheme"] == "Prec"].sample(61),
                    df[df["Argumentation_scheme"] == "Itpr"].sample(61),
                    df[df["Argumentation_scheme"] == "Princ"].sample(61),
                    df[df["Argumentation_scheme"] == "Rule"]
                    ])

In [11]:
df_balanced["Argumentation_scheme"].value_counts()

Prec     61
Itpr     61
Princ    61
Rule     61
Name: Argumentation_scheme, dtype: int64

Choice of parameters:

In [12]:
# EMBEDDINGS
EMBEDDING = "legal_bert_sentence"

# MODELS
MODEL = "RNN"

# DATASET (uncomment the chosen one)
# DATASET = df
# DATASET = df_truncated60
DATASET = df_truncated20
# DATASET = df_balanced

TEST_SIZE = 0.3
VAL_SIZE = 0.2  # percentage taken from the train subset

# MODEL PARAMETERS

EPOCHS = 200
BATCH_SIZE = 64
LR = 10e-7
DROP = 0.2

L2_FACTOR = 0.001
LAYERS = [256, 128, 64] # 3 layers

In [13]:
aut_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Aut' in str(x) else 0)
class_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Class' in str(x) else 0)
itpr_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Itpr' in str(x) else 0)
lit_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Lit' in str(x) else 0)
prec_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Prec' in str(x) else 0)
princ_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Princ' in str(x) else 0)
psy_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Psy' in str(x) else 0)
rule_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Rule' in str(x) else 0)
syst_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Syst' in str(x) else 0)
tele_col = df['Argumentation_scheme'].apply(lambda x: 1 if 'Tele' in str(x) else 0)

df_new = pd.DataFrame(DATASET['Sentence'])
df_new['Aut'] = aut_col
df_new['Class'] = class_col
df_new['Itpr'] = itpr_col
df_new['Lit'] = lit_col
df_new['Prec'] = prec_col
df_new['Princ'] = princ_col
df_new['Psy'] = psy_col
df_new['Rule'] = rule_col
df_new['Syst'] = syst_col
df_new['Tele'] = tele_col

In [14]:
df_new.head()

Unnamed: 0,Sentence,Aut,Class,Itpr,Lit,Prec,Princ,Psy,Rule,Syst,Tele
2,respect borne mind pursuant Article 58 Statute...,0,0,0,0,0,0,0,1,0,0
3,Advocate General states point 20 Opinion Commi...,1,0,0,0,0,0,0,0,0,0
5,Regarding Wam’s argument Commission’s appeal i...,0,0,0,0,0,0,0,1,0,0
6,Appeals judgments Court First Instance governe...,0,0,0,0,0,0,0,1,0,0
7,Next must noted obligation provide statement r...,0,0,0,0,1,0,0,0,0,0


# Embedding

In [15]:
if EMBEDDING == "legal_bert_sentence":
  sbert_model = SentenceTransformer("nlpaueb/legal-bert-small-uncased")
  sentence_embeddings = sbert_model.encode(df_new["Sentence"].values)

  X = sentence_embeddings
  # X = X.reshape(len(X), 1, X.shape[1])
  y = df_new.drop("Sentence", axis=1)

Some weights of the model checkpoint at /root/.cache/torch/sentence_transformers/nlpaueb_legal-bert-small-uncased were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
#y = y.apply(lambda row: row/list(row).count(1) if 1 in list(row) else row, axis=1)

In [17]:
y.head()

Unnamed: 0,Aut,Class,Itpr,Lit,Prec,Princ,Psy,Rule,Syst,Tele
2,0,0,0,0,0,0,0,1,0,0
3,1,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,1,0,0
6,0,0,0,0,0,0,0,1,0,0
7,0,0,0,0,1,0,0,0,0,0


In [18]:
print("EMBEDDING: ", EMBEDDING)
print("SHAPE X: ", X.shape)
print("SHAPE y: ", y.shape)

EMBEDDING:  legal_bert_sentence
SHAPE X:  (577, 512)
SHAPE y:  (577, 10)


# Training

In [19]:
if MODEL == "RNNout":
  INPUTS = X.shape[-1]
  OUTPUTS = y.shape[-1]

  def create_deep_model(factor, rate):
      model = Sequential()      
      model.add(LSTM(units=LAYERS[0], return_sequences = True, input_dim=INPUTS,kernel_initializer='he_uniform', kernel_regularizer=l2(factor), 
        activation='relu')), Dropout(rate),
      model.add(LSTM(units=LAYERS[1], return_sequences = True, kernel_regularizer=l2(factor),
        activation='relu')), Dropout(rate),
      model.add(LSTM(units=LAYERS[2], kernel_regularizer=l2(factor),
        activation='relu')), Dropout(rate),
      #Output layer
      model.add(Dense(units=OUTPUTS, activation='softmax'))
      return model
  model= create_deep_model(factor=L2_FACTOR, rate=DROP)

  opt=tf.keras.optimizers.Adam(learning_rate=LR)
  model.compile(loss='kullback_leibler_divergence', optimizer=opt,   
  metrics=['accuracy'])

if MODEL == "RNN":
  INPUTS = X.shape[-1]
  OUTPUTS = y.shape[-1]

  def create_deep_model(factor, rate):

      input= Input(shape=(INPUTS, 1), name="input")
      print(input.shape)
      x = LSTM(units=LAYERS[0], return_sequences = True,kernel_initializer='he_uniform', kernel_regularizer=l2(factor), activation='relu')(input)
      x = Dropout(rate)(x)
      x = LSTM(units=LAYERS[1], return_sequences = True,kernel_initializer='he_uniform', kernel_regularizer=l2(factor), activation='relu')(x)
      x = Dropout(rate)(x)
      x = LSTM(units=LAYERS[2], kernel_regularizer=l2(factor), activation='relu')(x)
      x = Dropout(rate)(x)
      #Output layer
      aut_output  = Dense(units=1, activation='sigmoid', name="aut")(x)
      class_output  = Dense(units=1, activation='sigmoid', name="class")(x)
      itpr_output  = Dense(units=1, activation='sigmoid', name="itpr")(x)
      lit_output  = Dense(units=1, activation='sigmoid', name="lit")(x)
      prec_output  = Dense(units=1, activation='sigmoid', name="prec")(x)
      princ_output  = Dense(units=1, activation='sigmoid', name="princ")(x)
      psy_output  = Dense(units=1, activation='sigmoid', name="psy")(x)
      rule_output  = Dense(units=1, activation='sigmoid', name="rule")(x)
      syst_output  = Dense(units=1, activation='sigmoid', name="syst")(x)
      tele_output  = Dense(units=1, activation='sigmoid', name="tele")(x)
      model = Model(
        inputs = input,
        outputs = [
          aut_output,
          class_output,
          itpr_output,
          lit_output,
          prec_output,
          princ_output,
          psy_output,
          rule_output,
          syst_output,
          tele_output]
    )
      return model
  model= create_deep_model(factor=L2_FACTOR, rate=DROP)

  opt=tf.keras.optimizers.Adam(learning_rate=LR, clipnorm=1.0, clipvalue=0.5)
  # opt = tf.keras.optimizers.SGD(learning_rate=LR, momentum=0.9)
  loss = tf.keras.losses.BinaryCrossentropy(axis=1)
  model.compile(loss=loss, optimizer=opt,   
  metrics=['accuracy'])

(None, 512, 1)












In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, shuffle= True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=VAL_SIZE, shuffle= True)

#fit the model
history=model.fit(x=X_train, y={
                  "aut": y_train['Aut'],
                  "class": y_train["Class"], 
                  "itpr": y_train["Itpr"],
                  "lit": y_train["Lit"],  
                  "prec": y_train["Prec"],  
                  "princ": y_train["Princ"],  
                  "psy": y_train["Psy"],  
                  "rule": y_train["Rule"],  
                  "syst": y_train["Syst"],   
                  "tele": y_train['Tele'] 
                  },
                  batch_size=BATCH_SIZE, epochs=EPOCHS,
                  validation_data=(X_val, {
                  "aut": y_val['Aut'],
                  "class": y_val["Class"], 
                  "itpr": y_val["Itpr"],
                  "lit": y_val["Lit"],  
                  "prec": y_val["Prec"],  
                  "princ": y_val["Princ"],  
                  "psy": y_val["Psy"],  
                  "rule": y_val["Rule"],  
                  "syst": y_val["Syst"],   
                  "tele": y_val['Tele'] }),
                  verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# Evaluation

In [None]:
  from sklearn.metrics import accuracy_score
  
  y_pred = model.predict(X_test)
  # round probabilities to class labels
  aut_pred = y_pred[0].round()
  class_pred = y_pred[1].round()
  itpr_pred = y_pred[2].round()
  lit_pred = y_pred[3].round()
  prec_pred = y_pred[4].round()
  princ_pred = y_pred[5].round()
  psy_pred = y_pred[6].round()
  rule_pred = y_pred[7].round()
  syst_pred = y_pred[8].round()
  tele_pred = y_pred[9].round()

  aut_test = np.round(y_test['Aut']).to_numpy()
  class_test = np.round(y_test['Class']).to_numpy()
  itpr_test = np.round(y_test['Itpr']).to_numpy()
  lit_test = np.round(y_test['Lit']).to_numpy()
  prec_test = np.round(y_test['Prec']).to_numpy()
  princ_test = np.round(y_test['Princ']).to_numpy()
  psy_test = np.round(y_test['Psy']).to_numpy()
  rule_test = np.round(y_test['Rule']).to_numpy()
  syst_test = np.round(y_test['Syst']).to_numpy()
  tele_test = np.round(y_test['Tele']).to_numpy()
  # calculate accuracy
  aut_acc = accuracy_score(aut_test, aut_pred)
  class_acc = accuracy_score(class_test, class_pred)
  itpr_acc = accuracy_score(itpr_test, itpr_pred)
  lit_acc = accuracy_score(lit_test, lit_pred)
  prec_acc = accuracy_score(prec_test, prec_pred)
  princ_acc = accuracy_score(princ_test, princ_pred)
  psy_acc = accuracy_score(psy_test, psy_pred)
  rule_acc = accuracy_score(rule_test, rule_pred)
  syst_acc = accuracy_score(syst_test, syst_pred)
  tele_acc = accuracy_score(tele_test, tele_pred)
  # store result
  print('Aut Accuracy:%.3f' % aut_acc)
  print('Class Accuracy:%.3f' % class_acc)
  print('Itpr Accuracy:%.3f' % itpr_acc)
  print('Lit Accuracy:%.3f' % lit_acc)
  print('Prec Accuracy:%.3f' % prec_acc)
  print('Princ Accuracy:%.3f' % princ_acc)
  print('Psy Accuracy:%.3f' % psy_acc)
  print('Rule Accuracy:%.3f' % rule_acc)
  print('Syst Accuracy:%.3f' % syst_acc)
  print('Tele Accuracy:%.3f' % tele_acc)

  

In [33]:
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import coverage_error
from sklearn.metrics import label_ranking_average_precision_score
if MODEL == "RNN":
  y_test=y_test.values
  y_pred = np.array(model.predict(X_test))
  y_pred = y_pred.reshape(y_pred.shape[1], y_pred.shape[0])
  # y_pred = np.round(y_pred)
  from sklearn.metrics import classification_report
  # print(classification_report(y_test, y_pred, zero_division=0, target_names=df_new.columns[1:]))
  print("Coverage error: ", coverage_error(y_test, y_pred))
  print("Label ranking average precision: ", label_ranking_average_precision_score(y_test, y_pred))

Coverage error:  5.833333333333333
Label ranking average precision:  0.2992690658638938


Saving of test parameters and results on file:

In [24]:
with open("multilabel_argumentation_type_classification_results.txt", "a") as f:
    f.write("MODEL: " + MODEL + "\n")
    f.write("EMBEDDING: " + EMBEDDING  + "\n")
    f.write("\n")

    f.write("DATASET:\n")
    f.write(str(DATASET["Argumentation_scheme"].value_counts()))
    f.write("\n\n")
    f.write("NUMBER OF SAMPLES: " + str(len(X)) + "\n")
    f.write("TEST_SIZE: " + str(TEST_SIZE*100) + "%\n")
    f.write("\n")

    f.write("PARAMETERS:\n")

    if MODEL == "RNN":
      f.write("EPOCHS: " + str(EPOCHS) + "\n")
      f.write("BATCH_SIZE: " + str(BATCH_SIZE) + "\n")
      f.write("LAYERS: " + str(LAYERS) + "\n")
      f.write("LR: " + str(LR) + "\n")
      f.write("L2_FACTOR: " + str(L2_FACTOR) + "\n")
      f.write("DROP: " + str(DROP) + "\n")
      f.write("\n")

    f.write("RESULTS:\n")
    f.write(classification_report(y_test, y_pred, zero_division=0, target_names=df_new.columns[1:])+"\n")

    f.write("\n")
    f.write("#######################################################")
    f.write("\n\n")
    



ValueError: ignored