<a href="https://colab.research.google.com/github/alexlimatds/circle-2022/blob/main/RRLLJ_SBERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Rhetorical Role Labeling for Legal Judgments - experiments with SBERT features

In this notebook we utilize Sentence BERT (SBERT) features to represent the sentences.
We use the SentenceTransformer library for SBERT implementation.

### Installing dependencies

In [1]:
pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 2.9 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 12.1 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 32.3 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 4.6 MB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 40.6 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

### Loading dataset

In [2]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
g_drive_dir = "/content/gdrive/MyDrive/"

Mounted at /content/gdrive


In [3]:
!mkdir data
!mkdir data/train
!tar -xf {g_drive_dir}AILA_2021/AILA_2021_train.tar.xz -C data/train

train_dir = 'data/train/'

In [4]:
import pandas as pd
from os import listdir

def read_docs(dir_name):
  docs_ = {} # key: file name, value: dataframe with sentences and labels
  labels_ = set()
  for f in listdir(dir_name):
    df = pd.read_csv(
        dir_name + f, 
        sep='\t', 
        names=['sentence', 'label'])
    docs_[f] = df
    labels_.update(df['label'].to_list())
  return docs_, labels_

docs_train, labels_train = read_docs(train_dir)
n_classes = len(labels_train)
print(f'TRAIN: {len(docs_train)} documents read.')
print(f'Number of classes: {n_classes}')

TRAIN: 60 documents read.
Number of classes: 7


### SBERT features

In [5]:
from sentence_transformers import SentenceTransformer

sent_encoder = SentenceTransformer('sentence-transformers/LaBSE')

Downloading:   0%|          | 0.00/391 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/804 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/461 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.62M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.22M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/114 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

In [6]:
n_features = sent_encoder.get_sentence_embedding_dimension()

In [7]:
%%time
docs_train_features_numpy = {} # key: file id, value: numpy matrix of features
for doc_id, df in docs_train.items():
  docs_train_features_numpy[doc_id] = sent_encoder.encode(df['sentence'].tolist())


CPU times: user 1min 29s, sys: 1.44 s, total: 1min 31s
Wall time: 1min 40s


### Label encoder

In [8]:
from sklearn.preprocessing import LabelBinarizer

label_encoder = LabelBinarizer()
label_encoder.fit(list(labels_train))


LabelBinarizer()

### Evaluation functions

In [9]:
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import display, HTML

def docs_to_sentences(docs_idx, doc_keys_list):
  features_ = None
  targets_ = []
  for idx in docs_idx:
    doc_id = doc_keys_list[idx]
    if features_ is None:
      features_ = docs_train_features_numpy[doc_id]
    else:
      features_ = np.vstack((features_, docs_train_features_numpy[doc_id]))
    targets_.extend(docs_train[doc_id]['label'].tolist())
  return features_, targets_

def metrics_report(title, averages, stds):
  report_df = pd.DataFrame(columns=['Score', 'Standard Deviation'])
  report_df.loc['Precision'] = [f'{averages[0]:.4f}', f'{stds[0]:.4f}']
  report_df.loc['Recall'] = [f'{averages[1]:.4f}', f'{stds[1]:.4f}']
  report_df.loc['F1'] = [f'{averages[2]:.4f}', f'{stds[2]:.4f}']
  display(HTML(f'<br><span style="font-weight: bold">{title}: cross-validation macro averages</span>'))
  display(report_df)

def classification_report(metrics):
  report_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
  for i, l in enumerate(label_encoder.classes_):
    report_df.loc[l] = [
      f'{metrics[i, 0]:.4f}', 
      f'{metrics[i, 1]:.4f}', 
      f'{metrics[i, 2]:.4f}', 
    ]
  display(HTML(f'<br><span style="font-weight: bold">Classification Report (cross-validation test averages)</span>'))
  display(report_df)

test_metrics = {}

def cross_validation(trainer):
  train_metrics_cross = []
  test_metrics_cross = []
  test_metrics_by_class = np.zeros((n_classes, 3)) # 3 metrics (P, R, F1)
  n_folds = 5
  skf = KFold(n_splits=n_folds) # for cross-validation
  docs_list = list(docs_train.keys())
  for train_docs_idx, test_docs_idx in skf.split(docs_list): # The cross-validation splitting is document-oriented
    # train
    train_features_fold, train_targets_fold = docs_to_sentences(train_docs_idx, docs_list)
    model = trainer(train_features_fold, train_targets_fold)
    # test
    test_features_fold, test_targets_fold = docs_to_sentences(test_docs_idx, docs_list)
    predictions = model.predict(test_features_fold)
    # averaged test metrics
    p_test, r_test, f1_test, _ = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    test_metrics_cross.append([p_test, r_test, f1_test])
    # test metrics by class
    metrics = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average=None, 
        zero_division=0, 
        labels=label_encoder.classes_)
    test_metrics_by_class = test_metrics_by_class + np.hstack((
        metrics[0].reshape(-1, 1),  # precision
        metrics[1].reshape(-1, 1),  # recall
        metrics[1].reshape(-1, 1))) # F1
    # train metrics
    predictions = model.predict(train_features_fold)
    p_train, r_train, f1_train, _ = precision_recall_fscore_support(
        train_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    train_metrics_cross.append([p_train, r_train, f1_train])
  
  train_metrics_cross = np.array(train_metrics_cross)
  train_mean = np.mean(train_metrics_cross, axis=0)
  train_std = np.std(train_metrics_cross, axis=0)
  metrics_report('TRAIN', train_mean, train_std)

  test_metrics_cross = np.array(test_metrics_cross)
  test_mean = np.mean(test_metrics_cross, axis=0)
  test_std = np.std(test_metrics_cross, axis=0)
  metrics_report('TEST', test_mean, test_std)

  test_metrics_by_class /= n_classes
  classification_report(test_metrics_by_class)

  test_metrics[model.__class__.__name__] = test_mean

### Scikit-learn Models

#### Logistic regression

In [34]:
from sklearn.linear_model import LogisticRegression

def lr_trainer(X, y):
  logreg = LogisticRegression(solver='sag', random_state=1)
  return logreg.fit(X, y)

In [35]:
%%time

cross_validation(lr_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6468,0.0171
Recall,0.4582,0.0108
F1,0.4967,0.0081


Unnamed: 0,Score,Standard Deviation
Precision,0.5069,0.0588
Recall,0.3826,0.0179
F1,0.4065,0.0223


Unnamed: 0,Precision,Recall,F1
Argument,0.367,0.1515,0.1515
Facts,0.3822,0.4478,0.4478
Precedent,0.3291,0.2166,0.2166
Ratio of the decision,0.3673,0.5017,0.5017
Ruling by Lower Court,0.0714,0.0016,0.0016
Ruling by Present Court,0.5814,0.2919,0.2919
Statute,0.4362,0.3022,0.3022


CPU times: user 41.2 s, sys: 749 ms, total: 42 s
Wall time: 41 s


#### Linear SVM

In [36]:
from sklearn.svm import LinearSVC

def linear_svm_trainer(X, y):
  svm = LinearSVC(random_state=1)
  return svm.fit(X, y)

In [37]:
%%time

cross_validation(linear_svm_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6912,0.0186
Recall,0.494,0.012
F1,0.5271,0.0086


Unnamed: 0,Score,Standard Deviation
Precision,0.4961,0.0234
Recall,0.3902,0.0135
F1,0.4055,0.0143


Unnamed: 0,Precision,Recall,F1
Argument,0.3788,0.1542,0.1542
Facts,0.3719,0.4609,0.4609
Precedent,0.3158,0.2176,0.2176
Ratio of the decision,0.3715,0.4766,0.4766
Ruling by Lower Court,0.1429,0.0034,0.0034
Ruling by Present Court,0.5055,0.3133,0.3133
Statute,0.3944,0.3251,0.3251


CPU times: user 25.3 s, sys: 858 ms, total: 26.1 s
Wall time: 25.1 s


#### KNN

In [38]:
from sklearn.neighbors import KNeighborsClassifier

def knn_trainer(X, y):
  knn = KNeighborsClassifier(5)
  return knn.fit(X, y)

In [39]:
%%time

cross_validation(knn_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.7197,0.0045
Recall,0.6189,0.0088
F1,0.6459,0.0082


Unnamed: 0,Score,Standard Deviation
Precision,0.4236,0.034
Recall,0.3832,0.0125
F1,0.3894,0.0216


Unnamed: 0,Precision,Recall,F1
Argument,0.1939,0.1802,0.1802
Facts,0.3649,0.4179,0.4179
Precedent,0.2492,0.2843,0.2843
Ratio of the decision,0.3691,0.3674,0.3674
Ruling by Lower Court,0.131,0.0372,0.0372
Ruling by Present Court,0.471,0.3246,0.3246
Statute,0.3391,0.3048,0.3048


CPU times: user 38 s, sys: 4.81 s, total: 42.8 s
Wall time: 26 s


#### MLP Classifier

In [40]:
from sklearn.neural_network import MLPClassifier

def mlp_trainer(X, y):
  # Default MLP from scikit-learn
  mlp = MLPClassifier(early_stopping=True, random_state=1)
  return mlp.fit(X, y)

In [41]:
%%time

cross_validation(mlp_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6992,0.0361
Recall,0.5728,0.0447
F1,0.6092,0.0457


Unnamed: 0,Score,Standard Deviation
Precision,0.5073,0.082
Recall,0.4073,0.0223
F1,0.4222,0.0207


Unnamed: 0,Precision,Recall,F1
Argument,0.3348,0.1907,0.1907
Facts,0.387,0.4211,0.4211
Precedent,0.3107,0.2425,0.2425
Ratio of the decision,0.3713,0.4719,0.4719
Ruling by Lower Court,0.2437,0.0352,0.0352
Ruling by Present Court,0.4952,0.3564,0.3564
Statute,0.3936,0.3189,0.3189


CPU times: user 39.7 s, sys: 21.1 s, total: 1min
Wall time: 31.4 s


#### Decision Tree

In [42]:
from sklearn.tree import DecisionTreeClassifier

def decision_tree_trainer(X, y):
  dtree = DecisionTreeClassifier(max_depth=5, random_state=1)
  return dtree.fit(X, y)

In [43]:
%%time

cross_validation(decision_tree_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.456,0.055
Recall,0.26,0.0136
F1,0.2617,0.023


Unnamed: 0,Score,Standard Deviation
Precision,0.3457,0.0514
Recall,0.2217,0.028
F1,0.2105,0.0293


Unnamed: 0,Precision,Recall,F1
Argument,0.0935,0.0056,0.0056
Facts,0.2758,0.3637,0.3637
Precedent,0.1997,0.0543,0.0543
Ratio of the decision,0.314,0.4989,0.4989
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.6076,0.1147,0.1147
Statute,0.2375,0.0713,0.0713


CPU times: user 19.2 s, sys: 43.7 ms, total: 19.2 s
Wall time: 19.2 s


#### Random Forest

In [44]:
from sklearn.ensemble import RandomForestClassifier

def random_forest_trainer(X, y):
  rforest = RandomForestClassifier(max_depth=5, n_estimators=10, random_state=1)
  return rforest.fit(X, y)

In [45]:
%%time

cross_validation(random_forest_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5144,0.0475
Recall,0.2334,0.0063
F1,0.2185,0.0052


Unnamed: 0,Score,Standard Deviation
Precision,0.3517,0.1007
Recall,0.2038,0.0105
F1,0.1817,0.0132


Unnamed: 0,Precision,Recall,F1
Argument,0.1429,0.0027,0.0027
Facts,0.3563,0.2561,0.2561
Precedent,0.2466,0.0043,0.0043
Ratio of the decision,0.2984,0.6378,0.6378
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.7143,0.1179,0.1179
Statute,0.0,0.0,0.0


CPU times: user 5.21 s, sys: 40.9 ms, total: 5.25 s
Wall time: 5.22 s


#### AdaBoost

In [46]:
from sklearn.ensemble import AdaBoostClassifier

def adaboost_trainer(X, y):
  adab = AdaBoostClassifier(random_state=1)
  return adab.fit(X, y)

In [47]:
%%time

cross_validation(adaboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.3637,0.0251
Recall,0.3047,0.0128
F1,0.2946,0.0146


Unnamed: 0,Score,Standard Deviation
Precision,0.3232,0.0334
Recall,0.2823,0.0143
F1,0.2657,0.0034


Unnamed: 0,Precision,Recall,F1
Argument,0.2153,0.0221,0.0221
Facts,0.2922,0.3681,0.3681
Precedent,0.1956,0.0392,0.0392
Ratio of the decision,0.3299,0.485,0.485
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.3337,0.2504,0.2504
Statute,0.2491,0.2465,0.2465


CPU times: user 3min 28s, sys: 187 ms, total: 3min 29s
Wall time: 3min 27s


#### Naive Bayes

In [48]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes_trainer(X, y):
  nb = GaussianNB()
  return nb.fit(X, y)

In [49]:
%%time
cross_validation(naive_bayes_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.4204,0.0026
Recall,0.5038,0.0045
F1,0.4357,0.006


Unnamed: 0,Score,Standard Deviation
Precision,0.3764,0.0212
Recall,0.4456,0.0276
F1,0.385,0.013


Unnamed: 0,Precision,Recall,F1
Argument,0.2216,0.2584,0.2584
Facts,0.3656,0.4022,0.4022
Precedent,0.288,0.2326,0.2326
Ratio of the decision,0.4251,0.2698,0.2698
Ruling by Lower Court,0.076,0.2105,0.2105
Ruling by Present Court,0.2572,0.3715,0.3715
Statute,0.2486,0.4827,0.4827


CPU times: user 3.12 s, sys: 72.3 ms, total: 3.2 s
Wall time: 3.17 s


#### XGBoost

In [50]:
from xgboost.sklearn import XGBClassifier

def xgboost_trainer(X, y):
  if torch.cuda.is_available():
    xgboost = XGBClassifier(objective="multi:softmax", tree_method='gpu_hist', gpu_id=0)
  else:
    xgboost = XGBClassifier(objective="multi:softmax", tree_method='hist')
  return xgboost.fit(X, y)

In [51]:
%%time
cross_validation(xgboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8525,0.0031
Recall,0.5804,0.0188
F1,0.6488,0.018


Unnamed: 0,Score,Standard Deviation
Precision,0.5085,0.0543
Recall,0.3386,0.017
F1,0.3609,0.0159


Unnamed: 0,Precision,Recall,F1
Argument,0.3904,0.0735,0.0735
Facts,0.3754,0.4361,0.4361
Precedent,0.3637,0.1658,0.1658
Ratio of the decision,0.3497,0.5375,0.5375
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.6313,0.2483,0.2483
Statute,0.4319,0.2316,0.2316


CPU times: user 1min 24s, sys: 1.15 s, total: 1min 25s
Wall time: 1min 25s


### PyTorch models

In [10]:
import torch

gpu_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [11]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
  def __init__(self, inputs, targets, device):
    self.X = torch.from_numpy(inputs).float().to(device)
    self.y = torch.from_numpy(label_encoder.transform(targets)).float().to(device)

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return [self.X[idx], self.y[idx]]


In [53]:
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import ShuffleSplit

torch.manual_seed(1)

class MLPTrainer:

  def __init__(self, model, device, l2_penalty=0.0001):
    self.model = model.to(device)
    setattr(self.model.__class__, 'predict', self.predict)
    self.device = device
    # The training replicates the default configuration from scikit-learn's MLPClassifier
    self.criterion = torch.nn.CrossEntropyLoss().to(device)
    self.lambd = l2_penalty # weight decay for Adam optmizer
    self.n_epochs = 200

  def fit(self, inputs, targets, verbose=False):
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 7
    early_stop_count = 0
    previous_loss_validation = float("inf")
    # splitting train data into train and validation sets in order to perform early stopping
    spl = ShuffleSplit(n_splits=1, train_size=0.9, random_state=1)
    targets = np.array(targets)
    for train_index, val_index in spl.split(inputs):
      # getting datasets
      train_x = inputs[train_index]
      train_y = targets[train_index]
      validation_x = inputs[val_index]
      validation_y = targets[val_index]
      train_dl = DataLoader(
        MyDataset(train_x, train_y, self.device), 
        batch_size=64)
      validation_dl = DataLoader(
        MyDataset(validation_x, validation_y, self.device), 
        batch_size=len(validation_x))
      # training
      self.model.train()
      optimizer = Adam(
        self.model.parameters(), 
        weight_decay=self.lambd)
      for epoch in range(self.n_epochs):
        # iterate mini batches
        for x, y in train_dl:
          optimizer.zero_grad()
          yhat = self.model(x)
          loss = self.criterion(yhat, y)
          loss.backward()
          optimizer.step()
        # Early stopping
        for x, y in validation_dl:
          loss_validation = self.criterion(self.model(x), y)
        if loss_validation >= previous_loss_validation - tol:
          early_stop_count += 1
        else:
          early_stop_count = 0
        if early_stop_count == n_iter_no_change:
          break
        previous_loss_validation = loss_validation
    if verbose:
      print(f'TRAIN: Stopped at epoch {epoch + 1} {"(MAX EPOCH)" if epoch + 1 == self.n_epochs else ""}')
    
    self.model.eval()
    return self.model

  def predict(self, X):
    y = self.model.forward(torch.from_numpy(X).float().to(self.device))
    return label_encoder.inverse_transform(y.detach().to('cpu').numpy())

#### TorchMLP

In [13]:
import torch.nn
from torch.nn.init import xavier_uniform_
from torch.nn.init import kaiming_uniform_

class TorchMLP(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLP, self).__init__()
    # hidden layer
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_inputs, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)

In [17]:
def torch_mlp_trainer(X, y):
  trainer = MLPTrainer(
      TorchMLP(n_features, n_classes), 
      gpu_device, 
      l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [54]:
%%time
cross_validation(torch_mlp_trainer)

TRAIN: Stopped at epoch 28 
TRAIN: Stopped at epoch 19 
TRAIN: Stopped at epoch 26 
TRAIN: Stopped at epoch 26 
TRAIN: Stopped at epoch 23 


Unnamed: 0,Score,Standard Deviation
Precision,0.6504,0.0575
Recall,0.4608,0.0157
F1,0.4958,0.0122


Unnamed: 0,Score,Standard Deviation
Precision,0.4934,0.0406
Recall,0.3853,0.0228
F1,0.4086,0.0204


Unnamed: 0,Precision,Recall,F1
Argument,0.3416,0.1741,0.1741
Facts,0.3863,0.4369,0.4369
Precedent,0.3459,0.2121,0.2121
Ratio of the decision,0.369,0.511,0.511
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.592,0.2846,0.2846
Statute,0.4323,0.3076,0.3076


CPU times: user 32.5 s, sys: 1.16 s, total: 33.6 s
Wall time: 34.3 s


#### TorchMLPMaxPool

In [31]:
import math

class TorchMLPMaxPool(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLPMaxPool, self).__init__()
    # max pool
    window_size = 2
    max_pool = torch.nn.MaxPool1d(window_size, ceil_mode=True)
    n_out_pool = math.ceil((n_inputs - window_size) / window_size + 1)
    # hidden layers
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_out_pool, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      max_pool, 
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)

In [32]:
def torch_mlp_maxpool_trainer(X, y):
  trainer = MLPTrainer(
      TorchMLPMaxPool(n_features, n_classes), 
      gpu_device, 
      l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [58]:
%%time
cross_validation(torch_mlp_maxpool_trainer)

TRAIN: Stopped at epoch 28 
TRAIN: Stopped at epoch 22 
TRAIN: Stopped at epoch 23 
TRAIN: Stopped at epoch 26 
TRAIN: Stopped at epoch 26 


Unnamed: 0,Score,Standard Deviation
Precision,0.5769,0.0072
Recall,0.4063,0.0096
F1,0.4432,0.0087


Unnamed: 0,Score,Standard Deviation
Precision,0.5056,0.0373
Recall,0.3449,0.0226
F1,0.3703,0.0223


Unnamed: 0,Precision,Recall,F1
Argument,0.3993,0.1122,0.1122
Facts,0.3825,0.4335,0.4335
Precedent,0.3343,0.2096,0.2096
Ratio of the decision,0.3551,0.5214,0.5214
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.6193,0.2043,0.2043
Statute,0.4372,0.2432,0.2432


CPU times: user 33.3 s, sys: 1.27 s, total: 34.5 s
Wall time: 34.7 s


### Summary

In [59]:
from IPython.display import display, update_display

metrics_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
for model_name, metrics in test_metrics.items():
  metrics_df.loc[model_name] = [f'{metrics[0]:.4f}', f'{metrics[1]:.4f}', f'{metrics[2]:.4f}']
metrics_display = display(metrics_df, display_id='metrics_table')

Unnamed: 0,Precision,Recall,F1
LogisticRegression,0.5069,0.3826,0.4065
LinearSVC,0.4961,0.3902,0.4055
KNeighborsClassifier,0.4236,0.3832,0.3894
MLPClassifier,0.5073,0.4073,0.4222
DecisionTreeClassifier,0.3457,0.2217,0.2105
RandomForestClassifier,0.3517,0.2038,0.1817
AdaBoostClassifier,0.3232,0.2823,0.2657
GaussianNB,0.3764,0.4456,0.385
XGBClassifier,0.5085,0.3386,0.3609
TorchMLP,0.4934,0.3853,0.4086
