<a href="https://colab.research.google.com/github/alexlimatds/circle-2022/blob/main/RRLLJ_TF_IDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Rhetorical Role Labeling for Legal Judgments - experiments with TF-IDF features

In this notebook we utilize TF-IDF features to represent the sentences.

### Loading dataset

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
g_drive_dir = "/content/gdrive/MyDrive/"

Mounted at /content/gdrive


In [2]:
!mkdir data
!mkdir data/train
!tar -xf {g_drive_dir}AILA_2021/AILA_2021_train.tar.xz -C data/train

train_dir = 'data/train/'

In [3]:
import pandas as pd
from os import listdir

def read_docs(dir_name):
  docs = {} # key: file name, value: dataframe with sentences and labels
  for f in listdir(dir_name):
    df = pd.read_csv(
        dir_name + f, 
        sep='\t', 
        names=['sentence', 'label'])
    docs[f] = df
  return docs

docs_train = read_docs(train_dir)
print(f'TRAIN: {len(docs_train)} documents read.')

TRAIN: 60 documents read.


In [4]:
def sentences_to_list(docs):
  sentences_list = []
  targets_list = []
  for df in docs.values():
    sentences_list.extend(df['sentence'].tolist())
    targets_list.extend(df['label'].tolist())
  return sentences_list, targets_list

def target_stats(set_name, targets):
  stats = {}
  for t in targets:
    stats[t] = stats.get(t, 0) + 1
  print(f'Statistics of the {set_name} set:')
  print(f'   Total number of sentences: {len(targets)}')
  for t, n in stats.items():
    print(f'   Number of {t} labels: {n}')

sentences_train, train_labels = sentences_to_list(docs_train)

target_stats('TRAIN', train_labels)

Statistics of the TRAIN set:
   Total number of sentences: 10024
   Number of Ratio of the decision labels: 3919
   Number of Facts labels: 2368
   Number of Argument labels: 901
   Number of Ruling by Lower Court labels: 341
   Number of Statute labels: 671
   Number of Precedent labels: 1523
   Number of Ruling by Present Court labels: 301


### TF-IDF features

In [5]:
import re

def preprocess(str):
  pstr = str
  pstr = re.sub(r'[/(){}\[\]\|@,;]', ' ', pstr) # replaces symbols with spaces
  pstr = re.sub(r'[^0-9a-z #+_]', '', pstr)     # removes bad symbols
  pstr = re.sub(r'\d+', '', pstr)               # removes numbers
  return pstr

In [6]:
%%time
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_model = TfidfVectorizer(
    preprocessor=preprocess, 
    ngram_range=(1, 3), 
    min_df=10)
tfidf_model.fit(sentences_train)

n_features = len(tfidf_model.vocabulary_)
print(f'Learned {n_features} terms.')

Learned 7438 terms.
CPU times: user 2.05 s, sys: 218 ms, total: 2.27 s
Wall time: 2.97 s


In [7]:
docs_train_tfidf = {} # key: file id, value: matrix of features
for doc_id, df in docs_train.items():
  docs_train_tfidf[doc_id] = tfidf_model.transform(df['sentence'].tolist()).toarray()


### Label encoder

In [8]:
from sklearn.preprocessing import LabelBinarizer

label_encoder = LabelBinarizer()
label_encoder.fit(train_labels)

docs_train_targets = {} # key: file id, value: matrix of one-hot encoded labels
for doc_id, df in docs_train.items():
  docs_train_targets[doc_id] = label_encoder.transform(df['label'].tolist())

n_classes = label_encoder.classes_.shape[0]
n_classes

7

### Evaluation functions

In [9]:
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import display, HTML

def docs_to_sentences(docs_idx, doc_keys_list):
  features_ = None
  targets_ = []
  for idx in docs_idx:
    doc_id = doc_keys_list[idx]
    if features_ is None:
      features_ = docs_train_tfidf[doc_id]
    else:
      features_ = np.vstack((features_, docs_train_tfidf[doc_id]))
    targets_.extend(docs_train[doc_id]['label'].tolist())
  return features_, targets_

def metrics_report(title, averages, stds):
  report_df = pd.DataFrame(columns=['Score', 'Standard Deviation'])
  report_df.loc['Precision'] = [f'{averages[0]:.4f}', f'{stds[0]:.4f}']
  report_df.loc['Recall'] = [f'{averages[1]:.4f}', f'{stds[1]:.4f}']
  report_df.loc['F1'] = [f'{averages[2]:.4f}', f'{stds[2]:.4f}']
  display(HTML(f'<br><span style="font-weight: bold">{title}: cross-validation macro averages</span>'))
  display(report_df)  

def classification_report(metrics):
  report_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
  for i, l in enumerate(label_encoder.classes_):
    report_df.loc[l] = [
      f'{metrics[i, 0]:.4f}', 
      f'{metrics[i, 1]:.4f}', 
      f'{metrics[i, 2]:.4f}', 
    ]
  display(HTML(f'<br><span style="font-weight: bold">Classification Report (cross-validation test averages)</span>'))
  display(report_df)

test_metrics = {}

def cross_validation(trainer):
  train_metrics_cross = []
  test_metrics_cross = []
  test_metrics_by_class = np.zeros((n_classes, 3)) # 3 metrics (P, R, F1)
  n_folds = 5
  skf = KFold(n_splits=n_folds) # for cross-validation
  docs_list = list(docs_train.keys())
  for train_docs_idx, test_docs_idx in skf.split(docs_list): # The cross-validation splitting is document-oriented
    # train
    train_features_fold, train_targets_fold = docs_to_sentences(train_docs_idx, docs_list)
    model = trainer(train_features_fold, train_targets_fold)
    # test
    test_features_fold, test_targets_fold = docs_to_sentences(test_docs_idx, docs_list)
    predictions = model.predict(test_features_fold)
    # averaged test metrics
    p_test, r_test, f1_test, _ = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    test_metrics_cross.append([p_test, r_test, f1_test])
    # test metrics by class
    metrics = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average=None, 
        zero_division=0, 
        labels=label_encoder.classes_)
    test_metrics_by_class = test_metrics_by_class + np.hstack((
        metrics[0].reshape(-1, 1),  # precision
        metrics[1].reshape(-1, 1),  # recall
        metrics[1].reshape(-1, 1))) # F1
    # train metrics
    predictions = model.predict(train_features_fold)
    p_train, r_train, f1_train, _ = precision_recall_fscore_support(
        train_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    train_metrics_cross.append([p_train, r_train, f1_train])
  
  train_metrics_cross = np.array(train_metrics_cross)
  train_mean = np.mean(train_metrics_cross, axis=0)
  train_std = np.std(train_metrics_cross, axis=0)
  metrics_report('TRAIN', train_mean, train_std)

  test_metrics_cross = np.array(test_metrics_cross)
  test_mean = np.mean(test_metrics_cross, axis=0)
  test_std = np.std(test_metrics_cross, axis=0)
  metrics_report('TEST', test_mean, test_std)

  test_metrics_by_class /= n_classes
  classification_report(test_metrics_by_class)

  test_metrics[model.__class__.__name__] = test_mean

### Scikit-learn Models

#### Logistic regression

In [10]:
from sklearn.linear_model import LogisticRegression

def lr_trainer(X, y):
  logreg = LogisticRegression(solver='sag', random_state=1)
  return logreg.fit(X, y)

In [11]:
%%time

cross_validation(lr_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8579,0.0043
Recall,0.5462,0.0137
F1,0.6076,0.0111


Unnamed: 0,Score,Standard Deviation
Precision,0.6107,0.0913
Recall,0.3528,0.0376
F1,0.3908,0.0439


Unnamed: 0,Precision,Recall,F1
Argument,0.4971,0.1489,0.1489
Facts,0.3889,0.3935,0.3935
Precedent,0.3492,0.1786,0.1786
Ratio of the decision,0.354,0.5622,0.5622
Ruling by Lower Court,0.2857,0.005,0.005
Ruling by Present Court,0.686,0.2937,0.2937
Statute,0.4926,0.1824,0.1824


CPU times: user 7min 33s, sys: 2.54 s, total: 7min 36s
Wall time: 7min 33s


#### Linear SVM

In [12]:
from sklearn.svm import LinearSVC

def linear_svm_trainer(X, y):
  svm = LinearSVC(random_state=1)
  return svm.fit(X, y)

In [13]:
%%time

cross_validation(linear_svm_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.9507,0.0024
Recall,0.8934,0.0023
F1,0.9197,0.0023


Unnamed: 0,Score,Standard Deviation
Precision,0.4876,0.0477
Recall,0.3815,0.0379
F1,0.4096,0.0347


Unnamed: 0,Precision,Recall,F1
Argument,0.3527,0.2121,0.2121
Facts,0.3601,0.3821,0.3821
Precedent,0.267,0.2436,0.2436
Ratio of the decision,0.3656,0.4609,0.4609
Ruling by Lower Court,0.1333,0.0281,0.0281
Ruling by Present Court,0.5563,0.3431,0.3431
Statute,0.403,0.2375,0.2375


CPU times: user 19.5 s, sys: 1.43 s, total: 21 s
Wall time: 18.9 s


#### KNN

In [14]:
from sklearn.neighbors import KNeighborsClassifier

def knn_trainer(X, y):
  knn = KNeighborsClassifier(5)
  return knn.fit(X, y)

In [15]:
%%time

cross_validation(knn_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.4899,0.0231
Recall,0.3861,0.0094
F1,0.3968,0.0119


Unnamed: 0,Score,Standard Deviation
Precision,0.2769,0.0089
Recall,0.2087,0.0096
F1,0.2145,0.0109


Unnamed: 0,Precision,Recall,F1
Argument,0.0706,0.1365,0.1365
Facts,0.1823,0.1908,0.1908
Precedent,0.1289,0.241,0.241
Ratio of the decision,0.2986,0.1755,0.1755
Ruling by Lower Court,0.0209,0.0113,0.0113
Ruling by Present Court,0.5872,0.23,0.23
Statute,0.0962,0.0581,0.0581


CPU times: user 3min 35s, sys: 6 s, total: 3min 41s
Wall time: 2min 7s


#### MLP Classifier

In [16]:
from sklearn.neural_network import MLPClassifier

def mlp_trainer(X, y):
  # Default MLP from scikit-learn
  mlp = MLPClassifier(early_stopping=True, random_state=1)
  return mlp.fit(X, y)

In [17]:
%%time

cross_validation(mlp_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8778,0.0153
Recall,0.6735,0.0702
F1,0.7248,0.0715


Unnamed: 0,Score,Standard Deviation
Precision,0.5397,0.0813
Recall,0.3669,0.0358
F1,0.404,0.0388


Unnamed: 0,Precision,Recall,F1
Argument,0.4133,0.1827,0.1827
Facts,0.3804,0.3839,0.3839
Precedent,0.2814,0.2291,0.2291
Ratio of the decision,0.3579,0.5052,0.5052
Ruling by Lower Court,0.1667,0.0122,0.0122
Ruling by Present Court,0.6522,0.3008,0.3008
Statute,0.4465,0.2207,0.2207


CPU times: user 4min 22s, sys: 1min 13s, total: 5min 36s
Wall time: 3min


#### Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier

def decision_tree_trainer(X, y):
  dtree = DecisionTreeClassifier(max_depth=5, random_state=1)
  return dtree.fit(X, y)

In [19]:
%%time

cross_validation(decision_tree_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5302,0.1092
Recall,0.2367,0.0238
F1,0.2324,0.0327


Unnamed: 0,Score,Standard Deviation
Precision,0.3711,0.0677
Recall,0.217,0.0286
F1,0.1989,0.036


Unnamed: 0,Precision,Recall,F1
Argument,0.4492,0.0546,0.0546
Facts,0.1539,0.2365,0.2365
Precedent,0.4493,0.1188,0.1188
Ratio of the decision,0.3211,0.5468,0.5468
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.1161,0.0244,0.0244
Statute,0.3659,0.1039,0.1039


CPU times: user 37.8 s, sys: 585 ms, total: 38.4 s
Wall time: 38.2 s


#### Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier

def random_forest_trainer(X, y):
  rforest = RandomForestClassifier(max_depth=5, n_estimators=10, random_state=1)
  return rforest.fit(X, y)

In [21]:
%%time

cross_validation(random_forest_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5853,0.0144
Recall,0.1566,0.0021
F1,0.1086,0.004


Unnamed: 0,Score,Standard Deviation
Precision,0.3479,0.0559
Recall,0.1512,0.0033
F1,0.098,0.0136


Unnamed: 0,Precision,Recall,F1
Argument,0.5082,0.0175,0.0175
Facts,0.4475,0.0165,0.0165
Precedent,0.3571,0.0083,0.0083
Ratio of the decision,0.2837,0.7107,0.7107
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.0,0.0,0.0
Statute,0.1429,0.0029,0.0029


CPU times: user 19.9 s, sys: 606 ms, total: 20.5 s
Wall time: 20.4 s


#### AdaBoost

In [22]:
from sklearn.ensemble import AdaBoostClassifier

def adaboost_trainer(X, y):
  adab = AdaBoostClassifier(random_state=1)
  return adab.fit(X, y)

In [23]:
%%time

cross_validation(adaboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5323,0.0224
Recall,0.3224,0.0222
F1,0.3551,0.025


Unnamed: 0,Score,Standard Deviation
Precision,0.4525,0.0144
Recall,0.2867,0.0295
F1,0.3068,0.0298


Unnamed: 0,Precision,Recall,F1
Argument,0.4417,0.1835,0.1835
Facts,0.303,0.1506,0.1506
Precedent,0.4148,0.1215,0.1215
Ratio of the decision,0.3107,0.59,0.59
Ruling by Lower Court,0.0944,0.0082,0.0082
Ruling by Present Court,0.4024,0.2276,0.2276
Statute,0.2953,0.152,0.152


CPU times: user 6min 6s, sys: 3.94 s, total: 6min 10s
Wall time: 6min 8s


#### Naive Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes_trainer(X, y):
  nb = GaussianNB()
  return nb.fit(X, y)

In [25]:
%%time
cross_validation(naive_bayes_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6501,0.0066
Recall,0.8322,0.0062
F1,0.6909,0.0062


Unnamed: 0,Score,Standard Deviation
Precision,0.2789,0.0205
Recall,0.2837,0.0275
F1,0.268,0.0229


Unnamed: 0,Precision,Recall,F1
Argument,0.1124,0.0913,0.0913
Facts,0.3148,0.2877,0.2877
Precedent,0.2103,0.1929,0.1929
Ratio of the decision,0.3306,0.37,0.37
Ruling by Lower Court,0.0362,0.0433,0.0433
Ruling by Present Court,0.141,0.3007,0.3007
Statute,0.2489,0.1324,0.1324


CPU times: user 37.7 s, sys: 2.01 s, total: 39.7 s
Wall time: 39.4 s


#### XGBoost

In [26]:
from xgboost.sklearn import XGBClassifier

def xgboost_trainer(X, y):
  xgboost = XGBClassifier(objective="multi:softmax", tree_method='hist')
  return xgboost.fit(X, y)

In [27]:
%%time
cross_validation(xgboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8104,0.0049
Recall,0.4497,0.0095
F1,0.5239,0.0084


Unnamed: 0,Score,Standard Deviation
Precision,0.6089,0.0808
Recall,0.3405,0.0331
F1,0.3834,0.0387


Unnamed: 0,Precision,Recall,F1
Argument,0.4975,0.2058,0.2058
Facts,0.3959,0.2876,0.2876
Precedent,0.4367,0.1468,0.1468
Ratio of the decision,0.3355,0.6018,0.6018
Ruling by Lower Court,0.25,0.0086,0.0086
Ruling by Present Court,0.6279,0.2645,0.2645
Statute,0.5011,0.1874,0.1874


CPU times: user 20min 19s, sys: 7.02 s, total: 20min 26s
Wall time: 20min 21s


### PyTorch models

In [28]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
  def __init__(self, inputs, targets):
    self.X = torch.from_numpy(inputs).float()
    self.y = torch.from_numpy(label_encoder.transform(targets)).float()

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return [self.X[idx], self.y[idx]]


In [29]:
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import ShuffleSplit

torch.manual_seed(1)

class MLPTrainer:

  def __init__(self, model, l2_penalty=0.0001):
    self.model = model
    setattr(self.model.__class__, 'predict', self.predict)
    # The training replicates the default configuration from scikit-learn's MLPClassifier
    self.criterion = torch.nn.CrossEntropyLoss()
    self.lambd = l2_penalty # weight decay for Adam optmizer
    self.n_epochs = 200

  def fit(self, inputs, targets, verbose=False):
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 7
    early_stop_count = 0
    best_loss_validation = float("inf")
    # splitting train data into train and validation sets in order to perform early stopping
    spl = ShuffleSplit(n_splits=1, train_size=0.9, random_state=1)
    targets = np.array(targets)
    for train_index, val_index in spl.split(inputs):
      # getting set's data
      train_x = inputs[train_index]
      train_y = targets[train_index]
      validation_x = inputs[val_index]
      validation_y = targets[val_index]
      train_dl = DataLoader(
        MyDataset(train_x, train_y), 
        batch_size=64)
      validation_dl = DataLoader(
        MyDataset(validation_x, validation_y), 
        batch_size=len(validation_x))
      # training
      self.model.train()
      optimizer = Adam(
        self.model.parameters(), 
        weight_decay=self.lambd)
      for epoch in range(self.n_epochs):
        # iterate mini batches
        for x, y in train_dl:
          optimizer.zero_grad()
          yhat = self.model(x)
          loss = self.criterion(yhat, y)
          loss.backward()
          optimizer.step()
        # Early stopping
        for x, y in validation_dl:
          loss_validation = self.criterion(self.model(x), y)
        if loss_validation >= best_loss_validation - tol:
          early_stop_count += 1
        else:
          early_stop_count = 0
          best_loss_validation = loss_validation
        if early_stop_count == n_iter_no_change:
          break
    if verbose:
      print(f'TRAIN: Stopped at epoch {epoch + 1} {"(MAX EPOCH)" if epoch + 1 == self.n_epochs else ""}')

    self.model.eval()
    return self.model

  def predict(self, X):
    y = self.model.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())


#### TorchMLP

In [30]:
from torch.nn.init import xavier_uniform_
from torch.nn.init import kaiming_uniform_

class TorchMLP(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLP, self).__init__()
    # hidden layer
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_inputs, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)
  

In [31]:
def torch_mlp_trainer(X, y):
  trainer = MLPTrainer(TorchMLP(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [32]:
%%time
cross_validation(torch_mlp_trainer)

TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 21 
TRAIN: Stopped at epoch 17 
TRAIN: Stopped at epoch 18 
TRAIN: Stopped at epoch 20 


Unnamed: 0,Score,Standard Deviation
Precision,0.8999,0.0064
Recall,0.681,0.0194
F1,0.7291,0.0214


Unnamed: 0,Score,Standard Deviation
Precision,0.5447,0.0875
Recall,0.3665,0.0414
F1,0.3994,0.0385


Unnamed: 0,Precision,Recall,F1
Argument,0.4186,0.1817,0.1817
Facts,0.3768,0.3904,0.3904
Precedent,0.2824,0.2294,0.2294
Ratio of the decision,0.3612,0.5067,0.5067
Ruling by Lower Court,0.1786,0.005,0.005
Ruling by Present Court,0.6379,0.3043,0.3043
Statute,0.468,0.215,0.215


CPU times: user 3min 1s, sys: 3.87 s, total: 3min 5s
Wall time: 3min 4s


#### TorchMLPMaxPool

In [33]:
import math

class TorchMLPMaxPool(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLPMaxPool, self).__init__()
    # max pool
    window_size = 2
    max_pool = torch.nn.MaxPool1d(window_size, ceil_mode=True)
    n_out_pool = math.ceil((n_inputs - window_size) / window_size + 1)
    # hidden layers
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_out_pool, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      max_pool, 
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)


In [34]:
def torch_mlp_maxpool_trainer(X, y):
  trainer = MLPTrainer(TorchMLPMaxPool(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [35]:
%%time
cross_validation(torch_mlp_maxpool_trainer)

TRAIN: Stopped at epoch 19 
TRAIN: Stopped at epoch 27 
TRAIN: Stopped at epoch 17 
TRAIN: Stopped at epoch 18 
TRAIN: Stopped at epoch 20 


Unnamed: 0,Score,Standard Deviation
Precision,0.8673,0.0103
Recall,0.625,0.0254
F1,0.6728,0.0252


Unnamed: 0,Score,Standard Deviation
Precision,0.5348,0.0865
Recall,0.3621,0.0417
F1,0.3935,0.0377


Unnamed: 0,Precision,Recall,F1
Argument,0.4182,0.1713,0.1713
Facts,0.3727,0.3942,0.3942
Precedent,0.2787,0.2269,0.2269
Ratio of the decision,0.3576,0.4944,0.4944
Ruling by Lower Court,0.1714,0.005,0.005
Ruling by Present Court,0.6329,0.3007,0.3007
Statute,0.4427,0.2181,0.2181


CPU times: user 1min 59s, sys: 2.66 s, total: 2min 2s
Wall time: 2min 1s


#### TorchLogisticRegression

In [36]:
class TorchLogisticRegression(torch.nn.Module):
  def __init__(self, n_inputs, n_classes, verbose=False):
    super(TorchLogisticRegression, self).__init__()
    self.verbose = verbose
    self.layer = torch.nn.Linear(n_inputs, n_classes)
    xavier_uniform_(self.layer.weight)

  def forward(self, X):
    y = self.layer(X)
    return y
  
  def predict(self, X):
    y = self.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())

  def fit(self, X, y):
    # learning algorithm's params
    learning_rate = 0.9
    momentum = 0.9
    lambda_param = 0.0001 # L2 regularization
    n_iterations = 1000
    decay_rate = 0.95  # learning rate decay
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 10
    early_stop_count = 0
    best_loss = float("inf")
    # loss function and optmizer
    self.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
      self.parameters(), 
      lr=learning_rate, 
      momentum=momentum, 
      weight_decay=lambda_param)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, 
        gamma=decay_rate)
    # Data loader
    batch_size = 64
    train_dl = DataLoader(
      MyDataset(X, y), 
      batch_size=batch_size, 
      shuffle=True)
    # Train loop
    for i in range(1, n_iterations + 1):
      # iterate mini batches
      for x_batch, y_batch in train_dl:
        optimizer.zero_grad()
        y_hat = self(x_batch)
        loss = criterion(y_hat, y_batch)
        loss.backward()
        optimizer.step()
      lr_scheduler.step()
      # early stop
      if loss >= best_loss - tol:
        early_stop_count += 1
      else:
        early_stop_count = 0
        best_loss = loss
      if early_stop_count == n_iter_no_change:
        break
    
    if self.verbose:
      print(f'TRAIN: Stopped at iteration {i} {"(MAX ITERATION)" if i == n_iterations else ""}')
    self.eval()
    return self


In [37]:
def torch_lr_trainer(X, y):
  lr_ = TorchLogisticRegression(n_features, n_classes, verbose=True)
  return lr_.fit(X, y)

In [38]:
%%time
cross_validation(torch_lr_trainer)

TRAIN: Stopped at iteration 30 
TRAIN: Stopped at iteration 24 
TRAIN: Stopped at iteration 35 
TRAIN: Stopped at iteration 19 
TRAIN: Stopped at iteration 26 


Unnamed: 0,Score,Standard Deviation
Precision,0.8545,0.0032
Recall,0.5564,0.0234
F1,0.6197,0.0206


Unnamed: 0,Score,Standard Deviation
Precision,0.5937,0.0832
Recall,0.3554,0.038
F1,0.3919,0.0419


Unnamed: 0,Precision,Recall,F1
Argument,0.4913,0.1534,0.1534
Facts,0.3872,0.3951,0.3951
Precedent,0.3435,0.1803,0.1803
Ratio of the decision,0.3563,0.5568,0.5568
Ruling by Lower Court,0.2143,0.0068,0.0068
Ruling by Present Court,0.6767,0.3002,0.3002
Statute,0.4993,0.1842,0.1842


CPU times: user 53.5 s, sys: 1.25 s, total: 54.8 s
Wall time: 54.5 s


### Summary

In [39]:
metrics_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
for model_name, metrics in test_metrics.items():
  metrics_df.loc[model_name] = [f'{metrics[0]:.4f}', f'{metrics[1]:.4f}', f'{metrics[2]:.4f}']
metrics_display = display(metrics_df, display_id='metrics_table')

Unnamed: 0,Precision,Recall,F1
LogisticRegression,0.6107,0.3528,0.3908
LinearSVC,0.4876,0.3815,0.4096
KNeighborsClassifier,0.2769,0.2087,0.2145
MLPClassifier,0.5397,0.3669,0.404
DecisionTreeClassifier,0.3711,0.217,0.1989
RandomForestClassifier,0.3479,0.1512,0.098
AdaBoostClassifier,0.4525,0.2867,0.3068
GaussianNB,0.2789,0.2837,0.268
XGBClassifier,0.6089,0.3405,0.3834
TorchMLP,0.5447,0.3665,0.3994
