<a href="https://colab.research.google.com/github/alexlimatds/circle-2022/blob/main/RRLLJ_TF_IDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Rhetorical Role Labeling for Legal Judgments - experiments with TF-IDF features

In this notebook we utilize TF-IDF features to represent the sentences.

### Loading dataset

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
g_drive_dir = "/content/gdrive/MyDrive/"

Mounted at /content/gdrive


In [2]:
!mkdir data
!mkdir data/train
!tar -xf {g_drive_dir}AILA_2021/AILA_2021_train.tar.xz -C data/train

train_dir = 'data/train/'

In [3]:
import pandas as pd
from os import listdir

def read_docs(dir_name):
  docs = {} # key: file name, value: dataframe with sentences and labels
  for f in listdir(dir_name):
    df = pd.read_csv(
        dir_name + f, 
        sep='\t', 
        names=['sentence', 'label'])
    docs[f] = df
  return docs

docs_train = read_docs(train_dir)
print(f'TRAIN: {len(docs_train)} documents read.')

TRAIN: 60 documents read.


In [4]:
def sentences_to_list(docs):
  sentences_list = []
  targets_list = []
  for df in docs.values():
    sentences_list.extend(df['sentence'].tolist())
    targets_list.extend(df['label'].tolist())
  return sentences_list, targets_list

def target_stats(set_name, targets):
  stats = {}
  for t in targets:
    stats[t] = stats.get(t, 0) + 1
  print(f'Statistics of the {set_name} set:')
  print(f'   Total number of sentences: {len(targets)}')
  for t, n in stats.items():
    print(f'   Number of {t} labels: {n}')

sentences_train, train_labels = sentences_to_list(docs_train)

target_stats('TRAIN', train_labels)

Statistics of the TRAIN set:
   Total number of sentences: 10024
   Number of Facts labels: 2368
   Number of Statute labels: 671
   Number of Ratio of the decision labels: 3919
   Number of Ruling by Lower Court labels: 341
   Number of Ruling by Present Court labels: 301
   Number of Argument labels: 901
   Number of Precedent labels: 1523


### TF-IDF features

In [5]:
import re

def preprocess(str):
  pstr = str
  pstr = re.sub(r'[/(){}\[\]\|@,;]', ' ', pstr) # replaces symbols with spaces
  pstr = re.sub(r'[^0-9a-z #+_]', '', pstr)     # removes bad symbols
  pstr = re.sub(r'\d+', '', pstr)               # removes numbers
  return pstr

In [6]:
%%time
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_model = TfidfVectorizer(
    preprocessor=preprocess, 
    ngram_range=(1, 3), 
    min_df=10)
tfidf_model.fit(sentences_train)

n_features = len(tfidf_model.vocabulary_)
print(f'Learned {n_features} terms.')

Learned 7438 terms.
CPU times: user 1.79 s, sys: 161 ms, total: 1.95 s
Wall time: 2.38 s


In [7]:
docs_train_tfidf = {} # key: file id, value: matrix of features
for doc_id, df in docs_train.items():
  docs_train_tfidf[doc_id] = tfidf_model.transform(df['sentence'].tolist()).toarray()


### Label encoder

In [8]:
from sklearn.preprocessing import LabelBinarizer

label_encoder = LabelBinarizer()
label_encoder.fit(train_labels)

docs_train_targets = {} # key: file id, value: matrix of one-hot encoded labels
for doc_id, df in docs_train.items():
  docs_train_targets[doc_id] = label_encoder.transform(df['label'].tolist())

n_classes = label_encoder.classes_.shape[0]
n_classes

7

### Evaluation functions

In [9]:
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import display, HTML

def docs_to_sentences(docs_idx, doc_keys_list):
  features_ = None
  targets_ = []
  for idx in docs_idx:
    doc_id = doc_keys_list[idx]
    if features_ is None:
      features_ = docs_train_tfidf[doc_id]
    else:
      features_ = np.vstack((features_, docs_train_tfidf[doc_id]))
    targets_.extend(docs_train[doc_id]['label'].tolist())
  return features_, targets_

def metrics_report(title, averages, stds):
  report_df = pd.DataFrame(columns=['Score', 'Standard Deviation'])
  report_df.loc['Precision'] = [f'{averages[0]:.4f}', f'{stds[0]:.4f}']
  report_df.loc['Recall'] = [f'{averages[1]:.4f}', f'{stds[1]:.4f}']
  report_df.loc['F1'] = [f'{averages[2]:.4f}', f'{stds[2]:.4f}']
  display(HTML(f'<br><span style="font-weight: bold">{title}: cross-validation macro averages</span>'))
  display(report_df)  

def classification_report(metrics):
  report_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
  for i, l in enumerate(label_encoder.classes_):
    report_df.loc[l] = [
      f'{metrics[i, 0]:.4f}', 
      f'{metrics[i, 1]:.4f}', 
      f'{metrics[i, 2]:.4f}', 
    ]
  display(HTML(f'<br><span style="font-weight: bold">Classification Report (cross-validation test averages)</span>'))
  display(report_df)

test_metrics = {}

def cross_validation(trainer):
  train_metrics_cross = []
  test_metrics_cross = []
  test_metrics_by_class = np.zeros((n_classes, 3)) # 3 metrics (P, R, F1)
  n_folds = 5
  skf = KFold(n_splits=n_folds) # for cross-validation
  docs_list = list(docs_train.keys())
  for train_docs_idx, test_docs_idx in skf.split(docs_list): # The cross-validation splitting is document-oriented
    # train
    train_features_fold, train_targets_fold = docs_to_sentences(train_docs_idx, docs_list)
    model = trainer(train_features_fold, train_targets_fold)
    # test
    test_features_fold, test_targets_fold = docs_to_sentences(test_docs_idx, docs_list)
    predictions = model.predict(test_features_fold)
    # averaged test metrics
    p_test, r_test, f1_test, _ = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    test_metrics_cross.append([p_test, r_test, f1_test])
    # test metrics by class
    metrics = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average=None, 
        zero_division=0, 
        labels=label_encoder.classes_)
    test_metrics_by_class = test_metrics_by_class + np.hstack((
        metrics[0].reshape(-1, 1),  # precision
        metrics[1].reshape(-1, 1),  # recall
        metrics[1].reshape(-1, 1))) # F1
    # train metrics
    predictions = model.predict(train_features_fold)
    p_train, r_train, f1_train, _ = precision_recall_fscore_support(
        train_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    train_metrics_cross.append([p_train, r_train, f1_train])
  
  train_metrics_cross = np.array(train_metrics_cross)
  train_mean = np.mean(train_metrics_cross, axis=0)
  train_std = np.std(train_metrics_cross, axis=0)
  metrics_report('TRAIN', train_mean, train_std)

  test_metrics_cross = np.array(test_metrics_cross)
  test_mean = np.mean(test_metrics_cross, axis=0)
  test_std = np.std(test_metrics_cross, axis=0)
  metrics_report('TEST', test_mean, test_std)

  test_metrics_by_class /= n_classes
  classification_report(test_metrics_by_class)

  test_metrics[model.__class__.__name__] = test_mean

### Scikit-learn Models

#### Logistic regression

In [10]:
from sklearn.linear_model import LogisticRegression

def lr_trainer(X, y):
  logreg = LogisticRegression(solver='sag', random_state=1)
  return logreg.fit(X, y)

In [11]:
%%time

cross_validation(lr_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8596,0.0082
Recall,0.5471,0.0138
F1,0.6074,0.0109


Unnamed: 0,Score,Standard Deviation
Precision,0.5964,0.0545
Recall,0.3645,0.0235
F1,0.4027,0.032


Unnamed: 0,Precision,Recall,F1
Argument,0.5083,0.159,0.159
Facts,0.3965,0.4211,0.4211
Precedent,0.3557,0.1816,0.1816
Ratio of the decision,0.3453,0.5515,0.5515
Ruling by Lower Court,0.1429,0.003,0.003
Ruling by Present Court,0.6796,0.303,0.303
Statute,0.5539,0.2031,0.2031


CPU times: user 6min 52s, sys: 2.5 s, total: 6min 55s
Wall time: 6min 52s


#### Linear SVM

In [12]:
from sklearn.svm import LinearSVC

def linear_svm_trainer(X, y):
  svm = LinearSVC(random_state=1)
  return svm.fit(X, y)

In [13]:
%%time

cross_validation(linear_svm_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.9513,0.0017
Recall,0.8943,0.0067
F1,0.9204,0.0042


Unnamed: 0,Score,Standard Deviation
Precision,0.4829,0.0422
Recall,0.3902,0.0327
F1,0.4146,0.0378


Unnamed: 0,Precision,Recall,F1
Argument,0.3709,0.228,0.228
Facts,0.3647,0.4132,0.4132
Precedent,0.2613,0.2342,0.2342
Ratio of the decision,0.3527,0.4395,0.4395
Ruling by Lower Court,0.0905,0.023,0.023
Ruling by Present Court,0.5502,0.3511,0.3511
Statute,0.4241,0.2619,0.2619


CPU times: user 19.1 s, sys: 1.65 s, total: 20.7 s
Wall time: 18.6 s


#### KNN

In [14]:
from sklearn.neighbors import KNeighborsClassifier

def knn_trainer(X, y):
  knn = KNeighborsClassifier(5)
  return knn.fit(X, y)

In [15]:
%%time

cross_validation(knn_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.4984,0.0233
Recall,0.3925,0.0084
F1,0.403,0.0084


Unnamed: 0,Score,Standard Deviation
Precision,0.2869,0.0226
Recall,0.2064,0.0151
F1,0.2115,0.0227


Unnamed: 0,Precision,Recall,F1
Argument,0.0693,0.1378,0.1378
Facts,0.1933,0.195,0.195
Precedent,0.1305,0.2426,0.2426
Ratio of the decision,0.2886,0.1692,0.1692
Ruling by Lower Court,0.0139,0.007,0.007
Ruling by Present Court,0.6466,0.2222,0.2222
Statute,0.0921,0.0581,0.0581


CPU times: user 3min 55s, sys: 3.9 s, total: 3min 59s
Wall time: 2min 14s


#### MLP Classifier

In [16]:
from sklearn.neural_network import MLPClassifier

def mlp_trainer(X, y):
  # Default MLP from scikit-learn
  mlp = MLPClassifier(early_stopping=True, random_state=1)
  return mlp.fit(X, y)

In [17]:
%%time

cross_validation(mlp_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8625,0.0072
Recall,0.6107,0.0581
F1,0.6583,0.0563


Unnamed: 0,Score,Standard Deviation
Precision,0.5555,0.0864
Recall,0.3722,0.0232
F1,0.4075,0.0331


Unnamed: 0,Precision,Recall,F1
Argument,0.4372,0.1773,0.1773
Facts,0.3865,0.4247,0.4247
Precedent,0.3035,0.2341,0.2341
Ratio of the decision,0.3498,0.5005,0.5005
Ruling by Lower Court,0.1429,0.0029,0.0029
Ruling by Present Court,0.6649,0.2906,0.2906
Statute,0.4926,0.2306,0.2306


CPU times: user 4min 25s, sys: 1min 12s, total: 5min 37s
Wall time: 2min 59s


#### Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier

def decision_tree_trainer(X, y):
  dtree = DecisionTreeClassifier(max_depth=5, random_state=1)
  return dtree.fit(X, y)

In [19]:
%%time

cross_validation(decision_tree_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5298,0.0673
Recall,0.2623,0.0225
F1,0.2629,0.0292


Unnamed: 0,Score,Standard Deviation
Precision,0.363,0.0493
Recall,0.247,0.028
F1,0.2391,0.0381


Unnamed: 0,Precision,Recall,F1
Argument,0.4187,0.0765,0.0765
Facts,0.1985,0.4231,0.4231
Precedent,0.4535,0.117,0.117
Ratio of the decision,0.346,0.4368,0.4368
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.068,0.0298,0.0298
Statute,0.3302,0.1521,0.1521


CPU times: user 36 s, sys: 434 ms, total: 36.4 s
Wall time: 36.2 s


#### Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier

def random_forest_trainer(X, y):
  rforest = RandomForestClassifier(max_depth=5, n_estimators=10, random_state=1)
  return rforest.fit(X, y)

In [21]:
%%time

cross_validation(random_forest_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5721,0.0604
Recall,0.1598,0.0097
F1,0.1141,0.0158


Unnamed: 0,Score,Standard Deviation
Precision,0.4334,0.098
Recall,0.151,0.0035
F1,0.0961,0.0151


Unnamed: 0,Precision,Recall,F1
Argument,0.4439,0.0098,0.0098
Facts,0.3799,0.0201,0.0201
Precedent,0.6905,0.0119,0.0119
Ratio of the decision,0.2719,0.7079,0.7079
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.0,0.0,0.0
Statute,0.381,0.0055,0.0055


CPU times: user 17.5 s, sys: 443 ms, total: 18 s
Wall time: 17.9 s


#### AdaBoost

In [22]:
from sklearn.ensemble import AdaBoostClassifier

def adaboost_trainer(X, y):
  adab = AdaBoostClassifier(random_state=1)
  return adab.fit(X, y)

In [23]:
%%time

cross_validation(adaboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5546,0.0135
Recall,0.3312,0.0252
F1,0.3677,0.0256


Unnamed: 0,Score,Standard Deviation
Precision,0.4517,0.0295
Recall,0.2968,0.016
F1,0.3148,0.014


Unnamed: 0,Precision,Recall,F1
Argument,0.4365,0.1889,0.1889
Facts,0.295,0.2173,0.2173
Precedent,0.4365,0.1258,0.1258
Ratio of the decision,0.3078,0.5479,0.5479
Ruling by Lower Court,0.0204,0.0033,0.0033
Ruling by Present Court,0.46,0.226,0.226
Statute,0.3023,0.1749,0.1749


CPU times: user 5min 34s, sys: 1.94 s, total: 5min 36s
Wall time: 5min 35s


#### Naive Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes_trainer(X, y):
  nb = GaussianNB()
  return nb.fit(X, y)

In [25]:
%%time
cross_validation(naive_bayes_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6515,0.0125
Recall,0.8329,0.0032
F1,0.6915,0.0113


Unnamed: 0,Score,Standard Deviation
Precision,0.2822,0.0256
Recall,0.2917,0.0145
F1,0.2717,0.0218


Unnamed: 0,Precision,Recall,F1
Argument,0.1284,0.1075,0.1075
Facts,0.3262,0.2984,0.2984
Precedent,0.2152,0.1832,0.1832
Ratio of the decision,0.3187,0.3722,0.3722
Ruling by Lower Court,0.0386,0.0467,0.0467
Ruling by Present Court,0.1489,0.3184,0.3184
Statute,0.2353,0.1321,0.1321


CPU times: user 31.9 s, sys: 639 ms, total: 32.6 s
Wall time: 32.5 s


#### XGBoost

In [26]:
from xgboost.sklearn import XGBClassifier

def xgboost_trainer(X, y):
  xgboost = XGBClassifier(objective="multi:softmax", tree_method='hist')
  return xgboost.fit(X, y)

In [27]:
%%time
cross_validation(xgboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8137,0.0067
Recall,0.4505,0.0173
F1,0.5247,0.0127


Unnamed: 0,Score,Standard Deviation
Precision,0.5997,0.089
Recall,0.3435,0.0235
F1,0.3823,0.0323


Unnamed: 0,Precision,Recall,F1
Argument,0.5193,0.2098,0.2098
Facts,0.388,0.3061,0.3061
Precedent,0.4433,0.1387,0.1387
Ratio of the decision,0.3262,0.591,0.591
Ruling by Lower Court,0.2143,0.0089,0.0089
Ruling by Present Court,0.6247,0.2671,0.2671
Statute,0.4825,0.1957,0.1957


CPU times: user 25min, sys: 4 s, total: 25min 4s
Wall time: 24min 57s


### PyTorch models

In [28]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
  def __init__(self, inputs, targets):
    self.X = torch.from_numpy(inputs).float()
    self.y = torch.from_numpy(label_encoder.transform(targets)).float()

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return [self.X[idx], self.y[idx]]


In [29]:
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import ShuffleSplit

torch.manual_seed(1)

class MLPTrainer:

  def __init__(self, model, l2_penalty=0.0001):
    self.model = model
    setattr(self.model.__class__, 'predict', self.predict)
    # The training replicates the default configuration from scikit-learn's MLPClassifier
    self.criterion = torch.nn.CrossEntropyLoss()
    self.lambd = l2_penalty # weight decay for Adam optmizer
    self.n_epochs = 200

  def fit(self, inputs, targets, verbose=False):
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 7
    early_stop_count = 0
    best_loss_validation = float("inf")
    # splitting train data into train and validation sets in order to perform early stopping
    spl = ShuffleSplit(n_splits=1, train_size=0.9, random_state=1)
    targets = np.array(targets)
    for train_index, val_index in spl.split(inputs):
      # getting set's data
      train_x = inputs[train_index]
      train_y = targets[train_index]
      validation_x = inputs[val_index]
      validation_y = targets[val_index]
      train_dl = DataLoader(
        MyDataset(train_x, train_y), 
        batch_size=64)
      validation_dl = DataLoader(
        MyDataset(validation_x, validation_y), 
        batch_size=len(validation_x))
      # training
      self.model.train()
      optimizer = Adam(
        self.model.parameters(), 
        weight_decay=self.lambd)
      for epoch in range(self.n_epochs):
        # iterate mini batches
        for x, y in train_dl:
          optimizer.zero_grad()
          yhat = self.model(x)
          loss = self.criterion(yhat, y)
          loss.backward()
          optimizer.step()
        # Early stopping
        for x, y in validation_dl:
          loss_validation = self.criterion(self.model(x), y)
        if loss_validation >= best_loss_validation - tol:
          early_stop_count += 1
        else:
          early_stop_count = 0
          best_loss_validation = loss_validation
        if early_stop_count == n_iter_no_change:
          break
    if verbose:
      print(f'TRAIN: Stopped at epoch {epoch + 1} {"(MAX EPOCH)" if epoch + 1 == self.n_epochs else ""}')

    self.model.eval()
    return self.model

  def predict(self, X):
    y = self.model.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())


#### TorchMLP

In [30]:
from torch.nn.init import xavier_uniform_
from torch.nn.init import kaiming_uniform_

class TorchMLP(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLP, self).__init__()
    # hidden layer
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_inputs, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)
  

In [31]:
def torch_mlp_trainer(X, y):
  trainer = MLPTrainer(TorchMLP(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [32]:
%%time
cross_validation(torch_mlp_trainer)

TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 19 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 19 


Unnamed: 0,Score,Standard Deviation
Precision,0.8932,0.0056
Recall,0.6931,0.0077
F1,0.7383,0.0081


Unnamed: 0,Score,Standard Deviation
Precision,0.5392,0.0571
Recall,0.3825,0.0356
F1,0.4159,0.0428


Unnamed: 0,Precision,Recall,F1
Argument,0.4021,0.2096,0.2096
Facts,0.3792,0.4133,0.4133
Precedent,0.2857,0.2279,0.2279
Ratio of the decision,0.3486,0.4814,0.4814
Ruling by Lower Court,0.1429,0.0089,0.0089
Ruling by Present Court,0.6503,0.3173,0.3173
Statute,0.4875,0.2541,0.2541


CPU times: user 3min 12s, sys: 2.53 s, total: 3min 15s
Wall time: 3min 14s


#### TorchMLPMaxPool

In [33]:
import math

class TorchMLPMaxPool(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLPMaxPool, self).__init__()
    # max pool
    window_size = 2
    max_pool = torch.nn.MaxPool1d(window_size, ceil_mode=True)
    n_out_pool = math.ceil((n_inputs - window_size) / window_size + 1)
    # hidden layers
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_out_pool, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      max_pool, 
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)


In [34]:
def torch_mlp_maxpool_trainer(X, y):
  trainer = MLPTrainer(TorchMLPMaxPool(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [35]:
%%time
cross_validation(torch_mlp_maxpool_trainer)

TRAIN: Stopped at epoch 21 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 21 
TRAIN: Stopped at epoch 20 


Unnamed: 0,Score,Standard Deviation
Precision,0.859,0.009
Recall,0.6352,0.0133
F1,0.6787,0.0098


Unnamed: 0,Score,Standard Deviation
Precision,0.5635,0.0312
Recall,0.3774,0.0276
F1,0.4101,0.0334


Unnamed: 0,Precision,Recall,F1
Argument,0.378,0.2013,0.2013
Facts,0.3631,0.4028,0.4028
Precedent,0.2901,0.2217,0.2217
Ratio of the decision,0.3471,0.4788,0.4788
Ruling by Lower Court,0.3095,0.0106,0.0106
Ruling by Present Court,0.6511,0.319,0.319
Statute,0.4783,0.2527,0.2527


CPU times: user 2min 6s, sys: 1.9 s, total: 2min 8s
Wall time: 2min 8s


#### TorchLogisticRegression

In [36]:
class TorchLogisticRegression(torch.nn.Module):
  def __init__(self, n_inputs, n_classes, verbose=False):
    super(TorchLogisticRegression, self).__init__()
    self.verbose = verbose
    self.layer = torch.nn.Linear(n_inputs, n_classes)
    xavier_uniform_(self.layer.weight)

  def forward(self, X):
    y = self.layer(X)
    return y
  
  def predict(self, X):
    y = self.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())

  def fit(self, X, y):
    # learning algorithm's params
    learning_rate = 0.5
    momentum = 0.9
    lambda_param = 0.0001 # L2 regularization
    n_iterations = 1000
    decay_rate = 0.95  # learning rate decay
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 10
    early_stop_count = 0
    best_loss = float("inf")
    # loss function and optmizer
    self.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
      self.parameters(), 
      lr=learning_rate, 
      momentum=momentum, 
      weight_decay=lambda_param)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, 
        gamma=decay_rate)
    # Data loader
    batch_size = 64
    train_dl = DataLoader(
      MyDataset(X, y), 
      batch_size=batch_size, 
      shuffle=True)
    # Train loop
    for i in range(1, n_iterations + 1):
      # iterate mini batches
      for x_batch, y_batch in train_dl:
        optimizer.zero_grad()
        y_hat = self(x_batch)
        loss = criterion(y_hat, y_batch)
        loss.backward()
        optimizer.step()
      lr_scheduler.step()
      # early stop
      if loss >= best_loss - tol:
        early_stop_count += 1
      else:
        early_stop_count = 0
        best_loss = loss
      if early_stop_count == n_iter_no_change:
        break
    
    if self.verbose:
      print(f'TRAIN: Stopped at iteration {i} {"(MAX ITERATION)" if i == n_iterations else ""}')
    self.eval()
    return self


In [37]:
def torch_lr_trainer(X, y):
  lr_ = TorchLogisticRegression(n_features, n_classes, verbose=True)
  return lr_.fit(X, y)

In [38]:
%%time
cross_validation(torch_lr_trainer)

TRAIN: Stopped at iteration 25 
TRAIN: Stopped at iteration 26 
TRAIN: Stopped at iteration 26 
TRAIN: Stopped at iteration 45 
TRAIN: Stopped at iteration 37 


Unnamed: 0,Score,Standard Deviation
Precision,0.8465,0.0055
Recall,0.5378,0.0211
F1,0.5958,0.0192


Unnamed: 0,Score,Standard Deviation
Precision,0.5892,0.0601
Recall,0.3678,0.021
F1,0.4048,0.0298


Unnamed: 0,Precision,Recall,F1
Argument,0.4832,0.1666,0.1666
Facts,0.3958,0.4183,0.4183
Precedent,0.356,0.1909,0.1909
Ratio of the decision,0.3471,0.5473,0.5473
Ruling by Lower Court,0.1429,0.003,0.003
Ruling by Present Court,0.6798,0.3055,0.3055
Statute,0.5414,0.2073,0.2073


CPU times: user 58.5 s, sys: 930 ms, total: 59.4 s
Wall time: 59.2 s


### Summary

In [39]:
metrics_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
for model_name, metrics in test_metrics.items():
  metrics_df.loc[model_name] = [f'{metrics[0]:.4f}', f'{metrics[1]:.4f}', f'{metrics[2]:.4f}']
metrics_display = display(metrics_df, display_id='metrics_table')

Unnamed: 0,Precision,Recall,F1
LogisticRegression,0.5964,0.3645,0.4027
LinearSVC,0.4829,0.3902,0.4146
KNeighborsClassifier,0.2869,0.2064,0.2115
MLPClassifier,0.5555,0.3722,0.4075
DecisionTreeClassifier,0.363,0.247,0.2391
RandomForestClassifier,0.4334,0.151,0.0961
AdaBoostClassifier,0.4517,0.2968,0.3148
GaussianNB,0.2822,0.2917,0.2717
XGBClassifier,0.5997,0.3435,0.3823
TorchMLP,0.5392,0.3825,0.4159
