<a href="https://colab.research.google.com/github/alexlimatds/circle-2022/blob/main/RRLLJ_TF_IDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Rhetorical Role Labeling for Legal Judgments - experiments with TF-IDF features

In this notebook we utilize TF-IDF features to represent the sentences.

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
g_drive_dir = "/content/gdrive/MyDrive/"

Mounted at /content/gdrive


In [2]:
!mkdir data
!mkdir data/train
!tar -xf {g_drive_dir}AILA_2021/AILA_2021_train.tar.xz -C data/train

train_dir = 'data/train/'

In [3]:
import pandas as pd
from os import listdir

def read_docs(dir_name):
  docs = {} # key: file name, value: dataframe with sentences and labels
  for f in listdir(dir_name):
    df = pd.read_csv(
        dir_name + f, 
        sep='\t', 
        names=['sentence', 'label'])
    docs[f] = df
  return docs

docs_train = read_docs(train_dir)
print(f'TRAIN: {len(docs_train)} documents read.')

TRAIN: 60 documents read.


In [4]:
def sentences_to_list(docs):
  sentences_list = []
  targets_list = []
  for df in docs.values():
    sentences_list.extend(df['sentence'].tolist())
    targets_list.extend(df['label'].tolist())
  return sentences_list, targets_list

def target_stats(set_name, targets):
  stats = {}
  for t in targets:
    stats[t] = stats.get(t, 0) + 1
  print(f'Statistics of the {set_name} set:')
  print(f'   Total number of sentences: {len(targets)}')
  for t, n in stats.items():
    print(f'   Number of {t} labels: {n}')

sentences_train, train_labels = sentences_to_list(docs_train)

target_stats('TRAIN', train_labels)

Statistics of the TRAIN set:
   Total number of sentences: 10024
   Number of Facts labels: 2368
   Number of Ruling by Lower Court labels: 341
   Number of Ruling by Present Court labels: 301
   Number of Argument labels: 901
   Number of Ratio of the decision labels: 3919
   Number of Precedent labels: 1523
   Number of Statute labels: 671


### TF-IDF features

In [5]:
import re

def preprocess(str):
  pstr = str
  pstr = re.sub(r'[/(){}\[\]\|@,;]', ' ', pstr) # replaces symbols with spaces
  pstr = re.sub(r'[^0-9a-z #+_]', '', pstr)     # removes bad symbols
  pstr = re.sub(r'\d+', '', pstr)               # removes numbers
  return pstr

In [6]:
%%time
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_model = TfidfVectorizer(
    preprocessor=preprocess, 
    ngram_range=(1, 3), 
    min_df=10)
tfidf_model.fit(sentences_train)

n_features = len(tfidf_model.vocabulary_)
print(f'Learned {n_features} terms.')

Learned 7438 terms.
CPU times: user 1.95 s, sys: 148 ms, total: 2.1 s
Wall time: 2.8 s


In [7]:
docs_train_tfidf = {} # key: file id, value: matrix of features
for doc_id, df in docs_train.items():
  docs_train_tfidf[doc_id] = tfidf_model.transform(df['sentence'].tolist()).toarray()


### Label encoder

In [8]:
from sklearn.preprocessing import LabelBinarizer

label_encoder = LabelBinarizer()
label_encoder.fit(train_labels)

docs_train_targets = {} # key: file id, value: matrix of one-hot encoded labels
for doc_id, df in docs_train.items():
  docs_train_targets[doc_id] = label_encoder.transform(df['label'].tolist())

n_classes = label_encoder.classes_.shape[0]
n_classes

7

### Evaluation functions

In [9]:
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import display, HTML

def docs_to_sentences(docs_idx, doc_keys_list):
  features_ = None
  targets_ = []
  for idx in docs_idx:
    doc_id = doc_keys_list[idx]
    if features_ is None:
      features_ = docs_train_tfidf[doc_id]
    else:
      features_ = np.vstack((features_, docs_train_tfidf[doc_id]))
    targets_.extend(docs_train[doc_id]['label'].tolist())
  return features_, targets_

def metrics_report(title, averages, stds):
  report_df = pd.DataFrame(columns=['Score', 'Standard Deviation'])
  report_df.loc['Precision'] = [f'{averages[0]:.4f}', f'{stds[0]:.4f}']
  report_df.loc['Recall'] = [f'{averages[1]:.4f}', f'{stds[1]:.4f}']
  report_df.loc['F1'] = [f'{averages[2]:.4f}', f'{stds[2]:.4f}']
  display(HTML(f'<br><span style="font-weight: bold">{title}: cross-validation macro averages</span>'))
  display(report_df)

def classification_report(metrics):
  report_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
  for i, l in enumerate(label_encoder.classes_):
    report_df.loc[l] = [
      f'{metrics[i, 0]:.4f}', 
      f'{metrics[i, 1]:.4f}', 
      f'{metrics[i, 2]:.4f}', 
    ]
  display(HTML(f'<br><span style="font-weight: bold">Classification Report (cross-validation test averages)</span>'))
  display(report_df)

test_metrics = {}

def cross_validation(trainer):
  train_metrics_cross = []
  test_metrics_cross = []
  test_metrics_by_class = np.zeros((n_classes, 3)) # 3 metrics (P, R, F1)
  n_folds = 5
  skf = KFold(n_splits=n_folds) # for cross-validation
  docs_list = list(docs_train.keys())
  for train_docs_idx, test_docs_idx in skf.split(docs_list): # The cross-validation splitting is document-oriented
    # train
    train_features_fold, train_targets_fold = docs_to_sentences(train_docs_idx, docs_list)
    model = trainer(train_features_fold, train_targets_fold)
    # test
    test_features_fold, test_targets_fold = docs_to_sentences(test_docs_idx, docs_list)
    predictions = model.predict(test_features_fold)
    # averaged test metrics
    p_test, r_test, f1_test, _ = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    test_metrics_cross.append([p_test, r_test, f1_test])
    # test metrics by class
    metrics = precision_recall_fscore_support(
        test_targets_fold, 
        predictions, 
        average=None, 
        zero_division=0, 
        labels=label_encoder.classes_)
    test_metrics_by_class = test_metrics_by_class + np.hstack((
        metrics[0].reshape(-1, 1),  # precision
        metrics[1].reshape(-1, 1),  # recall
        metrics[1].reshape(-1, 1))) # F1
    # train metrics
    predictions = model.predict(train_features_fold)
    p_train, r_train, f1_train, _ = precision_recall_fscore_support(
        train_targets_fold, 
        predictions, 
        average='macro', 
        zero_division=0)
    train_metrics_cross.append([p_train, r_train, f1_train])
  
  train_metrics_cross = np.array(train_metrics_cross)
  train_mean = np.mean(train_metrics_cross, axis=0)
  train_std = np.std(train_metrics_cross, axis=0)
  metrics_report('TRAIN', train_mean, train_std)

  test_metrics_cross = np.array(test_metrics_cross)
  test_mean = np.mean(test_metrics_cross, axis=0)
  test_std = np.std(test_metrics_cross, axis=0)
  metrics_report('TEST', test_mean, test_std)

  test_metrics_by_class /= n_classes
  classification_report(test_metrics_by_class)

  test_metrics[model.__class__.__name__] = test_mean

### Scikit-learn Models

#### Logistic regression

In [39]:
from sklearn.linear_model import LogisticRegression

def lr_trainer(X, y):
  logreg = LogisticRegression(solver='sag', random_state=1)
  return logreg.fit(X, y)

In [40]:
%%time

cross_validation(lr_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8637,0.0066
Recall,0.5452,0.0164
F1,0.6071,0.0137


Unnamed: 0,Score,Standard Deviation
Precision,0.6044,0.0794
Recall,0.3558,0.0223
F1,0.3942,0.0263


Unnamed: 0,Precision,Recall,F1
Argument,0.5147,0.1596,0.1596
Facts,0.3948,0.3926,0.3926
Precedent,0.3401,0.1909,0.1909
Ratio of the decision,0.3478,0.5608,0.5608
Ruling by Lower Court,0.2143,0.0052,0.0052
Ruling by Present Court,0.6792,0.2817,0.2817
Statute,0.5313,0.1882,0.1882


CPU times: user 8min 26s, sys: 2.56 s, total: 8min 29s
Wall time: 8min 25s


#### Linear SVM

In [41]:
from sklearn.svm import LinearSVC

def linear_svm_trainer(X, y):
  svm = LinearSVC(random_state=1)
  return svm.fit(X, y)

In [42]:
%%time

cross_validation(linear_svm_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.9506,0.0025
Recall,0.8926,0.0083
F1,0.9192,0.0054


Unnamed: 0,Score,Standard Deviation
Precision,0.4856,0.042
Recall,0.3825,0.0268
F1,0.4087,0.0316


Unnamed: 0,Precision,Recall,F1
Argument,0.3698,0.2249,0.2249
Facts,0.3697,0.3864,0.3864
Precedent,0.2578,0.238,0.238
Ratio of the decision,0.3568,0.4531,0.4531
Ruling by Lower Court,0.1155,0.0252,0.0252
Ruling by Present Court,0.5424,0.3284,0.3284
Statute,0.4162,0.2565,0.2565


CPU times: user 21.4 s, sys: 2.14 s, total: 23.5 s
Wall time: 21.5 s


#### KNN

In [43]:
from sklearn.neighbors import KNeighborsClassifier

def knn_trainer(X, y):
  knn = KNeighborsClassifier(5)
  return knn.fit(X, y)

In [44]:
%%time

cross_validation(knn_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.4924,0.0243
Recall,0.3912,0.0148
F1,0.4002,0.0159


Unnamed: 0,Score,Standard Deviation
Precision,0.2886,0.0115
Recall,0.205,0.017
F1,0.2084,0.0219


Unnamed: 0,Precision,Recall,F1
Argument,0.0698,0.124,0.124
Facts,0.1842,0.1955,0.1955
Precedent,0.1403,0.2734,0.2734
Ratio of the decision,0.273,0.1526,0.1526
Ruling by Lower Court,0.0324,0.0083,0.0083
Ruling by Present Court,0.6373,0.2103,0.2103
Statute,0.1063,0.0609,0.0609


CPU times: user 3min 35s, sys: 4.56 s, total: 3min 40s
Wall time: 2min 6s


#### MLP Classifier

In [45]:
from sklearn.neural_network import MLPClassifier

def mlp_trainer(X, y):
  # Default MLP from scikit-learn
  mlp = MLPClassifier(early_stopping=True, random_state=1)
  return mlp.fit(X, y)

In [46]:
%%time

cross_validation(mlp_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.876,0.0154
Recall,0.6994,0.0447
F1,0.7488,0.0472


Unnamed: 0,Score,Standard Deviation
Precision,0.5336,0.0566
Recall,0.3751,0.0315
F1,0.4077,0.0364


Unnamed: 0,Precision,Recall,F1
Argument,0.4111,0.1963,0.1963
Facts,0.3801,0.3975,0.3975
Precedent,0.2894,0.25,0.25
Ratio of the decision,0.3546,0.4857,0.4857
Ruling by Lower Court,0.1349,0.0075,0.0075
Ruling by Present Court,0.6351,0.3105,0.3105
Statute,0.4627,0.2279,0.2279


CPU times: user 4min 31s, sys: 1min 36s, total: 6min 7s
Wall time: 3min 19s


#### Decision Tree

In [47]:
from sklearn.tree import DecisionTreeClassifier

def decision_tree_trainer(X, y):
  dtree = DecisionTreeClassifier(max_depth=5, random_state=1)
  return dtree.fit(X, y)

In [48]:
%%time

cross_validation(decision_tree_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5736,0.1141
Recall,0.2523,0.0159
F1,0.2518,0.0202


Unnamed: 0,Score,Standard Deviation
Precision,0.4055,0.0742
Recall,0.2382,0.0431
F1,0.2331,0.058


Unnamed: 0,Precision,Recall,F1
Argument,0.4672,0.0784,0.0784
Facts,0.1424,0.3109,0.3109
Precedent,0.4695,0.1165,0.1165
Ratio of the decision,0.336,0.4996,0.4996
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.2857,0.0579,0.0579
Statute,0.3269,0.128,0.128


CPU times: user 39.5 s, sys: 1.27 s, total: 40.8 s
Wall time: 40.6 s


#### Random Forest

In [49]:
from sklearn.ensemble import RandomForestClassifier

def random_forest_trainer(X, y):
  rforest = RandomForestClassifier(max_depth=5, n_estimators=10, random_state=1)
  return rforest.fit(X, y)

In [50]:
%%time

cross_validation(random_forest_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6157,0.1034
Recall,0.1599,0.0053
F1,0.1147,0.0089


Unnamed: 0,Score,Standard Deviation
Precision,0.4382,0.0769
Recall,0.1524,0.0037
F1,0.0996,0.0147


Unnamed: 0,Precision,Recall,F1
Argument,0.4048,0.0059,0.0059
Facts,0.3575,0.0143,0.0143
Precedent,0.5828,0.0151,0.0151
Ratio of the decision,0.2744,0.7075,0.7075
Ruling by Lower Court,0.0,0.0,0.0
Ruling by Present Court,0.2857,0.0121,0.0121
Statute,0.2857,0.007,0.007


CPU times: user 19.8 s, sys: 1.35 s, total: 21.1 s
Wall time: 21 s


#### AdaBoost

In [51]:
from sklearn.ensemble import AdaBoostClassifier

def adaboost_trainer(X, y):
  adab = AdaBoostClassifier(random_state=1)
  return adab.fit(X, y)

In [52]:
%%time

cross_validation(adaboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.5423,0.0364
Recall,0.3349,0.0245
F1,0.3641,0.0244


Unnamed: 0,Score,Standard Deviation
Precision,0.4721,0.04
Recall,0.295,0.0389
F1,0.3161,0.0456


Unnamed: 0,Precision,Recall,F1
Argument,0.4641,0.1697,0.1697
Facts,0.296,0.1796,0.1796
Precedent,0.4252,0.1261,0.1261
Ratio of the decision,0.3101,0.5708,0.5708
Ruling by Lower Court,0.0952,0.006,0.006
Ruling by Present Court,0.4322,0.2356,0.2356
Statute,0.3378,0.1873,0.1873


CPU times: user 6min 7s, sys: 4.18 s, total: 6min 11s
Wall time: 6min 9s


#### Naive Bayes

In [53]:
from sklearn.naive_bayes import GaussianNB

def naive_bayes_trainer(X, y):
  nb = GaussianNB()
  return nb.fit(X, y)

In [54]:
%%time
cross_validation(naive_bayes_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.6508,0.0136
Recall,0.8317,0.003
F1,0.6907,0.0118


Unnamed: 0,Score,Standard Deviation
Precision,0.2866,0.0316
Recall,0.285,0.0145
F1,0.2703,0.0175


Unnamed: 0,Precision,Recall,F1
Argument,0.1211,0.0904,0.0904
Facts,0.3399,0.3044,0.3044
Precedent,0.2098,0.1878,0.1878
Ratio of the decision,0.3259,0.3777,0.3777
Ruling by Lower Court,0.0432,0.0485,0.0485
Ruling by Present Court,0.1329,0.2872,0.2872
Statute,0.2599,0.1292,0.1292


CPU times: user 34.5 s, sys: 2.07 s, total: 36.6 s
Wall time: 36.4 s


#### XGBoost

In [55]:
from xgboost.sklearn import XGBClassifier

def xgboost_trainer(X, y):
  xgboost = XGBClassifier()
  return xgboost.fit(X, y)

In [56]:
%%time
cross_validation(xgboost_trainer)

Unnamed: 0,Score,Standard Deviation
Precision,0.8108,0.007
Recall,0.4521,0.0191
F1,0.5263,0.019


Unnamed: 0,Score,Standard Deviation
Precision,0.605,0.1047
Recall,0.3402,0.0235
F1,0.3797,0.0278


Unnamed: 0,Precision,Recall,F1
Argument,0.5161,0.2144,0.2144
Facts,0.3988,0.295,0.295
Precedent,0.4456,0.1467,0.1467
Ratio of the decision,0.3272,0.5982,0.5982
Ruling by Lower Court,0.2857,0.0052,0.0052
Ruling by Present Court,0.6056,0.2606,0.2606
Statute,0.4458,0.181,0.181


CPU times: user 1h 1min 35s, sys: 11.6 s, total: 1h 1min 47s
Wall time: 1h 1min 31s


### PyTorch models

In [10]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
  def __init__(self, inputs, targets):
    self.X = torch.from_numpy(inputs).float()
    self.y = torch.from_numpy(label_encoder.transform(targets)).float()

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return [self.X[idx], self.y[idx]]


In [31]:
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import ShuffleSplit

torch.manual_seed(1)

class MLPTrainer:

  def __init__(self, model, l2_penalty=0.0001):
    self.model = model
    # The training replicates the default configuration from scikit-learn's MLPClassifier
    self.criterion = torch.nn.CrossEntropyLoss()
    self.lambd = l2_penalty # weight decay for Adam optmizer
    self.n_epochs = 200

  def fit(self, inputs, targets, verbose=False):
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 7
    early_stop_count = 0
    previous_loss_validation = float("inf")
    # splitting train data into train and validation sets in order to perform early stopping
    spl = ShuffleSplit(n_splits=1, train_size=0.9, random_state=1)
    targets = np.array(targets)
    for train_index, val_index in spl.split(inputs):
      # getting set's data
      train_x = inputs[train_index]
      train_y = targets[train_index]
      validation_x = inputs[val_index]
      validation_y = targets[val_index]
      train_dl = DataLoader(
        MyDataset(train_x, train_y), 
        batch_size=64)
      validation_dl = DataLoader(
        MyDataset(validation_x, validation_y), 
        batch_size=len(validation_x))
      # training
      self.model.train()
      optimizer = Adam(
        self.model.parameters(), 
        weight_decay=self.lambd)
      for epoch in range(self.n_epochs):
        # iterate mini batches
        for x, y in train_dl:
          optimizer.zero_grad()
          yhat = self.model(x)
          loss = self.criterion(yhat, y)
          loss.backward()
          optimizer.step()
        # Early stopping
        for x, y in validation_dl:
          loss_validation = self.criterion(self.model(x), y)
        if loss_validation >= previous_loss_validation - tol:
          early_stop_count += 1
        else:
          early_stop_count = 0
        if early_stop_count == n_iter_no_change:
          break
        previous_loss_validation = loss_validation
    if verbose:
      print(f'TRAIN: Stopped at epoch {epoch + 1} {"(MAX EPOCH)" if epoch + 1 == self.n_epochs else ""}')

    self.model.eval()
    return self

  def predict(self, X):
    y = self.model.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())


#### TorchMLP

In [29]:
from torch.nn.init import xavier_uniform_
from torch.nn.init import kaiming_uniform_

class TorchMLP(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLP, self).__init__()
    # hidden layer
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_inputs, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)
  

In [22]:
def torch_mlp_trainer(X, y):
  trainer = MLPTrainer(TorchMLP(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [32]:
%%time
cross_validation(torch_mlp_trainer)

TRAIN: Stopped at epoch 17 
TRAIN: Stopped at epoch 21 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 20 
TRAIN: Stopped at epoch 19 


Unnamed: 0,Score,Standard Deviation
Precision,0.8994,0.0038
Recall,0.6906,0.0262
F1,0.736,0.0304


Unnamed: 0,Score,Standard Deviation
Precision,0.5378,0.0758
Recall,0.3689,0.0222
F1,0.4014,0.0262


Unnamed: 0,Precision,Recall,F1
Argument,0.4179,0.1932,0.1932
Facts,0.3731,0.3989,0.3989
Precedent,0.2757,0.236,0.236
Ratio of the decision,0.3551,0.4913,0.4913
Ruling by Lower Court,0.1429,0.0032,0.0032
Ruling by Present Court,0.6474,0.305,0.305
Statute,0.4767,0.2169,0.2169


CPU times: user 2min 39s, sys: 22.5 s, total: 3min 1s
Wall time: 3min 1s


#### TorchMLPMaxPool

In [33]:
import math

class TorchMLPMaxPool(torch.nn.Module):
  def __init__(self, n_inputs, n_classes):
    super(TorchMLPMaxPool, self).__init__()
    # max pool
    window_size = 2
    max_pool = torch.nn.MaxPool1d(window_size, ceil_mode=True)
    n_out_pool = math.ceil((n_inputs - window_size) / window_size + 1)
    # hidden layers
    n_hidden_units = 100
    hidden1 = torch.nn.Linear(n_out_pool, n_hidden_units)
    kaiming_uniform_(hidden1.weight, nonlinearity='relu')
    relu = torch.nn.ReLU()
    # output layer
    output = torch.nn.Linear(n_hidden_units, n_classes)
    xavier_uniform_(output.weight)
    # There's no need of softmax function because it's included in the CrossEntropyLoss function
    self.layers = torch.nn.Sequential(
      max_pool, 
      hidden1, 
      relu, 
      output)
  
  def forward(self, X):
    return self.layers(X)


In [34]:
def torch_mlp_maxpool_trainer(X, y):
  trainer = MLPTrainer(TorchMLPMaxPool(n_features, n_classes), l2_penalty=0.0015)
  return trainer.fit(X, y, verbose=True)

In [35]:
%%time
cross_validation(torch_mlp_maxpool_trainer)

TRAIN: Stopped at epoch 19 
TRAIN: Stopped at epoch 22 
TRAIN: Stopped at epoch 21 
TRAIN: Stopped at epoch 19 
TRAIN: Stopped at epoch 19 


Unnamed: 0,Score,Standard Deviation
Precision,0.8649,0.0057
Recall,0.6318,0.0241
F1,0.6779,0.0245


Unnamed: 0,Score,Standard Deviation
Precision,0.5374,0.0811
Recall,0.368,0.0246
F1,0.3989,0.0301


Unnamed: 0,Precision,Recall,F1
Argument,0.4061,0.1959,0.1959
Facts,0.3651,0.397,0.397
Precedent,0.2902,0.2463,0.2463
Ratio of the decision,0.3497,0.4807,0.4807
Ruling by Lower Court,0.1905,0.0052,0.0052
Ruling by Present Court,0.6356,0.3053,0.3053
Statute,0.4497,0.2097,0.2097


CPU times: user 1min 41s, sys: 3.75 s, total: 1min 45s
Wall time: 1min 45s


#### TorchLogisticRegression

In [36]:
class TorchLogisticRegression(torch.nn.Module):
  def __init__(self, n_inputs, n_classes, verbose=False):
    super(TorchLogisticRegression, self).__init__()
    self.verbose = verbose
    self.layer = torch.nn.Linear(n_inputs, n_classes)
    xavier_uniform_(self.layer.weight)

  def forward(self, X):
    y = self.layer(X)
    return y
  
  def predict(self, X):
    y = self.forward(torch.from_numpy(X).float())
    return label_encoder.inverse_transform(y.detach().numpy())

  def fit(self, X, y):
    # learning algorithm's params
    learning_rate = 0.9
    momentum = 0.9
    lambda_param = 0.0001 # L2 regularization
    n_iterations = 1000
    decay_rate = 0.95  # learning rate decay
    # early stopping params and variables
    tol = 0.001
    n_iter_no_change = 10
    early_stop_count = 0
    previous_loss = float("inf")
    # loss function and optmizer
    self.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
      self.parameters(), 
      lr=learning_rate, 
      momentum=momentum, 
      weight_decay=lambda_param)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        optimizer=optimizer, 
        gamma=decay_rate)
    # Data loader
    batch_size = 64
    train_dl = DataLoader(
      MyDataset(X, y), 
      batch_size=batch_size, 
      shuffle=True)
    # Train loop
    for i in range(1, n_iterations + 1):
      # enumerate mini batches
      for i, (x_batch, y_batch) in enumerate(train_dl):
        optimizer.zero_grad()
        y_hat = self(x_batch)
        loss = criterion(y_hat, y_batch)
        loss.backward()
        optimizer.step()
      lr_scheduler.step()
      # early stop
      if loss >= previous_loss - tol:
        early_stop_count += 1
      else:
        early_stop_count = 0
      if early_stop_count == n_iter_no_change:
        break
      previous_loss = loss
    
    if self.verbose:
      print(f'Stopped at iteration {i}')
    self.eval()
    return self


In [37]:
def torch_lr_trainer(X, y):
  lr_ = TorchLogisticRegression(n_features, n_classes, verbose=True)
  return lr_.fit(X, y)

In [38]:
%%time
cross_validation(torch_lr_trainer)

Stopped at iteration 132
Stopped at iteration 130
Stopped at iteration 118
Stopped at iteration 122
Stopped at iteration 121


Unnamed: 0,Score,Standard Deviation
Precision,0.8672,0.0052
Recall,0.5715,0.0212
F1,0.6352,0.0192


Unnamed: 0,Score,Standard Deviation
Precision,0.5969,0.0823
Recall,0.363,0.0234
F1,0.4012,0.0272


Unnamed: 0,Precision,Recall,F1
Argument,0.4947,0.1675,0.1675
Facts,0.3911,0.3949,0.3949
Precedent,0.3337,0.1982,0.1982
Ratio of the decision,0.3499,0.5511,0.5511
Ruling by Lower Court,0.2143,0.0052,0.0052
Ruling by Present Court,0.6754,0.296,0.296
Statute,0.5252,0.2021,0.2021


CPU times: user 17min 44s, sys: 17.3 s, total: 18min 1s
Wall time: 17min 59s


### Summary

In [57]:
metrics_df = pd.DataFrame(columns=['Precision', 'Recall', 'F1'])
for model_name, metrics in test_metrics.items():
  metrics_df.loc[model_name] = [f'{metrics[0]:.4f}', f'{metrics[1]:.4f}', f'{metrics[2]:.4f}']
metrics_display = display(metrics_df, display_id='metrics_table')

Unnamed: 0,Precision,Recall,F1
TorchMLP,0.5361,0.3691,0.4012
MLPTrainer,0.5374,0.368,0.3989
TorchLogisticRegression,0.5969,0.363,0.4012
LogisticRegression,0.6044,0.3558,0.3942
LinearSVC,0.4856,0.3825,0.4087
KNeighborsClassifier,0.2886,0.205,0.2084
MLPClassifier,0.5336,0.3751,0.4077
DecisionTreeClassifier,0.4055,0.2382,0.2331
RandomForestClassifier,0.4382,0.1524,0.0996
AdaBoostClassifier,0.4721,0.295,0.3161
