In [1]:
# dependencies to run the notebook

# !pip install torch==1.12.1
# !pip install torchmetrics==0.10.2
# !pip install torchvision==0.14.0
# !pip install texttable==1.6.4


<span style="color:darkviolet">
<font size="3">Download the below files from https://drive.google.com/drive/folders/1q50QMurzK9a5l4JBHWjf8VuWcZkbF7PM to run this notebook : <br>
1) train_bert_embeddings.pkl <br>
2) test_bert_embeddings.pkl <br> </font>
</span>


In [2]:
import pickle
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torch.autograd import Variable
import torch.optim as optim
import warnings
import numpy as np
import torch.nn.functional as F
import pandas as pd
from sklearn.metrics import classification_report
from texttable import Texttable
from torch.utils.data.sampler import SubsetRandomSampler

warnings.filterwarnings("ignore")


In [None]:
EMBEDDINGS_NAME = "xlnet"


In [None]:
BASE_PATH = "."
TRAIN_EMBEDDINGS = f"{BASE_PATH}/../embeddings/train_{EMBEDDINGS_NAME}_embeddings.pkl"
TEST_EMBEDDINGS = f"{BASE_PATH}/../embeddings/test_{EMBEDDINGS_NAME}_embeddings.pkl"
TRAIN_DATASET_PATH = f"{BASE_PATH}/../legal_bert/data/tos_clauses_train.csv"
TEST_DATASET_PATH = f"{BASE_PATH}/../legal_bert/data/tos_clauses_dev.csv"
RNN_MODEL_PATH = "../models/rnn_{EMBEDDINGS_NAME}_model.pt"
GRU_MODEL_PATH = "../models/gru_{EMBEDDINGS_NAME}_model.pt"
LSTM_MODEL_PATH = "../models/lstm_{EMBEDDINGS_NAME}_model.pt"
BILSTM_MODEL_PATH = "../models/bilstm_{EMBEDDINGS_NAME}_model.pt"


In [3]:
# Read train BERT embeddings
with open(TRAIN_EMBEDDINGS, "rb") as f:
    training_data = pickle.load(f)


In [4]:
# Read test BERT embeddings
with open(TEST_EMBEDDINGS, "rb") as f:
    testing_data = pickle.load(f)


In [5]:
# check
for item in training_data:
    print("The data is : ", item)


The data is :  embeddings
The data is :  tokenized_txt


In [6]:
len(training_data["embeddings"]), training_data["embeddings"][0].shape


(7531, torch.Size([1, 512, 768]))

In [7]:
len(training_data["tokenized_txt"]), len(training_data["tokenized_txt"][0])


(7531, 512)

<span style="color:darkviolet">
<font size="4">Get the labels from train and test files.</font>
</span>


In [8]:
TRAIN_DATASET_PATH = "../legal_bert/data/tos_clauses_train.csv"
TEST_DATASET_PATH = "../legal_bert/data/tos_clauses_dev.csv"


In [9]:
train_df = pd.read_csv(TRAIN_DATASET_PATH, header=0)
test_df = pd.read_csv(TEST_DATASET_PATH, header=0)


In [10]:
train_targets = train_df.label.values
test_targets = test_df.label.values


In [11]:
test_targets


array([0, 0, 0, ..., 0, 1, 0])

In [12]:
device = None
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f"Using Device: {device}")


Using Device: cpu


<span style="color:darkviolet">
<font size="4">Create Dataset, Train and Test Classes</font>
</span>


In [13]:
class Dataset(object):
    """An abstract class representing a Dataset.
    All other datasets should subclass it. All subclasses should
    override ``__len__``, that provides the size of the dataset,
    and ``__getitem__``, supporting integer indexing in range
    from 0 to len(self) exclusive.
    """

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])


In [14]:
class TOSDataset(Dataset):
    def __init__(self, X, Y, transform=None):
        self.data1 = X
        self.data2 = Y
        self.transform = transform

    def __len__(self):
        return len(self.data1)

    def __getitem__(self, index):
        x = self.data1[index]
        y = self.data2[index]

        if self.transform is not None:
            x = torch.tensor(x)

        return torch.squeeze(x, dim=1), torch.tensor(y)


In [15]:
test_len = len(test_df)
train_len = len(train_df)
X_train_tensor = TOSDataset(train_df["sentences"], train_df["label"])
# X_test_tensor = Train_Model(test_df)

num_train = len(X_train_tensor)
indices = list(range(num_train))
np.random.shuffle(indices)
# split = int(np.floor(num_train))
# train_idx = indices[split:]

train_sampler = SubsetRandomSampler(indices)
# valid_sampler = SubsetRandomSampler(valid_idx)
print(train_sampler)
train_df_by_index = train_df.loc[indices]
# val_df_by_index = df_train.loc[valid_idx]
train_fair = sum(train_df_by_index["label"] == 0)
train_unfair = sum(train_df_by_index["label"] == 1)
# val_fair = sum(val_df_by_index['label'] == 0)
# val_unfair = sum(val_df_by_index['label'] == 1)

print("train_fair:" + str(train_fair))
print("train_unfair:" + str(train_unfair))
# print("val_fair:" + str(val_fair))
# print("val_unfair:" + str(val_unfair))


<torch.utils.data.sampler.SubsetRandomSampler object at 0x17f907520>
train_fair:6705
train_unfair:826


In [16]:
train_data = TOSDataset(training_data["embeddings"], train_targets, transform=transforms.ToTensor())
test_data = TOSDataset(testing_data["embeddings"], test_targets, transform=transforms.ToTensor())


<span style="color:darkviolet">
<font size="4">Prepare Data loaders</font>
</span>


In [17]:
# how many samples per batch to load
BATCH_SIZE = 20

# number of subprocesses to use for data loading
NUM_WORKERS = 0


In [18]:
# prepare data loaders
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)


In [19]:
# check sizes
dataiter = iter(train_loader)
sample_x, sample_y = dataiter.next()

print("Sample input size: ", sample_x.size())  # batch_size, seq_length
# print("Sample input: \n", sample_x)
print()
print("Sample label size: ", sample_y.size())  # batch_size
# print("Sample label: \n", sample_y)


Sample input size:  torch.Size([20, 1, 512, 768])

Sample label size:  torch.Size([20])


In [20]:
torch.squeeze(sample_x, dim=1).shape


torch.Size([20, 512, 768])

<span style="color:darkviolet">
<font size="5">SIMPLE RNN</font><br>
<font size="2.5">Number of hidden dimension : 20</font> <br>
<font size="2.5">Number of layers: 1</font> <br>
<font size="2.5">Number of epochs: 5</font> <br>
</span>


In [21]:
class RNNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):

        super(RNNet, self).__init__()

        # Number of hidden dimensions
        self.hidden_dim = hidden_dim

        # RNN
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=3, batch_first=True, nonlinearity="relu")

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

        # Initialize hidden state with zeros
        h0 = Variable(torch.zeros(3, x.size(0), self.hidden_dim))

        # One time step
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out


In [22]:
import time


def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


In [None]:
EMBEDDING_DIM = 768
OUTPUT_DIM = 2


In [28]:
RNN_HIDDEN_DIM = 1024
RNN_N_EPOCHS = 6


In [29]:
from sklearn.utils.class_weight import compute_class_weight

class_weight = compute_class_weight(
    "balanced", classes=np.unique(train_df_by_index["label"]), y=train_df_by_index["label"]
)
class_weight


array([0.56159582, 4.55871671])

In [30]:
model = RNNet(EMBEDDING_DIM, RNN_HIDDEN_DIM, OUTPUT_DIM)

loss_fn = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weight))
optimizer = optim.Adam(model.parameters(), lr=1e-4)
test_min_loss = np.inf

for epoch in range(RNN_N_EPOCHS):

    start_time = time.time()
    model.train()
    train_loss = 0.0
    test_loss = 0.0
    for inputs, target in train_loader:
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()
        inputs = torch.squeeze(inputs, dim=1)
        output = model(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    model.eval()
    for inputs, target in test_loader:
        inputs, target = inputs.to(device), target.to(device)
        inputs = torch.squeeze(inputs, dim=1)
        output = model(inputs)
        loss = loss_fn(output, target)
        test_loss += loss.item()

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f"Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s")
    print("\tTraining Loss: {:.6f} \Test Loss: {:.6f}".format(train_loss, test_loss))
    if test_loss <= test_min_loss:
        print("Test loss decreased ({:.6f} --> {:.6f}). Saving model...".format(test_min_loss, test_loss))
        torch.save(model.state_dict(), RNN_MODEL_PATH)
        test_min_loss = test_loss


Epoch: 01 | Epoch Time: 8m 24s
	Training Loss: 0.034758 \Test Loss: 0.035049
Test loss decreased (inf --> 0.035049). Saving model...


In [None]:
y_pred_list = []
y_targ_list = []
model = RNNet(EMBEDDING_DIM, RNN_HIDDEN_DIM, OUTPUT_DIM).to(device)
model.load_state_dict(torch.load(RNN_MODEL_PATH))
model.eval()

with torch.no_grad():
    for inputs, target in test_loader:
        inputs, target = inputs.to(device), target.to(device)
        inputs = torch.squeeze(inputs, dim=1)
        y_test_pred = model(inputs)
        _, y_test_pred = torch.max(y_test_pred, 1)
        y_pred_tag = y_test_pred
        y_pred_list.append(y_pred_tag.cpu().numpy())
        y_targ_list.append(target.cpu().numpy())

y_pred_list = [x.squeeze().tolist() for x in y_pred_list]
y_targ_list = [x.squeeze().tolist() for x in y_targ_list]
y_pred_list = [x for sublist in y_pred_list for x in sublist]
y_targ_list = [x for sublist in y_targ_list for x in sublist]

print(classification_report(y_targ_list, y_pred_list))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1677
           1       0.11      1.00      0.20       206

    accuracy                           0.11      1883
   macro avg       0.05      0.50      0.10      1883
weighted avg       0.01      0.11      0.02      1883



              precision    recall  f1-score   support

           0       0.93      0.76      0.84      1677
           1       0.22      0.54      0.31       206

    accuracy                           0.74      1883
   macro avg       0.57      0.65      0.57      1883
weighted avg       0.85      0.74      0.78      1883



Accuracy of simple RNN : 0.7413701415061951
F1 score of simple RNN : 0.5635073184967041
Precision of simple RNN : 0.5633978843688965
Recall of simple RNN : 0.6227356195449829


<span style="color:darkviolet">
<font size="5">Gated RNN</font><br>
</span>


In [30]:
class GRU_Network(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):

        super(GRU_Network, self).__init__()

        # Number of hidden dimensions
        self.hidden_dim = hidden_dim

        # RNN
        self.rnn = nn.GRU(input_dim, hidden_dim, num_layers=1, batch_first=True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

        # Initialize hidden state with zeros
        h0 = Variable(torch.zeros(1, x.size(0), self.hidden_dim))

        # One time step
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out


In [None]:
GRU_HIDDEN_DIM = 512
GRU_N_EPOCHS = 6


In [31]:
model_gru = GRU_Network(EMBEDDING_DIM, GRU_HIDDEN_DIM, OUTPUT_DIM)
print(model_gru)


GRU_Network(
  (rnn): GRU(768, 20, batch_first=True)
  (fc): Linear(in_features=20, out_features=2, bias=True)
)


In [32]:
def train_GRU(model, train_loader, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in train_loader:

        train = Variable(batch[0].view(-1, 512, 768))
        labels = Variable(batch[1])

        optimizer.zero_grad()

        output = model(train)

        loss = criterion(output, labels)

        acc = binary_accuracy(output.argmax(-1), labels)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)


In [33]:
N_EPOCHS = 5

best_valid_loss = float("inf")

criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1 / train_fair, 1 / train_unfair]))
# criterion = nn.NLLLoss()

optimizer_gru = optim.Adam(model_gru.parameters(), lr=1e-4)

for epoch in range(N_EPOCHS):

    start_time = time.time()

    gru_train_loss, gru_train_acc = train_RNN(model_gru, train_loader, optimizer_gru, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f"Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s")
    print(
        f"\tTrain Loss: \
    {gru_train_loss:.3f} | Train Acc: {gru_train_acc*100:.2f}%"
    )


Epoch: 01 | Epoch Time: 0m 31s
	Train Loss:     0.679 | Train Acc: 74.31%
Epoch: 02 | Epoch Time: 0m 26s
	Train Loss:     0.656 | Train Acc: 73.69%
Epoch: 03 | Epoch Time: 0m 27s
	Train Loss:     0.644 | Train Acc: 72.74%
Epoch: 04 | Epoch Time: 0m 26s
	Train Loss:     0.633 | Train Acc: 74.21%
Epoch: 05 | Epoch Time: 0m 26s
	Train Loss:     0.627 | Train Acc: 72.91%


In [34]:
test_loader_predict = torch.utils.data.DataLoader(test_data, batch_size=1, num_workers=0)


def predict(model, dataloader):
    prediction_list = []
    for i, batch in enumerate(dataloader):
        test = Variable(batch.view(-1, 512, 768))
        outputs = model(test)
        _, predicted = torch.max(outputs.data, 1)
        prediction_list.append(predicted.cpu())
    return prediction_list


predictions_gru = predict(model_gru, test_loader_predict)

a_tensor = torch.IntTensor(predictions_gru)
b_tensor = torch.IntTensor(test_targets)

accuracy = Accuracy()
gru_test_acc = accuracy(a_tensor, b_tensor).item()

f1 = F1Score(num_classes=2, average="macro")  # checked if weighted can be used
gru_f1_score = f1(a_tensor, b_tensor).item()

precision = Precision(average="macro", num_classes=2)
gru_precision = precision(a_tensor, b_tensor).item()

recall = Recall(average="macro", num_classes=2)
gru_recall = recall(a_tensor, b_tensor).item()

print("Accuracy of Gated RNN :", gru_test_acc)
print("F1 score of Gated RNN :", gru_f1_score)
print("Precision of Gated RNN :", gru_precision)
print("Recall of Gated RNN :", gru_recall)


Accuracy of Gated RNN : 0.6611789464950562
F1 score of Gated RNN : 0.535663366317749
Precision of Gated RNN : 0.5644705295562744
Recall of Gated RNN : 0.6543599367141724


In [35]:
table = Texttable()
table.set_cols_dtype(["a", "f", "f", "f", "f", "f"])
table.set_precision(5)
table.add_rows(
    [
        ["Model", "Train accuracy", "Test Accuracy", "F1-score", "Precision", "Recall"],
        ["Gated RNN", gru_train_acc, gru_test_acc, gru_f1_score, gru_precision, gru_recall],
    ]
)
print(table.draw(), "\n")


+-----------+----------------+---------------+----------+-----------+---------+
|   Model   | Train accuracy | Test Accuracy | F1-score | Precision | Recall  |
| Gated RNN | 0.72912        | 0.66118       | 0.53566  | 0.56447   | 0.65436 |
+-----------+----------------+---------------+----------+-----------+---------+ 



<span style="color:darkviolet">
<font size="5">LSTM</font><br>
</span>


In [36]:
# LSTM


class LSTM_Network(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):

        super(LSTM_Network, self).__init__()

        # Number of hidden dimensions
        self.hidden_dim = hidden_dim

        # RNN
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

        # Initialize hidden state with zeros
        h0 = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
        c0 = Variable(torch.zeros(1, x.size(0), self.hidden_dim))

        # One time step
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])

        return out


In [37]:
def train_LSTM(model, train_loader, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in train_loader:

        train = Variable(batch[0].view(-1, 512, 768))
        labels = Variable(batch[1])

        optimizer.zero_grad()

        output = model(train)

        loss = criterion(output, labels)

        acc = binary_accuracy(output.argmax(-1), labels)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)


In [38]:
model_lstm = LSTM_Network(EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
print(model_lstm)


LSTM_Network(
  (lstm): LSTM(768, 20, batch_first=True)
  (fc): Linear(in_features=20, out_features=2, bias=True)
)


In [39]:
N_EPOCHS = 5

best_valid_loss = float("inf")

criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1 / train_fair, 1 / train_unfair]))

optimizer = optim.Adam(model_lstm.parameters(), lr=1e-4)

for epoch in range(N_EPOCHS):

    start_time = time.time()

    lstm_train_loss, lstm_train_acc = train_LSTM(model_lstm, train_loader, optimizer, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f"Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s")
    print(
        f"\tTrain Loss: \
    {lstm_train_loss:.3f} | Train Acc: {lstm_train_acc*100:.2f}%"
    )


Epoch: 01 | Epoch Time: 0m 31s
	Train Loss:     0.672 | Train Acc: 74.67%
Epoch: 02 | Epoch Time: 0m 27s
	Train Loss:     0.652 | Train Acc: 73.81%
Epoch: 03 | Epoch Time: 0m 27s
	Train Loss:     0.643 | Train Acc: 74.06%
Epoch: 04 | Epoch Time: 0m 27s
	Train Loss:     0.634 | Train Acc: 73.18%
Epoch: 05 | Epoch Time: 0m 28s
	Train Loss:     0.630 | Train Acc: 74.97%


In [40]:
test_loader_predict = torch.utils.data.DataLoader(test_data, batch_size=1, num_workers=NUM_WORKERS)


def predict(model, dataloader):
    prediction_list = []
    for i, batch in enumerate(dataloader):
        test = Variable(batch.view(-1, 512, 768))
        outputs = model(test)
        _, predicted = torch.max(outputs.data, 1)
        prediction_list.append(predicted.cpu())
    return prediction_list


predictions_lstm = predict(model_lstm, test_loader_predict)

a_tensor = torch.IntTensor(predictions_lstm)
b_tensor = torch.IntTensor(test_targets)

accuracy = Accuracy()
lstm_test_acc = accuracy(a_tensor, b_tensor).item()

f1 = F1Score(num_classes=2, average="macro")  # checked if weighted can be used
lstm_f1_score = f1(a_tensor, b_tensor).item()

precision = Precision(average="macro", num_classes=2)
lstm_precision = precision(a_tensor, b_tensor).item()

recall = Recall(average="macro", num_classes=2)
lstm_recall = recall(a_tensor, b_tensor).item()

print("Accuracy of LSTM :", lstm_test_acc)
print("F1 score of LSTM :", lstm_f1_score)
print("Precision of LSTM :", lstm_precision)
print("Recall of of LSTM :", lstm_recall)


Accuracy of LSTM : 0.7456187009811401
F1 score of LSTM : 0.5636465549468994
Precision of LSTM : 0.562571108341217
Recall of of LSTM : 0.6187337636947632


In [41]:
table = Texttable()
table.set_cols_dtype(["a", "f", "f", "f", "f", "f"])
table.set_precision(5)
table.add_rows(
    [
        ["Model", "Train accuracy", "Test Accuracy", "F1-score", "Precision", "Recall"],
        ["LSTM", lstm_train_acc, lstm_test_acc, lstm_f1_score, lstm_precision, lstm_recall],
    ]
)
print(table.draw(), "\n")


+-------+----------------+---------------+----------+-----------+---------+
| Model | Train accuracy | Test Accuracy | F1-score | Precision | Recall  |
| LSTM  | 0.74970        | 0.74562       | 0.56365  | 0.56257   | 0.61873 |
+-------+----------------+---------------+----------+-----------+---------+ 



<span style="color:darkviolet">
<font size="5">Bi-LSTM</font><br>
</span>


In [42]:
# LSTM


class Bi_LSTM_Network(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):

        super(Bi_LSTM_Network, self).__init__()

        # Number of hidden dimensions
        self.hidden_dim = hidden_dim

        # RNN
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True, bidirectional=True)

        # Readout layer
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):

        # Initialize hidden state with zeros
        h0 = Variable(torch.zeros(1 * 2, x.size(0), self.hidden_dim))
        c0 = Variable(torch.zeros(1 * 2, x.size(0), self.hidden_dim))

        # One time step
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])

        return out


In [43]:
def train_Bi_LSTM(model, train_loader, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in train_loader:

        train = Variable(batch[0].view(-1, 512, 768))
        labels = Variable(batch[1])

        optimizer.zero_grad()

        output = model(train)

        loss = criterion(output, labels)

        acc = binary_accuracy(output.argmax(-1), labels)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(train_loader), epoch_acc / len(train_loader)


In [44]:
model_bi_lstm = Bi_LSTM_Network(EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
print(model_bi_lstm)


Bi_LSTM_Network(
  (lstm): LSTM(768, 20, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=40, out_features=2, bias=True)
)


In [45]:
N_EPOCHS = 5

best_valid_loss = float("inf")

criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1 / train_fair, 1 / train_unfair]))

optimizer = optim.Adam(model_bi_lstm.parameters(), lr=1e-4)

for epoch in range(N_EPOCHS):

    start_time = time.time()

    bi_lstm_train_loss, bi_lstm_train_acc = train_Bi_LSTM(model_bi_lstm, train_loader, optimizer, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f"Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s")
    print(
        f"\tTrain Loss: \
    {bi_lstm_train_loss:.3f} | Train Acc: {bi_lstm_train_acc*100:.2f}%"
    )


Epoch: 01 | Epoch Time: 0m 51s
	Train Loss:     0.664 | Train Acc: 76.07%
Epoch: 02 | Epoch Time: 0m 50s
	Train Loss:     0.644 | Train Acc: 73.88%
Epoch: 03 | Epoch Time: 0m 47s
	Train Loss:     0.632 | Train Acc: 73.15%
Epoch: 04 | Epoch Time: 0m 46s
	Train Loss:     0.626 | Train Acc: 73.60%
Epoch: 05 | Epoch Time: 0m 46s
	Train Loss:     0.617 | Train Acc: 73.32%


In [46]:
test_loader_predict = torch.utils.data.DataLoader(test_data, batch_size=1, num_workers=NUM_WORKERS)


def predict(model, dataloader):
    prediction_list = []
    for i, batch in enumerate(dataloader):
        test = Variable(batch.view(-1, 512, 768))
        outputs = model(test)
        _, predicted = torch.max(outputs.data, 1)
        prediction_list.append(predicted.cpu())
    return prediction_list


predictions_bi_lstm = predict(model_bi_lstm, test_loader_predict)

a_tensor = torch.IntTensor(predictions_bi_lstm)
b_tensor = torch.IntTensor(test_targets)

accuracy = Accuracy()
bi_lstm_test_acc = accuracy(a_tensor, b_tensor).item()

f1 = F1Score(num_classes=2, average="macro")  # checked if weighted can be used
bi_lstm_f1_score = f1(a_tensor, b_tensor).item()

precision = Precision(average="macro", num_classes=2)
bi_lstm_precision = precision(a_tensor, b_tensor).item()

recall = Recall(average="macro", num_classes=2)
bi_lstm_recall = recall(a_tensor, b_tensor).item()

print("Accuracy of LSTM :", bi_lstm_test_acc)
print("F1 score of LSTM :", bi_lstm_f1_score)
print("Precision of LSTM :", bi_lstm_precision)
print("Recall of of LSTM :", bi_lstm_recall)


Accuracy of LSTM : 0.759957492351532
F1 score of LSTM : 0.5818121433258057
Precision of LSTM : 0.5761378407478333
Recall of of LSTM : 0.6416870951652527


In [47]:
table = Texttable()
table.set_cols_dtype(["a", "f", "f", "f", "f", "f"])
table.set_precision(5)
table.add_rows(
    [
        ["Model", "Train accuracy", "Test Accuracy", "F1-score", "Precision", "Recall"],
        ["Bi-LSTM", bi_lstm_train_acc, bi_lstm_test_acc, bi_lstm_f1_score, bi_lstm_precision, bi_lstm_recall],
    ]
)
print(table.draw(), "\n")


+---------+----------------+---------------+----------+-----------+---------+
|  Model  | Train accuracy | Test Accuracy | F1-score | Precision | Recall  |
| Bi-LSTM | 0.73320        | 0.75996       | 0.58181  | 0.57614   | 0.64169 |
+---------+----------------+---------------+----------+-----------+---------+ 



In [48]:
# compare all
table = Texttable()
table.set_cols_dtype(["a", "f", "f", "f", "f", "f"])
table.set_precision(5)
table.add_rows(
    [
        ["Model", "Train accuracy", "Test Accuracy", "F1-score", "Precision", "Recall"],
        ["Simple RNN", rnn_train_acc, rnn_test_acc, rnn_f1_score, rnn_precision, rnn_recall],
        ["Gated RNN", gru_train_acc, gru_test_acc, gru_f1_score, gru_precision, gru_recall],
        ["LSTM", lstm_train_acc, lstm_test_acc, lstm_f1_score, lstm_precision, lstm_recall],
        ["Bi-LSTM", bi_lstm_train_acc, bi_lstm_test_acc, bi_lstm_f1_score, bi_lstm_precision, bi_lstm_recall],
    ]
)
print(table.draw(), "\n")


+------------+----------------+---------------+----------+-----------+---------+
|   Model    | Train accuracy | Test Accuracy | F1-score | Precision | Recall  |
| Simple RNN | 0.71450        | 0.74137       | 0.56351  | 0.56340   | 0.62274 |
+------------+----------------+---------------+----------+-----------+---------+
| Gated RNN  | 0.72912        | 0.66118       | 0.53566  | 0.56447   | 0.65436 |
+------------+----------------+---------------+----------+-----------+---------+
| LSTM       | 0.74970        | 0.74562       | 0.56365  | 0.56257   | 0.61873 |
+------------+----------------+---------------+----------+-----------+---------+
| Bi-LSTM    | 0.73320        | 0.75996       | 0.58181  | 0.57614   | 0.64169 |
+------------+----------------+---------------+----------+-----------+---------+ 



<span style="color:darkviolet">
<font size="3">Best Performance : Bi-LSTM</font><br>
</span>
