In [1]:
import pandas as pd
import time, datetime, numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from collections import Counter

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, get_linear_schedule_with_warmup

## Mounting Google Drive in Colab

In [2]:
from google.colab import drive

# Make the shared Drive folder visible to this runtime before any file is read.
drive.mount('/content/drive')

# Read the three e-SNLI splits in train / validation / test order.
df, val, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/[CS4248] Project Folder/data/esnli_train.csv',
        '/content/drive/MyDrive/[CS4248] Project Folder/data/esnli_val.csv',
        '/content/drive/MyDrive/[CS4248] Project Folder/data/esnli_test.csv',
    )
)

Mounted at /content/drive


## Utility Functions

In [9]:
def format_time(elapsed):
    '''
    Convert a duration in seconds into a human-readable clock string.

    The value is rounded to the nearest whole second and rendered through
    `datetime.timedelta`, e.g. 3661.4 -> '1:01:01'. Durations of a day or
    more get a leading day count such as '1 day, 1:00:00'.
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

def select_cols(df, col_list):
    '''
    Return the part of `df` addressed by `col_list`.

    `col_list` is handed straight to DataFrame indexing, so a list of column
    names yields a DataFrame with those columns in the given order.
    '''
    subset = df[col_list]
    return subset

## Data Pre-processing Utility Functions

In [10]:
def combine_sentences(df, col_list, sep='[SEP]', prefix='[CLS]', out_col='combined_text'):
    '''
    Join several text columns into one string column.

    Each value in `col_list` is converted with `str` (so a missing value
    becomes the literal text 'nan'), the values of a row are joined with
    `sep`, and `prefix` is put in front of the result.

    Parameters:
        df:       source DataFrame; it is copied, never modified.
        col_list: list of column names to join, in the order they should appear.
        sep:      text placed between consecutive columns.
        prefix:   text placed before the first column.
        out_col:  name of the column that receives the joined text.

    Returns:
        A copy of `df` with the extra column `out_col`.

    NOTE(review): BertTokenizer normally adds its own [CLS]/[SEP] tokens when
    encoding, so the default markers may end up duplicated in the model input.
    Pass prefix='' (and a different `sep`) if that is not wanted - confirm
    against the tokenizer output before changing the defaults, since saved
    models were trained on the current format.
    '''
    results_df = df.copy()
    joined = results_df[col_list].astype(str).agg(sep.join, axis=1)
    results_df[out_col] = prefix + joined
    return results_df

## Train/Validation/Test Input Data Handling

In [None]:
# Columns kept from every split: premise, hypothesis, explanation, then the gold label.
target_cols = ['Sentence1', 'Sentence2', 'Explanation_1', 'gold_label']

# Trim each split to those columns; the test split is stored under `test_df`.
df, val, test_df = (
    select_cols(split_frame, target_cols)
    for split_frame in (df, val, test)
)

In [12]:
# Build the single text field from every selected column except the label (the last one).
df = combine_sentences(df, target_cols[:-1])
val = combine_sentences(val, target_cols[:-1])
test_df = combine_sentences(test_df, target_cols[:-1])

# Class ids used as training targets. The (misspelled) name `lables` is kept
# unchanged in case other cells refer to it.
lables = {
    'entailment': 0,
    'neutral': 1,
    'contradiction': 2
}


def _encode_labels(frame, split_name):
    '''
    Map the `gold_label` column of `frame` to integer class ids.

    `Series.map` turns every value that is not a key of `lables` into NaN.
    Such rows would only surface as an error much later, inside the training
    loop, so they are rejected here instead.

    Raises:
        ValueError: if any `gold_label` value has no entry in `lables`.
    '''
    encoded = frame['gold_label'].map(lables)
    missing = encoded.isna()
    if missing.any():
        unknown = sorted(set(frame.loc[missing, 'gold_label'].astype(str)))
        raise ValueError(
            f"{int(missing.sum())} row(s) in the {split_name} split have an "
            f"unmapped gold_label: {unknown}"
        )
    return encoded.astype('int64')


df['labels'] = _encode_labels(df, 'train')
val['labels'] = _encode_labels(val, 'validation')
test_df['labels'] = _encode_labels(test_df, 'test')

In [16]:
# Model input text and integer target for each split.
X_train, y_train = df['combined_text'], df['labels']
X_val, y_val = val['combined_text'], val['labels']
X_test, y_test = test_df['combined_text'], test_df['labels']

((259999,), (259999,))

In [19]:
class NliDataset(Dataset):
    '''
    Map-style dataset pairing tokenizer output with class labels.

    `encodings` is a mapping from field name (e.g. 'input_ids') to a
    per-example indexable collection; `labels` is an indexable collection that
    is assumed to be aligned with the encodings. Both are stored as given and
    converted to tensors one example at a time.
    '''

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label collection defines how many examples the dataset has.
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field, plus the target under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode the train, validation and test texts (in that order) with truncation
# and padding enabled. The texts already carry the literal '[CLS]' / '[SEP]'
# markers written by combine_sentences.
train_encodings, val_encodings, test_encodings = (
    tokenizer(split_texts.tolist(), truncation=True, padding=True)
    for split_texts in (X_train, X_val, X_test)
)

In [21]:
# Wrap each split's encodings and label list in an NliDataset.
train_dataset, val_dataset, test_dataset = (
    NliDataset(split_encodings, split_labels.tolist())
    for split_encodings, split_labels in (
        (train_encodings, y_train),
        (val_encodings, y_val),
        (test_encodings, y_test),
    )
)

# Batches of 16; only the training data is reshuffled, validation and test keep their order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
validation_loader = DataLoader(val_dataset, shuffle=False, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

In [22]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained BERT with a 3-label classification head; attention maps and
# hidden states are not returned.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    output_hidden_states=False,
    output_attentions=False,
    num_labels=3,
)

# Kept as the cell's final expression so the notebook still displays the model.
model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [23]:
# Report which accelerator torch can see.
if not torch.cuda.is_available():
    print("CUDA is not available. Using CPU.")
else:
    print("CUDA is available. Using GPU:", torch.cuda.get_device_name(0))

# Report where the model currently lives.
print("Current device model is on:", model.device)

CUDA is available. Using GPU: Tesla V100-SXM2-16GB
Current device model is on: cuda:0


In [24]:
# Uncomment the lines below to load previously saved model weights

# model_type = 'premise_hypothesis_explanation'
# model_save_path = f"/content/drive/MyDrive/[CS4248] Project Folder/models/{model_type}.pth"
# optimizer_save_path = f"/content/drive/MyDrive/[CS4248] Project Folder/optimizer/{model_type}.pth"

# model.load_state_dict(torch.load(model_save_path))
# # optimizer.load_state_dict(torch.load(optimizer_save_path))

In [25]:
# torch.optim.AdamW is used instead of the deprecated `transformers.AdamW`.
# weight_decay is pinned to 0.0 because that was the default of the
# transformers implementation, whereas torch defaults to 0.01.
optimizer = torch.optim.AdamW(model.parameters(),
                              lr = 5e-5,
                              eps = 1e-8,
                              weight_decay = 0.0
                             )

epochs = 2
# The training loop steps the scheduler once per batch, so the schedule has to
# span every batch of every epoch.
total_steps = len(train_loader) * epochs

# Linear decay of the learning rate down to zero over `total_steps`, with no warmup phase.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0,
                                            num_training_steps = total_steps)



In [None]:
# Begin Training Loop
# Fine-tunes `model` for `epochs` passes over `train_loader`.
# `loss_values` collects the mean training loss of each epoch, in epoch order.
loss_values = []

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Start time of this epoch, used for the elapsed-time reports below.
    t0 = time.time()

    # Running sum of the per-batch losses of this epoch.
    total_loss = 0

    # Put the model in training mode.
    model.train()

    for step, batch in enumerate(train_loader):
        # Report progress every 40 batches (the very first batch is skipped).
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_loader), elapsed))

        # Unpack this training batch from our dataloader.
        #
        # As we unpack the batch, we'll also copy each tensor to the target
        # device using the `to` method.
        #
        # `batch` is a dict; the three entries used here are:
        #   'input_ids':      input ids
        #   'attention_mask': attention masks
        #   'labels':         labels
        b_input_ids = batch['input_ids'].to(device)
        b_input_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        # Clear previously calculated gradients before the backward pass.
        model.zero_grad()

        # Forward pass. Passing `labels` makes the model return the loss as well.
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)

        loss = outputs.loss

        # Accumulate the training loss over all of the batches so that we can
        # calculate the average loss at the end. `loss` is a Tensor containing a
        # single value; the `.item()` function just returns the Python value
        # from the tensor.
        total_loss += loss.item()

        # Backward pass.
        loss.backward()

        # Clip the norm of the gradients to 1.0; helps prevent "exploding gradients".
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters and take a step
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

    # Mean loss per batch for this epoch.
    avg_train_loss = total_loss / len(train_loader)
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")

In [27]:
# Switch the model to evaluation mode before scoring the test split.
model.eval()

# Per-batch logits and gold labels are collected here and merged after the loop.
# `attention_maps` and `tokens_list` are only filled by the optional block below.
predictions, true_labels, attention_maps, tokens_list = [], [], [], []

for batch in test_loader:
    # Move every tensor of the batch to the device the model is on.
    batch = {field: tensor.to(device) for field, tensor in batch.items()}

    # Forward pass only; no gradients are tracked.
    with torch.no_grad():
        outputs = model(**batch)

    # Code to store attention maps and tokens for visualization and analysis
    # if len(attention_maps) < 100:
    #   attention = outputs.attentions
    #   tokens = [tokenizer.convert_ids_to_tokens(t) for t in batch['input_ids']]

    #   for seq_index in range(batch['input_ids'].shape[0]):
    #       attention_per_sequence = tuple(layer[seq_index].detach().cpu().unsqueeze(0) for layer in attention)
    #       tokens_per_sequence = tokens[seq_index]

    #       attention_maps.append(attention_per_sequence)
    #       tokens_list.append(tokens_per_sequence)

    # Bring the results back to host memory as numpy arrays.
    logits = outputs.logits.detach().cpu().numpy()
    label_ids = batch['labels'].cpu().numpy()

    predictions.append(logits)
    true_labels.append(label_ids)

# Stack the batches; the predicted class is the index of the largest logit in each row.
predictions = np.concatenate(predictions, axis=0).argmax(axis=1)
true_labels = np.concatenate(true_labels, axis=0)

accuracy = accuracy_score(true_labels, predictions)
print("Accuracy:", accuracy)

# Support-weighted averages over the classes; the fourth return value (support) is not needed.
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, predictions, average='weighted'
)
print(f"Precision: {precision}\nRecall: {recall}\nF1 Score: {f1}")

Accuracy: 0.9756718241042345
Precision: 0.9756683326001164
Recall: 0.9756718241042345
F1 Score: 0.9756598469461969


In [None]:
import os

# Tag used in both checkpoint file names.
model_type = 'premise_explanation_hypothesis_new'
model_save_path = f"/content/drive/MyDrive/[CS4248] Project Folder/models/{model_type}.pth"
optimizer_save_path = f"/content/drive/MyDrive/[CS4248] Project Folder/optimizer/{model_type}.pth"

# Make sure both target folders exist first, so a missing folder cannot make
# the save fail after training has already finished.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
os.makedirs(os.path.dirname(optimizer_save_path), exist_ok=True)

# Save the model and optimizer state dicts (the encodings are not saved).
torch.save(model.state_dict(), model_save_path)
torch.save(optimizer.state_dict(), optimizer_save_path)