<a href="https://colab.research.google.com/github/adisav17/Deep-Semantic-Role-Labeling-with-Auxilary-tasks/blob/main/test_bert_base_pred_ind.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m91.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.metrics import precision_recall_curve, f1_score, accuracy_score
import numpy as np
import math
import random
import itertools
import pandas as pd


In [None]:
class SRLDataset(Dataset):
    """Per-sentence dataset for binary argument tagging given a predicate.

    Every example consists of a whitespace-tokenised sentence, the surface
    form of its predicate and one integer label per whitespace word.
    ``__getitem__`` re-tokenises the sentence with ``tokenizer`` and projects
    the word labels onto the first sub-token of each word; every other
    position (continuation sub-tokens, special tokens, padding) is set to
    ``IGNORE_INDEX``.

    Args:
        sentences: sequence of sentences (words separated by single spaces).
        predicates: sequence of predicate strings, parallel to ``sentences``.
        labels: sequence of per-word label lists, parallel to ``sentences``.
        tokenizer: object exposing ``tokenize(text)`` and ``encode(text, ...)``
            in the style of a Hugging Face tokenizer.
        max_length: fixed length every encoded example is padded/truncated to.
    """

    # Label value for positions that must not contribute to the loss.
    IGNORE_INDEX = -100

    def __init__(self, sentences, predicates, labels, tokenizer, max_length):
        self.sentences = sentences
        self.predicates = predicates
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.sentences)

    def _align_labels(self, sentence, label):
        """Expand word-level labels to sub-token level (no special tokens).

        The position of each word is tracked with a running cursor instead of
        searching for its first sub-token in the tokenised sentence: a search
        can land on an identical sub-token that belongs to an earlier word
        (e.g. the "4" inside "0.4" when labelling a following "4").

        Assumes the tokenizer splits on whitespace before sub-word splitting,
        so that tokenising word by word yields the same sequence as tokenising
        the whole sentence -- TODO confirm if the tokenizer is swapped.
        """
        aligned = []
        for word, lbl in zip(sentence.split(), label):
            num_subwords = len(self.tokenizer.tokenize(word))
            if num_subwords == 0:
                # The tokenizer dropped this word entirely, so it occupies no
                # position in the encoded sentence and has nothing to label.
                continue
            aligned.append(lbl)
            aligned.extend([self.IGNORE_INDEX] * (num_subwords - 1))
        return aligned

    def __getitem__(self, index):
        """Return ``(input_ids, predicate_idx, aligned_labels)`` for one example.

        ``input_ids`` and ``aligned_labels`` are ``torch.long`` tensors of
        length ``max_length``; ``predicate_idx`` is a plain ``int`` position
        inside ``input_ids``.

        Raises:
            ValueError: if the predicate's first sub-token does not occur in
                the (possibly truncated) encoded sentence.
        """
        sentence = self.sentences[index]
        predicate = self.predicates[index]
        label = self.labels[index]

        # Encode with [CLS]/[SEP], truncated and padded to max_length.
        encoded = self.tokenizer.encode(
            sentence,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
        )
        input_ids = torch.tensor(encoded, dtype=torch.long)

        # Reserve one slot each for [CLS] and [SEP], then pad to max_length.
        inner = self._align_labels(sentence, label)[:self.max_length - 2]
        aligned = [self.IGNORE_INDEX] + inner + [self.IGNORE_INDEX]
        aligned += [self.IGNORE_INDEX] * (self.max_length - len(aligned))
        aligned_labels = torch.tensor(aligned, dtype=torch.long)

        # Position of the predicate = first occurrence of its first sub-token.
        # NOTE(review): when the same surface form occurs more than once in the
        # sentence this always picks the first occurrence; the dataset does not
        # carry the predicate's word position, so that cannot be resolved here.
        predicate_token_id = self.tokenizer.encode(predicate)[1]
        try:
            predicate_idx = encoded.index(predicate_token_id)
        except ValueError:
            raise ValueError(
                f"Predicate {predicate!r} not found in the encoded sentence at index {index} "
                f"(it may have been truncated at max_length={self.max_length})."
            ) from None

        return input_ids, predicate_idx, aligned_labels

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
def convert_file(file_path):
    """Parse a one-token-per-line file into sentences, ARG1 tags and predicates.

    Each non-blank line is split on whitespace; field 0 is the word and, when
    at least six fields are present, field 5 is a role label.  A word is
    tagged ``1`` if its role is ``"ARG1"`` and ``0`` otherwise; the word whose
    role is ``"PRED"`` is the sentence's predicate (the last one wins if
    several are marked).  Blank lines separate sentences.

    Only sentences that contain both a ``PRED`` and at least one ``ARG1`` are
    kept.  The per-sentence state is reset at every sentence boundary, so a
    predicate or argument seen in a discarded sentence can no longer make a
    later sentence look complete.

    Args:
        file_path: path of the annotated text file.

    Returns:
        ``(sentences, tags, predicates)`` -- three parallel lists holding the
        space-joined sentence, its list of 0/1 tags and its predicate word.
    """
    sentences = []
    tags = []
    predicates = []

    def store(words, word_tags, predicate, has_arg):
        # Keep the sentence only when it has both a predicate and an ARG1.
        if predicate is not None and has_arg:
            sentences.append(" ".join(words))
            tags.append(word_tags)
            predicates.append(predicate)

    words, word_tags, curr_pred, has_arg = [], [], None, False

    with open(file_path) as f:
        for raw_line in f:
            fields = raw_line.split()

            if not fields:
                # Blank line: sentence boundary. Always start from a clean
                # state, whether or not the finished sentence was kept.
                store(words, word_tags, curr_pred, has_arg)
                words, word_tags, curr_pred, has_arg = [], [], None, False
                continue

            words.append(fields[0])
            role = fields[5] if len(fields) >= 6 else None

            if role == "PRED":
                curr_pred = fields[0]

            if role == "ARG1":
                has_arg = True
                word_tags.append(1)
            else:
                word_tags.append(0)

    # The file may end without a trailing blank line.
    store(words, word_tags, curr_pred, has_arg)

    return sentences, tags, predicates


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/My Drive/nlp_srl

/content/drive/My Drive/nlp_srl


In [None]:
sentences_1, labels_1, predicates_1 = convert_file('partitive_group_nombank.clean.train')

In [None]:
sentences_2, labels_2, predicates_2 = convert_file('partitive_group_nombank.clean.test')

In [None]:
sentences_3, labels_3, predicates_3 = convert_file('partitive_group_nombank.clean.dev')

In [None]:
print(len(sentences_1))
print(len(labels_1))
print(len(predicates_1))



9584
9584
9584


In [None]:
print(len(sentences_2))
print(len(labels_2))
print(len(predicates_2))


584
584
584


In [None]:
print(len(sentences_3))
print(len(labels_3))
print(len(predicates_3))

353
353
353


In [None]:
sentences_3[0]

'The consensus view expects a 0.4 % increase in the September CPI after a flat reading in August .'

In [None]:
labels_3[0]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

In [None]:
predicates_1[0:5]

['%', '%', '%', '%', '%']

In [None]:
# Pool the three loaded partitions (train, test, dev -- in that order) into
# fresh parallel lists; the per-partition lists are left untouched.
sentences_all = [*sentences_1, *sentences_2, *sentences_3]
labels_all = [*labels_1, *labels_2, *labels_3]
predicates_all = [*predicates_1, *predicates_2, *predicates_3]

In [None]:
print(len(sentences_all))
print(len(labels_all))
print(len(predicates_all))

10521
10521
10521


In [None]:


# Combine sentences, labels, and predicates into a list of tuples
combined_data = list(zip(sentences_all, labels_all, predicates_all))

# Seed every random number generator the notebook relies on. Python's `random`
# drives the shuffle below; NumPy and PyTorch are seeded as well so that weight
# initialisation, dropout and DataLoader shuffling are repeatable too.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
random.shuffle(combined_data)

# Split the shuffled data into training and validation sets
split_ratio = 0.8
split_index = int(len(combined_data) * split_ratio)

train_data = combined_data[:split_index]
val_data = combined_data[split_index:]

# Separate sentences, labels, and predicates for the train and validation sets
train_sentences, train_labels, train_predicates = zip(*train_data)
val_sentences, val_labels, val_predicates = zip(*val_data)


In [None]:
print(len(train_sentences))
print(len(train_labels))
print(len(train_predicates))

8416
8416
8416


In [None]:
print(len(val_sentences))
print(len(val_labels))
print(len(val_predicates))

2105
2105
2105


In [None]:
# Sanity check on the training split:
#   X          -- per sentence, whether #words equals #labels
#   count      -- number of sentences inspected
#   max_length -- longest label sequence (in words)
X = [
    len(sentence.split()) == len(sentence_labels)
    for sentence, sentence_labels in zip(train_sentences, train_labels)
]
count = len(train_sentences)
max_length = max((len(sentence_labels) for sentence_labels in train_labels), default=0)

In [None]:
print(max_length)
print(sum(X))
print(count)

108
8416
8416


In [None]:
import torch
import torch.nn as nn
from transformers import BertModel

class SRLModel(nn.Module):
    """BERT encoder followed by a BiLSTM and a per-token binary classifier.

    Args:
        bert_model: encoder module; it must expose ``config.hidden_size`` and
            return an object with ``last_hidden_state`` when called with
            ``input_ids`` / ``attention_mask``.
        lstm_hidden_size: hidden size of each LSTM direction.
        dropout_rate: dropout probability applied to the LSTM output.
    """

    def __init__(self, bert_model, lstm_hidden_size, dropout_rate):
        super().__init__()
        self.bert = bert_model
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size,
                            hidden_size=lstm_hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)
        self.dropout = nn.Dropout(dropout_rate)
        # Both LSTM directions are concatenated, hence 2 * lstm_hidden_size.
        self.fc = nn.Linear(in_features=lstm_hidden_size * 2, out_features=1)

    def forward(self, input_ids, predicate_idx, labels=None, attention_mask=None):
        """Compute one logit per token.

        Args:
            input_ids: ``(batch, seq_len)`` token ids.
            predicate_idx: accepted for interface parity with the
                predicate-aware model; not used by this model.
            labels: optional ``(batch, seq_len)`` labels where ``-100`` marks
                positions to ignore.
            attention_mask: optional ``(batch, seq_len)`` mask for the
                encoder. When omitted it is derived from the encoder's
                ``config.pad_token_id`` so padding is not attended to.

        Returns:
            ``(logits, labels)``. Without ``labels`` the logits have shape
            ``(batch, seq_len, 1)`` and ``labels`` is ``None``. With
            ``labels`` both are filtered to the non-ignored positions, giving
            logits of shape ``(n_valid, 1)`` and labels of shape ``(n_valid,)``.
        """
        if attention_mask is None:
            pad_token_id = getattr(self.bert.config, "pad_token_id", None)
            if pad_token_id is not None:
                # Without a mask the encoder would attend to padding tokens.
                attention_mask = (input_ids != pad_token_id).long()

        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state

        lstm_output, _ = self.lstm(sequence_output)
        lstm_output = self.dropout(lstm_output)
        logits = self.fc(lstm_output)

        if labels is not None:
            # Keep only the positions that carry a real label.
            labels_mask = (labels != -100)
            labels = labels[labels_mask]
            logits = logits[labels_mask]

        return logits, labels


In [None]:
import torch
import torch.nn as nn
from transformers import BertModel

class SRLindModel(nn.Module):
    """BERT + BiLSTM tagger that is told where the predicate is.

    A learned 2-entry embedding (0 = ordinary token, 1 = predicate token) is
    concatenated to every BERT hidden state before the BiLSTM.

    Args:
        bert_model: encoder module; it must expose ``config.hidden_size`` and
            return an object with ``last_hidden_state`` when called with
            ``input_ids`` / ``attention_mask``.
        lstm_hidden_size: hidden size of each LSTM direction.
        dropout_rate: dropout probability applied to the LSTM output.
        predicate_emb_dim: size of the predicate-indicator embedding.
    """

    def __init__(self, bert_model, lstm_hidden_size, dropout_rate, predicate_emb_dim):
        super().__init__()
        self.bert = bert_model

        self.predicate_embedding = nn.Embedding(2, predicate_emb_dim)

        # The Sequential is used as a container only (an LSTM returns a tuple,
        # so it cannot be called end-to-end); forward() indexes the stages.
        # The layout is kept so parameter names in state_dicts stay the same.
        self.downstream = nn.Sequential(
            nn.LSTM(input_size=self.bert.config.hidden_size + predicate_emb_dim,
                    hidden_size=lstm_hidden_size,
                    num_layers=1,
                    batch_first=True,
                    bidirectional=True),
            nn.Dropout(dropout_rate),
            nn.Linear(in_features=lstm_hidden_size * 2, out_features=1)
        )

    def forward(self, input_ids, predicate_idx, labels=None, attention_mask=None):
        """Compute one logit per token.

        Args:
            input_ids: ``(batch, seq_len)`` token ids.
            predicate_idx: ``(batch,)`` integer tensor with the position of
                the predicate inside each row of ``input_ids``.
            labels: optional ``(batch, seq_len)`` labels where ``-100`` marks
                positions to ignore.
            attention_mask: optional ``(batch, seq_len)`` mask for the
                encoder. When omitted it is derived from the encoder's
                ``config.pad_token_id`` so padding is not attended to.

        Returns:
            ``(logits, labels)``. Without ``labels`` the logits have shape
            ``(batch, seq_len, 1)`` and ``labels`` is ``None``. With
            ``labels`` both are filtered to the non-ignored positions, giving
            logits of shape ``(n_valid, 1)`` and labels of shape ``(n_valid,)``.
        """
        if attention_mask is None:
            pad_token_id = getattr(self.bert.config, "pad_token_id", None)
            if pad_token_id is not None:
                # Callers in this notebook never pass a mask; without one the
                # encoder would attend to padding tokens.
                attention_mask = (input_ids != pad_token_id).long()

        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_output.last_hidden_state

        # One-hot (over the sequence axis) indicator of the predicate position.
        predicate_indicator = torch.zeros_like(input_ids)
        predicate_indicator.scatter_(1, predicate_idx.unsqueeze(1), 1)
        predicate_emb = self.predicate_embedding(predicate_indicator)

        # Concatenate predicate indicator embeddings with the sequence output
        sequence_output = torch.cat((sequence_output, predicate_emb), dim=-1)

        lstm_layer, dropout_layer, output_layer = self.downstream
        downstream_output, _ = lstm_layer(sequence_output)
        downstream_output = dropout_layer(downstream_output)
        logits = output_layer(downstream_output)

        if labels is not None:
            # Keep only the positions that carry a real label.
            labels_mask = (labels != -100)
            labels = labels[labels_mask]
            logits = logits[labels_mask]

        return logits, labels


In [None]:
def validate(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` and tune the decision threshold.

    The model is put in eval mode and run without gradients. Sigmoid
    probabilities of all labelled tokens are collected, the threshold that
    maximises F1 on exactly these tokens is selected from the
    precision-recall curve, and accuracy / F1 are reported at that threshold.
    Because the threshold is tuned on the same data it is scored on, the
    returned F1 is an upper bound for this data set.

    Args:
        model: callable ``model(input_ids, predicate_idx, labels)`` returning
            ``(logits, labels)`` already filtered to labelled positions.
        dataloader: yields ``(input_ids, predicate_idx, padded_labels)``.
        criterion: loss called as ``criterion(logits, labels.float().unsqueeze(1))``.

    Returns:
        ``(average_loss, accuracy, best_f_score, best_threshold)`` where
        ``average_loss`` is the mean of the per-batch losses.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for input_ids, predicate_idx, padded_labels in dataloader:
            input_ids = input_ids.to(device)
            predicate_idx = predicate_idx.to(device)
            padded_labels = padded_labels.to(device)

            logits, labels = model(input_ids, predicate_idx, padded_labels)

            if labels.numel() == 0:
                # No labelled token in this batch: the mean loss would be NaN.
                continue

            loss = criterion(logits, labels.float().unsqueeze(1))
            total_loss += loss.item()
            num_batches += 1

            keep = labels.ne(-100)
            # reshape(-1) keeps a 1-D array even when a single token remains
            # (squeeze() would produce a 0-d array that cannot be extended).
            all_labels.extend(labels[keep].reshape(-1).cpu().numpy())
            # torch.sigmoid is numerically stable for large-magnitude logits.
            all_probs.extend(torch.sigmoid(logits[keep]).reshape(-1).cpu().numpy())

    average_loss = total_loss / max(num_batches, 1)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)

    precision, recall, thresholds = precision_recall_curve(all_labels, all_probs)

    # The final (precision=1, recall=0) point has no associated threshold.
    precision, recall = precision[:-1], recall[:-1]
    denominator = precision + recall
    f_scores = np.divide(2 * precision * recall, denominator,
                         out=np.zeros_like(denominator), where=denominator > 0)

    best_threshold = thresholds[np.argmax(f_scores)]

    # precision_recall_curve scores a threshold with `score >= threshold`, so
    # the same comparison must be used here for the metrics to correspond to
    # the selected operating point.
    preds = (all_probs >= best_threshold).astype(int)
    accuracy = accuracy_score(all_labels, preds)
    best_f_score = f1_score(all_labels, preds)

    return average_loss, accuracy, best_f_score, best_threshold


In [None]:
def validate_on_train(model, dataloader, criterion):
    """Evaluate ``model`` on the training data.

    The evaluation procedure is exactly the one implemented by ``validate``
    (eval mode, no gradients, F1-optimal threshold), so this simply delegates
    to it instead of carrying a second copy of the same code.

    Args:
        model: model to evaluate.
        dataloader: DataLoader over the training set.
        criterion: loss function used for the reported loss.

    Returns:
        ``(average_loss, accuracy, best_f_score, best_threshold)``.
    """
    return validate(model, dataloader, criterion)


In [None]:
def train_model_old(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value = 1, weighting_method='none', custom_value= 20):
    """Train for exactly ``num_epochs`` epochs, without early stopping.

    This is the original training routine kept for backward compatibility.
    It is ``train_model`` with early stopping disabled, so it delegates to
    that function with an infinite patience rather than duplicating the loop.

    Args:
        model: model to train.
        train_dataset: dataset whose ``labels`` attribute is used for class weighting.
        train_dataloader: DataLoader over the training set.
        val_dataloader: DataLoader over the validation set.
        criterion: loss module; its ``pos_weight`` attribute is overwritten.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: number of epochs to run.
        clip_grad_value: max gradient norm passed to ``clip_grad_norm_``.
        weighting_method: ``'none'``, ``'direct'``, ``'log'`` or ``'custom'``.
        custom_value: positive-class weight used when ``weighting_method == 'custom'``.

    Returns:
        ``(avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies,
        val_accuracies, train_f_scores, val_f_scores)``.

    Raises:
        ValueError: if ``weighting_method`` is not one of the supported values.
    """
    return train_model(
        model,
        train_dataset,
        train_dataloader,
        val_dataloader,
        criterion,
        optimizer,
        num_epochs,
        clip_grad_value=clip_grad_value,
        weighting_method=weighting_method,
        custom_value=custom_value,
        patience=float('inf'),  # the patience counter can never reach this
    )


In [None]:
def _pos_weight_tensor(train_dataset, weighting_method, custom_value):
    """Build the positive-class weight as a float32 tensor on ``device``."""
    if weighting_method == 'none':
        return torch.tensor(1.0, dtype=torch.float32, device=device)
    if weighting_method == 'custom':
        # Explicit dtype: an integer custom_value would otherwise give an int64 tensor.
        return torch.tensor([custom_value], dtype=torch.float32, device=device)
    if weighting_method not in ('direct', 'log'):
        raise ValueError("Invalid weighting_method value. It must be 'none', 'direct', 'log', or 'custom'.")

    negative_count = sum(1 for label_seq in train_dataset.labels for label in label_seq if label == 0)
    positive_count = sum(1 for label_seq in train_dataset.labels for label in label_seq if label == 1)
    if positive_count == 0:
        raise ZeroDivisionError("Cannot derive pos_weight: the training labels contain no positive example.")

    ratio = negative_count / positive_count
    if weighting_method == 'log':
        ratio = float(np.log(ratio))
    return torch.tensor([ratio], dtype=torch.float32, device=device)


def train_model(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value=1, weighting_method='none', custom_value=20, patience=15):
    """Train ``model`` with early stopping on validation accuracy.

    After every epoch the model is evaluated on ``val_dataloader``; every
    10th epoch it is additionally evaluated on the training data and the
    train/validation accuracy and F-score of that epoch are recorded.
    Training stops once validation accuracy has not improved for ``patience``
    consecutive epochs.

    Args:
        model: model to train; called as ``model(input_ids, predicate_idx, labels)``.
        train_dataset: dataset whose ``labels`` attribute is used for class weighting.
        train_dataloader: yields ``(input_ids, predicate_idx, padded_labels)``.
        val_dataloader: same format, validation data.
        criterion: loss module; its ``pos_weight`` attribute is overwritten
            (this also affects the loss reported during evaluation).
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: maximum number of epochs.
        clip_grad_value: max gradient norm passed to ``clip_grad_norm_``.
        weighting_method: ``'none'`` (weight 1), ``'direct'`` (neg/pos ratio),
            ``'log'`` (log of that ratio) or ``'custom'`` (``custom_value``).
        custom_value: positive-class weight for ``weighting_method == 'custom'``.
        patience: epochs without validation-accuracy improvement tolerated
            before stopping.

    Returns:
        ``(avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies,
        val_accuracies, train_f_scores, val_f_scores)``. The two loss lists
        have one entry per completed epoch. The four metric lists have one
        entry per 10 epochs; when training stops early they are padded with
        ``None`` up to ``num_epochs // 10`` entries so every run yields lists
        of the same length.

    Raises:
        ValueError: if ``weighting_method`` is not one of the supported values.
    """
    eval_every = 10  # epochs between the train/validation metric snapshots

    # The weight does not change during training, so set it once.
    criterion.pos_weight = _pos_weight_tensor(train_dataset, weighting_method, custom_value)

    train_accuracies = []
    val_accuracies = []
    train_f_scores = []
    val_f_scores = []
    avg_train_loss_per_epoch = []
    avg_val_loss_per_epoch = []

    # Early stopping initialization
    best_val_accuracy = float('-inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        total_train_loss = 0
        num_train_batches = 0

        # Evaluation at the end of the previous epoch left the model in eval mode.
        model.train()
        for input_ids, predicate_idx, padded_labels in train_dataloader:
            input_ids = input_ids.to(device)
            predicate_idx = predicate_idx.to(device)
            padded_labels = padded_labels.to(device)

            # Clear gradients first so leftovers from an earlier, interrupted
            # run cannot leak into this update.
            optimizer.zero_grad()

            logits, labels = model(input_ids, predicate_idx, padded_labels)

            loss = criterion(logits, labels.float().unsqueeze(1))
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_value)

            optimizer.step()

            total_train_loss += loss.item()
            num_train_batches += 1

        avg_train_loss_per_epoch.append(total_train_loss / num_train_batches)

        val_loss, val_accuracy, val_f_score, val_threshold = validate(model, val_dataloader, criterion)
        avg_val_loss_per_epoch.append(val_loss)
        print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}, Best F-score: {val_f_score}, Best Threshold: {val_threshold}")

        if (epoch + 1) % eval_every == 0:
            train_loss, train_accuracy, train_f_score, train_threshold = validate_on_train(model, train_dataloader, criterion)
            train_accuracies.append(train_accuracy)
            val_accuracies.append(val_accuracy)
            train_f_scores.append(train_f_score)
            val_f_scores.append(val_f_score)
            print(f"Train Loss: {train_loss}, Train Accuracy: {train_accuracy}, Best F-score: {train_f_score}, Best Threshold: {train_threshold}")

        # Early stopping
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch + 1} epochs due to no improvement in validation accuracy")
            # The metric lists hold one entry per `eval_every` epochs, so pad
            # them to the number of snapshots a full run would have produced.
            expected_snapshots = num_epochs // eval_every
            for series in (train_accuracies, val_accuracies, train_f_scores, val_f_scores):
                series.extend([None] * (expected_snapshots - len(series)))
            break

    return avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies, val_accuracies, train_f_scores, val_f_scores


In [None]:
hyper_parameter_dict = {'learning_rate': [1e-4,1e-5,2e-4,2e-5], 'clip_grad_value':[0.8,1.0,1.5,2.0], 'lstm_hidden_size':[50,70,80,96,128], 'dropout_rate':[0.1,0.2,0.3], 
                        'custom_weight_value': [10, 15, 20 ,27, 40] }

In [None]:
bert_model = BertModel.from_pretrained("bert-base-uncased")

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
num_labels = 1
hidden_size = 768

In [None]:
max_length = 128
train_dataset = SRLDataset(train_sentences, train_predicates, train_labels, tokenizer, max_length)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)


In [None]:
max_length = 128
val_dataset = SRLDataset(val_sentences, val_predicates, val_labels, tokenizer, max_length)
# Evaluation gains nothing from shuffling; a fixed batch order keeps the
# batch-averaged validation loss comparable from epoch to epoch.
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [None]:
negative_count = sum([1 for label_seq in train_dataset.labels for label in label_seq if label == 0])
print(negative_count)
positive_count = sum([1 for label_seq in train_dataset.labels for label in label_seq if label == 1])
print(positive_count)
pos_weight = torch.tensor([negative_count / positive_count ], device=device)
print(pos_weight)

241285
8754
tensor([27.5628], device='cuda:0')


In [None]:
import csv
import ast


In [None]:
num_epochs = 100

criterion = nn.BCEWithLogitsLoss()

In [None]:

def grid_search(hyper_parameter_dict, results_csv_path):
    """Train one ``SRLModel`` per hyper-parameter combination and log results.

    The Cartesian product of all value lists in ``hyper_parameter_dict`` is
    explored. After each run a row with the configuration and the recorded
    metric lists (stringified) is appended to ``results_csv_path``, so partial
    results survive an interrupted search.

    Uses the notebook-level ``train_dataset``, ``train_dataloader``,
    ``val_dataloader``, ``num_epochs`` and ``device``.

    Args:
        hyper_parameter_dict: mapping with the keys ``'learning_rate'``,
            ``'clip_grad_value'``, ``'lstm_hidden_size'``, ``'dropout_rate'``
            and ``'custom_weight_value'``, each mapped to a list of candidates.
        results_csv_path: CSV file to (over)write with one row per configuration.
    """
    fieldnames = ['learning_rate', 'clip_grad_value', 'lstm_hidden_size', 'dropout_rate', 'custom_weight_value', 'train_accuracies', 'train_f_scores', 'val_accuracies', 'val_f_scores']

    # Create the CSV file and write the header
    with open(results_csv_path, 'w', newline='') as csvfile:
        csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()

    param_names = list(hyper_parameter_dict)

    for config in itertools.product(*hyper_parameter_dict.values()):
        print(f"Training with hyperparameter configuration: {config}")
        # Look values up by name so the result does not depend on the order in
        # which the dictionary happens to list its keys.
        params = dict(zip(param_names, config))
        learning_rate = params['learning_rate']
        clip_grad_value = params['clip_grad_value']
        lstm_hidden_size = params['lstm_hidden_size']
        dropout_rate = params['dropout_rate']
        custom_weight_value = params['custom_weight_value']

        # Every configuration must start from the same pre-trained encoder:
        # training updates the BERT weights in place, so sharing one instance
        # would let each run continue from where the previous one stopped.
        fresh_bert = BertModel.from_pretrained("bert-base-uncased")
        model = SRLModel(fresh_bert, lstm_hidden_size=lstm_hidden_size, dropout_rate=dropout_rate).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.BCEWithLogitsLoss()

        avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies, val_accuracies, train_f_scores, val_f_scores = train_model(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value, weighting_method='custom', custom_value=custom_weight_value, patience = 15)

        # Save the current configuration and its results to the CSV file
        results_dict = {'learning_rate': learning_rate, 'clip_grad_value': clip_grad_value, 'lstm_hidden_size': lstm_hidden_size, 'dropout_rate': dropout_rate, 'custom_weight_value': custom_weight_value, 'train_accuracies': str(train_accuracies), 'train_f_scores': str(train_f_scores), 'val_accuracies': str(val_accuracies), 'val_f_scores': str(val_f_scores)}

        with open(results_csv_path, 'a', newline='') as csvfile:
            csv.DictWriter(csvfile, fieldnames=fieldnames).writerow(results_dict)



In [None]:

results_csv_path = 'grid_search_results_sequential_new.csv'
#grid_search(hyper_parameter_dict, results_csv_path)

In [None]:
# train single 
# bs 8
lstm_hidden_size= 35
dropout_rate = 0.2
learning_rate = 3e-5
clip_grad_value = 1.0
custom_weight_value = 27.0

In [None]:
# Start from pristine pre-trained weights: the optimizer below updates the
# encoder's parameters in place, so handing over the shared `bert_model` object
# would make this run depend on whatever was trained with it before.
model = SRLindModel(bert_model = BertModel.from_pretrained("bert-base-uncased"), lstm_hidden_size =lstm_hidden_size, dropout_rate = dropout_rate, predicate_emb_dim = 50).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

In [None]:

avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies, val_accuracies, train_f_scores, val_f_scores = train_model(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value, weighting_method='custom', custom_value=custom_weight_value, patience = 30)

Epoch 1/100
Validation Loss: 0.31767563524684217, Validation Accuracy: 0.9788396127681842, Best F-score: 0.7437623762376239, Best Threshold: 0.9596333503723145
Epoch 2/100
Validation Loss: 0.2919999873830062, Validation Accuracy: 0.9811616954474097, Best F-score: 0.7578814627994955, Best Threshold: 0.9784857034683228
Epoch 3/100
Validation Loss: 0.4707587674669357, Validation Accuracy: 0.9810799319727891, Best F-score: 0.7573914866848395, Best Threshold: 0.9603368639945984
Epoch 4/100
Validation Loss: 0.497175514302922, Validation Accuracy: 0.9803440607012036, Best F-score: 0.755193482688391, Best Threshold: 0.9398666620254517
Epoch 5/100
Validation Loss: 0.444677281786095, Validation Accuracy: 0.9815214547357405, Best F-score: 0.758443779392903, Best Threshold: 0.9603691101074219
Epoch 6/100
Validation Loss: 0.7665252569625436, Validation Accuracy: 0.9795100732600732, Best F-score: 0.7414895811842377, Best Threshold: 0.6563801765441895
Epoch 7/100
Validation Loss: 0.8784289515937086, 

In [None]:
#model = SRLModel(bert_model,  lstm_hidden_size=lstm_hidden_size, dropout_rate=dropout_rate).to(device)
#optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#criterion = nn.BCEWithLogitsLoss()

In [None]:
#bert_model, lstm_hidden_size, dropout_rate, predicate_emb_dim
bs = 8
lstm_hidden_size= 40
dropout_rate = 0.2
learning_rate = 3e-5
clip_grad_value = 1.0
custom_weight_value = 27.0
# Start from pristine pre-trained weights: the optimizer below updates the
# encoder's parameters in place, so handing over the shared `bert_model` object
# would continue from the weights left behind by the previous training cell.
model = SRLindModel(bert_model = BertModel.from_pretrained("bert-base-uncased"), lstm_hidden_size =lstm_hidden_size, dropout_rate = dropout_rate, predicate_emb_dim = 50).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

In [None]:
# train single 
# bs 4
#lstm_hidden_size= 30
#dropout_rate = 0.2
#learning_rate = 3e-5
#clip_grad_value = 1.0
#custom_weight_value = 27.0

In [None]:
# train single
#bs = 8
#lstm_hidden_size= 40
#dropout_rate = 0.2
#learning_rate = 3e-5
#clip_grad_value = 1.0
#custom_weight_value = 27.0
# pred emb 50

avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies, val_accuracies, train_f_scores, val_f_scores = train_model(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value, weighting_method='custom', custom_value=custom_weight_value, patience = 30)

Epoch 1/100
Validation Loss: 0.34619384651269874, Validation Accuracy: 0.9805239403453689, Best F-score: 0.7517198248905567, Best Threshold: 0.9571730494499207
Epoch 2/100
Validation Loss: 0.3621021569988718, Validation Accuracy: 0.9803440607012036, Best F-score: 0.7518579686209743, Best Threshold: 0.9061628580093384
Epoch 3/100
Validation Loss: 0.3846372076959321, Validation Accuracy: 0.9803113553113553, Best F-score: 0.7494798168955473, Best Threshold: 0.9465187788009644
Epoch 4/100
Validation Loss: 0.5859546134685816, Validation Accuracy: 0.9799352433281004, Best F-score: 0.7461204220980757, Best Threshold: 0.929017186164856
Epoch 5/100
Validation Loss: 0.6430579537638661, Validation Accuracy: 0.9812107535321821, Best F-score: 0.746748953052678, Best Threshold: 0.9520973563194275
Epoch 6/100
Validation Loss: 0.6677588578666362, Validation Accuracy: 0.9793956043956044, Best F-score: 0.7430668841761826, Best Threshold: 0.863406777381897
Epoch 7/100
Validation Loss: 0.6240513719314935,

In [None]:
# train 
#bs = 32
#lstm_hidden_size= 35
#dropout_rate = 0.2
#learning_rate = 3e-5
#clip_grad_value = 1.0
#custom_weight_value = 27.0
# pred emb 30

avg_train_loss_per_epoch, avg_val_loss_per_epoch, train_accuracies, val_accuracies, train_f_scores, val_f_scores = train_model(model, train_dataset, train_dataloader, val_dataloader, criterion, optimizer, num_epochs, clip_grad_value, weighting_method='custom', custom_value=custom_weight_value, patience = 30)

Epoch 1/100
Validation Loss: 0.3949638379342628, Validation Accuracy: 0.9721840659340659, Best F-score: 0.6670581327069877, Best Threshold: 0.8785045742988586
Epoch 2/100
Validation Loss: 0.3427461087703705, Validation Accuracy: 0.9766647043432758, Best F-score: 0.7246768280918388, Best Threshold: 0.9359109401702881
Epoch 3/100
Validation Loss: 0.2969254645885843, Validation Accuracy: 0.9795264259549974, Best F-score: 0.735306553911205, Best Threshold: 0.9526841640472412
Epoch 4/100
Validation Loss: 0.3789452574018276, Validation Accuracy: 0.979869832548404, Best F-score: 0.7477975824626101, Best Threshold: 0.9660338163375854
Epoch 5/100
Validation Loss: 0.39225654958775547, Validation Accuracy: 0.9787087912087912, Best F-score: 0.7389735364875701, Best Threshold: 0.9653881192207336
Epoch 6/100
Validation Loss: 0.5129172084006396, Validation Accuracy: 0.9794937205651492, Best F-score: 0.7478890229191796, Best Threshold: 0.882457435131073
Epoch 7/100
Validation Loss: 0.5813846465087298,

In [None]:
# train single
# Hyperparameters noted by the author for this run:
#   bs = 8
#   lstm_hidden_size = 35
#   dropout_rate = 0.2
#   learning_rate = 3e-5
#   clip_grad_value = 1.0
#   custom_weight_value = 27.0
#   likely ind model (author's note -- TODO confirm which model class was used)
# These lines are notes only: the call below reads the live variables
# (clip_grad_value, custom_weight_value, ...) rather than the values listed here.

# Launch training and unpack the six results returned by train_model.
(
    avg_train_loss_per_epoch,
    avg_val_loss_per_epoch,
    train_accuracies,
    val_accuracies,
    train_f_scores,
    val_f_scores,
) = train_model(
    model,
    train_dataset,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    clip_grad_value,
    weighting_method='custom',
    custom_value=custom_weight_value,
    patience=30,
)

Epoch 1/100
Validation Loss: 0.2734547866169702, Validation Accuracy: 0.9794119570905285, Best F-score: 0.7394992758121249, Best Threshold: 0.964491069316864
Epoch 2/100
Validation Loss: 0.3396359114611352, Validation Accuracy: 0.9804421768707483, Best F-score: 0.7615629984051037, Best Threshold: 0.8966791033744812
Epoch 3/100
Validation Loss: 0.35004572567501757, Validation Accuracy: 0.982780612244898, Best F-score: 0.7710371819960862, Best Threshold: 0.959236741065979
Epoch 4/100
Validation Loss: 0.3650156750315518, Validation Accuracy: 0.9838108320251178, Best F-score: 0.78515625, Best Threshold: 0.9495284557342529
Epoch 5/100
Validation Loss: 0.8123077294351815, Validation Accuracy: 0.9831240188383046, Best F-score: 0.7751633986928105, Best Threshold: 0.9295356273651123
Epoch 6/100
Validation Loss: 0.7147768876501218, Validation Accuracy: 0.9830749607535322, Best F-score: 0.7740667976424361, Best Threshold: 0.9154190421104431
Epoch 7/100
Validation Loss: 0.8131714978001334, Validat

In [None]:
# train single -- hyperparameters for one training run.
bs = 32                     # presumably the batch size -- TODO confirm against the DataLoader cell
lstm_hidden_size = 35       # forwarded to SRLindModel(lstm_hidden_size=...)
dropout_rate = 0.3          # forwarded to SRLindModel(dropout_rate=...)
learning_rate = 3e-5        # learning rate handed to the Adam optimizer
clip_grad_value = 1.2       # passed positionally to train_model as clip_grad_value
custom_weight_value = 27.0  # passed to train_model as custom_value with weighting_method='custom'


In [None]:
# Instantiate the model from the current hyperparameter variables and move it
# to the selected device.
model = SRLindModel(
    bert_model=bert_model,
    lstm_hidden_size=lstm_hidden_size,
    dropout_rate=dropout_rate,
    predicate_emb_dim=50,
).to(device)

# Adam over every model parameter, using the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# No pos_weight is set on the loss here; the training cells pass
# weighting_method / custom_value to train_model instead.
criterion = nn.BCEWithLogitsLoss()

In [None]:
# train single
# Hyperparameters noted by the author for this run:
#   bs = 8
#   lstm_hidden_size = 75
#   dropout_rate = 0.3
#   learning_rate = 3e-5
#   clip_grad_value = 1.0
#   custom_weight_value = 27.0
# These lines are notes only: the call below reads the live variables
# (clip_grad_value, custom_weight_value, ...) rather than the values listed here.

# Launch training and unpack the six results returned by train_model.
(
    avg_train_loss_per_epoch,
    avg_val_loss_per_epoch,
    train_accuracies,
    val_accuracies,
    train_f_scores,
    val_f_scores,
) = train_model(
    model,
    train_dataset,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    clip_grad_value,
    weighting_method='custom',
    custom_value=custom_weight_value,
    patience=30,
)

Epoch 1/100
Validation Loss: 0.33588128115208643, Validation Accuracy: 0.9764357666143381, Best F-score: 0.706337884654575, Best Threshold: 0.9339672327041626
Epoch 2/100
Validation Loss: 0.26941927086632206, Validation Accuracy: 0.9797717163788592, Best F-score: 0.744051313883716, Best Threshold: 0.9527961611747742
Epoch 3/100
Validation Loss: 0.32634172957601415, Validation Accuracy: 0.9786760858189429, Best F-score: 0.7434081070444707, Best Threshold: 0.9194042682647705
Epoch 4/100
Validation Loss: 0.4344470635548515, Validation Accuracy: 0.9796899529042387, Best F-score: 0.7439175257731959, Best Threshold: 0.93880295753479
Epoch 5/100
Validation Loss: 0.48700758111926745, Validation Accuracy: 0.9807201726844584, Best F-score: 0.7454113582379616, Best Threshold: 0.9489056468009949
Epoch 6/100
Validation Loss: 0.684730335824051, Validation Accuracy: 0.9801314756671899, Best F-score: 0.7375243033052497, Best Threshold: 0.9529955387115479
Epoch 7/100
Validation Loss: 0.6116190466608079

In [None]:
# train single
# Hyperparameters noted by the author for this run:
#   bs = 64
#   lstm_hidden_size = 80
#   dropout_rate = 0.2
#   learning_rate = 3e-5
#   clip_grad_value = 1.5
#   custom_weight_value = 27.0
# These lines are notes only: the call below reads the live variables
# (clip_grad_value, custom_weight_value, ...) rather than the values listed here.

# Launch training and unpack the six results returned by train_model.
(
    avg_train_loss_per_epoch,
    avg_val_loss_per_epoch,
    train_accuracies,
    val_accuracies,
    train_f_scores,
    val_f_scores,
) = train_model(
    model,
    train_dataset,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    clip_grad_value,
    weighting_method='custom',
    custom_value=custom_weight_value,
    patience=30,
)

Epoch 1/100
Validation Loss: 0.3050805778717095, Validation Accuracy: 0.978414442700157, Best F-score: 0.7261410788381744, Best Threshold: 0.9184591770172119
Epoch 2/100
Validation Loss: 0.29212658353290466, Validation Accuracy: 0.9801151229722659, Best F-score: 0.7469829379941739, Best Threshold: 0.9719102382659912
Epoch 3/100
Validation Loss: 0.37070030367599344, Validation Accuracy: 0.9801314756671899, Best F-score: 0.7486033519553073, Best Threshold: 0.9382821917533875
Epoch 4/100
Validation Loss: 0.5064458283332159, Validation Accuracy: 0.9801314756671899, Best F-score: 0.7488112466404796, Best Threshold: 0.9534609913825989
Epoch 5/100
Validation Loss: 0.5103459995029107, Validation Accuracy: 0.9800170068027211, Best F-score: 0.747937293729373, Best Threshold: 0.9457011818885803
Epoch 6/100
Validation Loss: 0.6598007928158315, Validation Accuracy: 0.980671114599686, Best F-score: 0.744045041143352, Best Threshold: 0.9441694021224976
Epoch 7/100
Validation Loss: 0.6362836944150194,

In [None]:
#grid_search(hyper_parameter_dict, results_csv_path)

In [None]:
# train single
# Hyperparameters noted by the author for this run:
#   bs = 20
#   lstm_hidden_size = 70
#   dropout_rate = 0.2
#   learning_rate = 4e-5
#   clip_grad_value = 2.0
#   custom_weight_value = 27.0
# These lines are notes only: the call below reads the live variables
# (clip_grad_value, custom_weight_value, ...) rather than the values listed here.

# Launch training (this run uses patience=50 instead of 30) and unpack the
# six results returned by train_model.
(
    avg_train_loss_per_epoch,
    avg_val_loss_per_epoch,
    train_accuracies,
    val_accuracies,
    train_f_scores,
    val_f_scores,
) = train_model(
    model,
    train_dataset,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    clip_grad_value,
    weighting_method='custom',
    custom_value=custom_weight_value,
    patience=50,
)

Epoch 1/100
Validation Loss: 1.0357710485750775, Validation Accuracy: 0.9771716378859236, Best F-score: 0.6688804554079696, Best Threshold: 0.9696148037910461
Epoch 2/100
Validation Loss: 1.124192273841714, Validation Accuracy: 0.97854526425955, Best F-score: 0.6810889645114243, Best Threshold: 0.9705095291137695
Epoch 3/100
Validation Loss: 1.1666829138994217, Validation Accuracy: 0.9775968079539508, Best F-score: 0.6791569086651054, Best Threshold: 0.9663196206092834
Epoch 4/100
Validation Loss: 1.3827161829707757, Validation Accuracy: 0.976304945054945, Best F-score: 0.6713540485370832, Best Threshold: 0.9650532007217407
Epoch 5/100
Validation Loss: 1.317360872789374, Validation Accuracy: 0.9773842229199372, Best F-score: 0.6803790154841691, Best Threshold: 0.9659895896911621
Epoch 6/100
Validation Loss: 1.3777492953077801, Validation Accuracy: 0.9764194139194139, Best F-score: 0.6774049217002237, Best Threshold: 0.9627013802528381
Epoch 7/100
Validation Loss: 1.2183032280431603, Va

In [None]:
#grid_search_results = grid_search(hyper_parameter_dict, train_dataset, train_dataloader, val_dataloader, criterion, num_epochs)