# Load Data

In [None]:
import csv
import numpy as np
def load_dataset(filepath):
    """Read a comma-separated file into a NumPy array of strings.

    @param    filepath (str): path of the CSV file to read
    @return   data (np.ndarray): one row per CSV record and one column per
                  field (rows are expected to have the same number of
                  fields); every value is kept as text
    """
    # newline='' hands line-ending handling to the csv module, so a quoted
    # field that contains a line break is returned unchanged instead of being
    # rewritten by universal-newline translation.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=",")
        return np.array(list(reader))

In [None]:
folder = 'sentence'

# --- training split: column 0 feeds the texts, column 1 is cast to int labels ---
filepath = 'capstone/dataset/' + folder + '/sentence_train_dataset_balanced.csv'
data = load_dataset(filepath)
print(filepath, len(data))
train_texts, train_labels = list(data[:, 0]), list(data[:, 1].astype(int))

# --- validation split: same column layout as the training file ---
filepath = 'capstone/dataset/' + folder + '/sentence_val_dataset_balanced.csv'
data = load_dataset(filepath)
print(filepath, len(data))
val_texts, val_labels = list(data[:, 0]), list(data[:, 1].astype(int))

# Encode Data and Create PyTorch DataLoaders

In [None]:
import torch
from transformers import *
from torch.utils.data import DataLoader

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Both splits are encoded with identical options (truncation and padding on).
encode_options = {"truncation": True, "padding": True}

train_encodings = tokenizer(train_texts, **encode_options)
print(train_encodings.keys())
val_encodings = tokenizer(val_texts, **encode_options)

class bertDataset(torch.utils.data.Dataset):
    """Index-addressable view over tokenizer encodings and their labels.

    `encodings` is a mapping from field name to a per-sample sequence and
    `labels` is a per-sample sequence; sample `idx` is assembled on demand.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of samples is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field, plus the label under the 'labels' key.
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each encoded split so it can be indexed sample by sample.
train_dataset = bertDataset(train_encodings, train_labels)
val_dataset = bertDataset(val_encodings, val_labels)

# Only the training loader shuffles; both use batches of 16.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
eval_loader = DataLoader(val_dataset, batch_size=16)

# The dataset objects keep their own references, so drop the notebook-level names.
del data, train_texts, val_texts, train_labels, val_labels, train_encodings, val_encodings

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


# Load BERT Model

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel

# Create the BertClassifier class
class BertClassifier(nn.Module):
    """BERT encoder followed by a small feed-forward classification head.
    """
    def __init__(self, model_name, freeze_bert=False, num_hidden_layers=None,
                 num_labels=4):
        """
        @param    model_name (str): name or path passed to `from_pretrained`
                      for both the configuration and the weights
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        @param    num_hidden_layers (int, optional): number of encoder layers
                      to build. When omitted, the module-level `num_hidden`
                      variable is consulted if it exists and is applied only
                      when it differs from 12 (the previous behaviour).
        @param    num_labels (int): size of the output layer
        """
        super().__init__()
        # Imported here so the class does not rely on a wildcard import
        # executed in another cell.
        from transformers import BertConfig

        # Hidden size of our classifier
        H = 50

        # Build the configuration first so the encoder depth can be adjusted
        # before any weights are loaded.
        config = BertConfig.from_pretrained(model_name)
        if num_hidden_layers is None:
            # Legacy path: depth configured through the global `num_hidden`.
            legacy_depth = globals().get("num_hidden", 12)
            if legacy_depth != 12:
                config.num_hidden_layers = legacy_depth
        else:
            config.num_hidden_layers = num_hidden_layers

        # Instantiate BERT once, using the (possibly adjusted) configuration
        self.bert = BertModel.from_pretrained(model_name, config=config)

        # Size the classifier input from the encoder instead of assuming 768
        D_in = config.hidden_size

        # Instantiate a feed-forward classifier with one hidden layer
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Linear(H, num_labels)
        )

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)

        # Take the first output (last hidden state) at token position 0,
        # i.e. the `[CLS]` token, as the sequence representation
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits

    
num_epochs = 4
num_hidden = 12  # 2, 6, 14, 18
model_name = "bert-base-uncased"
# model_name = "TODBERT/TOD-BERT-JNT-V1"

# Instantiate Bert Classifier
bert_model = BertClassifier(model_name, freeze_bert=False)

# Move the model to the selected device (GPU when available)
bert_model.to(device)

# Create the optimizer.
# torch.optim.AdamW replaces the deprecated AdamW re-exported by transformers;
# weight_decay=0.0 keeps the value the transformers implementation used by
# default (the PyTorch default is non-zero).
optimizer = torch.optim.AdamW(bert_model.parameters(),
                              lr=5e-5,           # Peak learning rate
                              eps=1e-8,          # Numerical-stability term
                              weight_decay=0.0
                              )

# Total number of training steps (one scheduler step per batch)
total_steps = len(train_loader) * num_epochs

# Set up the learning rate scheduler: linear warm-up for the first 200 steps,
# then linear decay over the remaining steps
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=200,
                                            num_training_steps=total_steps)


# Train Model

In [None]:
from datasets import load_metric
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 5]
import torch.nn as nn
import time

import os

# Specify loss function
loss_fn = nn.CrossEntropyLoss()

# load metric function
metric_f1 = load_metric("f1")
metric_acc = load_metric("accuracy")

# Tag used in every output file name.
# NOTE(review): the tag says 'freeze' although the model above is built with
# freeze_bert=False, and re-running this cell appends the suffix again --
# confirm the intended label.
num_hidden = str(num_hidden) + 'freeze'

# Every artefact of this run shares the same path prefix; create its directory
# up front (model names containing '/' introduce an extra sub-directory).
results_prefix = "Results/" + model_name + "_" + num_hidden
os.makedirs(os.path.dirname(results_prefix), exist_ok=True)

print("training the model now...")

batch_loss, train_loss, validation_loss = [], [], []
acc_epoch = []
start = time.time()

for epoch in range(num_epochs):
    # put model in train mode
    bert_model.train()
    total_loss = 0

    print(f'=========Running {epoch} of {num_epochs}=========')
    print("Running training batch...")
    for i, batch in enumerate(train_loader):

        if i % 10 == 0:
            print(f'--- Processing batch {i} ----')

        # Zero out any previously calculated gradients
        bert_model.zero_grad()

        b_input_ids = batch["input_ids"].to(device)
        b_input_mask = batch["attention_mask"].to(device)
        b_labels = batch["labels"].to(device)
        outputs = bert_model(b_input_ids, attention_mask=b_input_mask)

        # Compute loss and accumulate the loss values as plain floats
        loss = loss_fn(outputs, b_labels)
        batch_loss.append(loss.item())
        total_loss += loss.item()

        loss.backward()

        # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
        torch.nn.utils.clip_grad_norm_(bert_model.parameters(), 1.0)

        # Update parameters and the learning rate
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_loss / len(train_loader)
    train_loss.append(avg_train_loss)
    print(f"\nAverage train loss: {avg_train_loss}")

    # put model in evaluation mode
    bert_model.eval()

    eval_loss = 0
    predictions, true_labels = [], []

    print("Running validation batch...")
    for i, batch in enumerate(eval_loader):

        if i % 10 == 0:
            print(f'--- Processing {i} ----')
        b_input_ids = batch["input_ids"].to(device)
        b_input_mask = batch["attention_mask"].to(device)
        b_labels = batch["labels"].to(device)

        # get predictions and loss for the eval batch without tracking gradients
        with torch.no_grad():
            outputs = bert_model(b_input_ids, attention_mask=b_input_mask)
            loss = loss_fn(outputs, b_labels)

        # .item() keeps the running loss a Python float, so it can be plotted
        # and printed regardless of the device the tensors live on
        eval_loss += loss.item()

        # Store plain Python ints rather than 0-d device tensors
        predictions.extend(torch.argmax(outputs, dim=1).tolist())
        true_labels.extend(b_labels.tolist())

    eval_loss = eval_loss / len(eval_loader)
    validation_loss.append(eval_loss)
    print(f"Validation loss: {eval_loss}")

    # calculate f1 score (per class, since average=None) and accuracy
    f1 = metric_f1.compute(predictions=predictions, references=true_labels, average=None)
    print(f'Validation F1 score: {f1["f1"]}')
    acc = metric_acc.compute(predictions=predictions, references=true_labels)
    print(f'\nValidation Accuracy: {acc["accuracy"]}')
    acc_epoch.append(acc["accuracy"])

    # plot losses and accuracy
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)

    ax1.plot(train_loss)
    ax1.set_title('Training Loss')
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss value')

    ax2.plot(validation_loss)
    ax2.set_title('Validation Loss')
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('loss value')

    ax3.plot(acc_epoch)
    ax3.set_title('Accuracy')
    ax3.set_xlabel('epoch')
    ax3.set_ylabel('acc value')

    plt.savefig(results_prefix + "_results.png")
    plt.show()
    # A new figure is created every epoch; release it once it has been saved
    plt.close(fig)

    # Append this epoch's numbers, one metric per line
    with open(results_prefix + "_results.txt", "a") as f:
        f.write(f"\nAverage train loss: {avg_train_loss}")
        f.write(f"\nValidation loss: {eval_loss}")
        f.write(f'\nValidation F1 score: {f1["f1"]}')
        f.write(f'\nValidation Accuracy: {acc["accuracy"]}')

# print time taken to train model
end = time.time()
hours, rem = divmod(end - start, 3600)
minutes, seconds = divmod(rem, 60)
elapsed = "{:0>2}:{:0>2}:{:0>2}".format(int(hours), int(minutes), int(seconds))
print(elapsed)

# save model
torch.save(bert_model.state_dict(), results_prefix + "_test.torch")

with open(results_prefix + "_results.txt", "a") as f:
    f.write("\n Time elapsed " + elapsed)
print(elapsed)

print("Done", model_name + "_" + num_hidden + "_results")