### Imports

In [1]:
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from helper import *
from DataLoaderGeneric import *
from BertController import *
import json


from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainerCallback, TrainingArguments
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn as nn
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

### C3 Training and saving model

In [2]:
data = DataLoaderGeneric(name='c3')
bert = BertController(max_seq=512, num_labels=data.num_labels)
data_dict = bert.extract_tokens_labels(texts=data.text, labels=data.labels)
train_loader, validation_loader = data.train_test_split(data_dict,batch_size=16)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
bert.train(train_loader, validation_loader, epochs=3)

In [None]:
bert.save_model('c3_256_3')

### FeedBack Training and saving model

In [None]:
data = DataLoaderGeneric(name='feedback')
bert = BertController(max_seq=512, num_labels=data.num_labels)
data_dict = bert.extract_tokens_labels(texts=data.text, labels=data.labels)
train_loader, validation_loader = data.train_test_split(data_dict)

In [None]:
bert.train(train_loader, validation_loader, epochs=2)

In [None]:
bert.save_model('feedback_512_2')

### Loading of discourse dataset and generic function for pre-trained models loading and testing

In [3]:
data = DataLoaderGeneric(name='discourse')
BASE_MODEL = "bert-base-uncased"
id2label = {k:l for k, l in enumerate(labels_lst_org)}
label2id = {l:k for k, l in enumerate(labels_lst_org)}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
data_dict = tokenizer(data.text, truncation=True, padding="max_length", max_length=350)
labels = data.labels
data_dict['labels'] = labels
train_dataloader, validation_dataloader, test_dataloader = data.train_test_split_discourse(tree_d=data.tree_depth, labels=labels, data_dict=data_dict,  batch_size=16, validation_ratio=0.2)

In [30]:
def evaluate_multilabel_classification(preds, true_labels):
    preds = preds.cpu().numpy()
    true_labels = true_labels.cpu().numpy()
    f1 = f1_score(true_labels, preds, average='weighted')
    precision = precision_score(true_labels, preds, average='weighted')
    recall = recall_score(true_labels, preds, average='weighted')
    accuracy = accuracy_score(true_labels, preds)
    print(f'F1-score: {f1}, Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}')
    
    
def run_pretrained_model(model_name, epochs=6):
    model = BertForSequenceClassification.from_pretrained(model_name)
    # Move the model to the GPU
    # Change the output layer to produce 31 outputs
    model.classifier = nn.Linear(model.classifier.in_features, 31)
    # Use the BinaryCrossEntropyLoss for multilabel classification
    loss_fn = nn.BCEWithLogitsLoss()
    loss_fn = loss_fn.to(device)
    model = model.to(device)
    # Set optimizer and scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

    # Train the model
    for epoch in range(1, epochs):
        # Train one epoch
        model.train()
        for step, batch in enumerate(train_dataloader):
            input_ids, attention_masks, labels = batch
            input_ids = input_ids.to(device)
            attention_masks = attention_masks.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(input_ids, attention_masks)
            loss = loss_fn(outputs[0], labels.float())
            loss.backward()
            optimizer.step()
        # Evaluate on validation set
        model.eval()
        val_loss = 0
        preds = []
        true_labels = []
        for step, batch in enumerate(validation_dataloader):
            input_ids, attention_masks, labels = batch
            input_ids = input_ids.to(device)
            attention_masks = attention_masks.to(device)
            labels = labels.to(device)
            outputs = model(input_ids, attention_masks)
            val_loss += loss_fn(outputs[0], labels.float()).item()
            preds.append(outputs[0].sigmoid() > 0.5)
            true_labels.append(labels)
        preds = torch.cat(preds, dim=0)
        true_labels = torch.cat(true_labels, dim=0)
        evaluate_multilabel_classification(preds, true_labels)
    
    # test
    preds = []
    true_labels = []
    for step, batch in enumerate(test_dataloader):
        input_ids, attention_masks, labels = batch
        input_ids = input_ids.to(device)

        attention_masks = attention_masks.to(device)
        labels = labels.to(device)
        outputs = model(input_ids, attention_masks)
        val_loss += loss_fn(outputs[0], labels.float()).item()
        preds.append(outputs[0].sigmoid() > 0.5)
        true_labels.append(labels)
    preds = torch.cat(preds, dim=0)
    true_labels = torch.cat(true_labels, dim=0)
    evaluate_multilabel_classification(preds, true_labels)

### Discourse trained on feedback

In [10]:
run_pretrained_model('saved_models/feedback_512_2')

  _warn_prf(average, modifier, msg_start, len(result))


F1-score: 0.26085151725049044, Precision: 0.21467244630867027, Recall: 0.3323432343234323, Accuracy: 0.14893617021276595


  _warn_prf(average, modifier, msg_start, len(result))


F1-score: 0.2521940977120666, Precision: 0.23107784587982608, Recall: 0.27755775577557756, Accuracy: 0.13885778275475924


  _warn_prf(average, modifier, msg_start, len(result))


F1-score: 0.2609809370507603, Precision: 0.2182932276550298, Recall: 0.3244224422442244, Accuracy: 0.14781634938409854


  _warn_prf(average, modifier, msg_start, len(result))


F1-score: 0.2609809370507603, Precision: 0.2182932276550298, Recall: 0.3244224422442244, Accuracy: 0.14781634938409854


  _warn_prf(average, modifier, msg_start, len(result))


F1-score: 0.2614502578342441, Precision: 0.2219024296046626, Recall: 0.31815181518151814, Accuracy: 0.14837625979843225
F1-score: 0.23165023079168093, Precision: 0.19596862266167198, Recall: 0.28321805606115763, Accuracy: 0.12949640287769784


  _warn_prf(average, modifier, msg_start, len(result))


### Discourse trained on C3

In [None]:
run_pretrained_model('saved_models/c3_512_2')

### Discourse trained on each label

In [2]:
task_name = 'discourse'  # ['c3', 'feedback', 'discourse']
data = DataLoaderGeneric(name=task_name)
results = {}

In [None]:
from tqdm import tqdm

for i in tqdm(range(31)):
    print("-"*25, labels_lst_org[i], "-"*25)
    labels = data.labels[:, i]
    bert = BertController(256,2,'saved_models/c3_512_2')
    data_dict = bert.tokenizer(data.text, truncation=True, padding="max_length", max_length=256)
    train_dataloader, validation_dataloader, test_dataloader = data.train_test_split_discourse(labels, data.tree_depth, data_dict,  batch_size=16, validation_ratio=0.2)
    # model, optimizer = get_pretraind_bert('clean', 2)
    # model.cuda()
    bert.train(train_dataloader, validation_dataloader, epochs=3)
    results[labels_lst_org[i]] = bert.metrics

with open("results each label c3 transfer.txt", 'a') as file:
    for key in results.keys():
        file.write(key+'\n')
        for key2 in results[key].keys():
            file.write(str(results[key][key2])+'\n')