## Train BERT Model


In [4]:
import numpy as np
from collections import defaultdict
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import json
import mlflow
from imblearn.over_sampling import SMOTE, ADASYN
import time


def train_model(data_path, num_epochs, learning_rate, batch_size, oversample_method=False, oversample_labels=None, cross_validation=False):
    # Set MLFlow experiment name
    mlflow.set_experiment("SentenceClass")

    # Check if CUDA is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Using device:', device)

    # Load BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
    #tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

    # Read the JSON file
    def read_json(file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data

    data = read_json(data_path)

    # Extract sentences and labels
    sentences = [token['name'] for token in data['tokens']]
    labels = [code['tore'] for code in data['codes']]
    label_mapping = {'0': 0, 'Domain Level': 1, 'Interaction Level': 2, 'System Level': 3}
    #label_mapping = {'0': 0, 'Domain Level': 1, 'Interaction Level': 2}
    #label_mapping = {'0': 0, 'System Level': 1}
    # label_mapping = {'0': 0, 'Level': 1}
    labels = [label_mapping[label] if label in label_mapping else label for label in labels]

    # Tokenize input sentences
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]

    # Convert tokens to input IDs
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]

    # Pad input sequences
    max_len = max(len(x) for x in input_ids)
    padded_input_ids = [x + [tokenizer.pad_token_id] * (max_len - len(x)) for x in input_ids]

    # Convert to PyTorch tensors
    input_ids_tensor = torch.tensor(padded_input_ids).to(device)
    labels_tensor = torch.tensor(labels).to(device)

    # Create attention masks
    attention_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in padded_input_ids]

    if cross_validation:
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold = 1
        avg_precision = 0
        avg_recall = 0
        avg_f1 = 0

        for train_index, val_index in kf.split(input_ids_tensor):
            print(f"Fold {fold}")

            if oversample_method:
                # Apply SMOTE to training data for selected labels
                if oversample_labels is not None:
                    oversample_indices = [i for i, label in enumerate(labels_tensor[train_index]) if label in oversample_labels]
                    train_inputs_oversample, train_labels_oversample = input_ids_tensor[train_index][oversample_indices], labels_tensor[train_index][oversample_indices]
                else:
                    train_inputs_oversample, train_labels_oversample = input_ids_tensor[train_index], labels_tensor[train_index]
                    
                if oversample_method == "smote":
                    oversampler = SMOTE(random_state=42)
                elif oversample_method == "adasyn":
                    oversampler = ADASYN(random_state=42)
                if oversample_labels==None:
                    train_inputs_smote, train_labels_smote = oversampler.fit_resample(input_ids_tensor[train_index], labels_tensor[train_index])
                    train_inputs_tensor = torch.tensor(train_inputs_smote).to(device)
                    train_labels_tensor = torch.tensor(train_labels_smote).to(device)    
                else:
                    train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs_oversample.cpu().numpy(), train_labels_oversample.cpu().numpy())
                    train_inputs_oversample = torch.tensor(train_inputs_oversample).to(device)
                    train_labels_oversample = torch.tensor(train_labels_oversample).to(device)
                    train_inputs_tensor = torch.cat((input_ids_tensor[train_index], train_inputs_oversample), dim=0)
                    train_labels_tensor = torch.cat((labels_tensor[train_index], train_labels_oversample), dim=0)

                # Print occurrences of each code after SMOTE
                code_occurrences_after_smote = defaultdict(int)
                for code in train_labels_tensor.cpu().numpy():
                    code_occurrences_after_smote[code] += 1
        
                print("Code occurrences in the training data after Oversample:")
                for code, count in code_occurrences_after_smote.items():
                    print(f"{code}: {count}")
            else:
                train_inputs_tensor = input_ids_tensor[train_index]
                train_labels_tensor = labels_tensor[train_index]

            train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs_tensor]

            train_data = TensorDataset(train_inputs_tensor, torch.tensor(train_masks), train_labels_tensor)
            train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)

            validation_inputs, validation_masks = input_ids_tensor[val_index], torch.tensor(attention_masks)[val_index]
            validation_labels = labels_tensor[val_index]

            validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
            validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)

            precision, recall, f1 = train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, fold, batch_size, oversample_method, cross_validation)

            avg_precision += precision
            avg_recall += recall
            avg_f1 += f1

            fold += 1

        avg_precision /= 5
        avg_recall /= 5
        avg_f1 /= 5

        print(f"Average Precision across 5 folds: {avg_precision}")
        print(f"Average Recall across 5 folds: {avg_recall}")
        print(f"Average F1 Score across 5 folds: {avg_f1}")

        mlflow.log_metric("avg_precision", avg_precision)
        mlflow.log_metric("avg_recall", avg_recall)
        mlflow.log_metric("avg_f1", avg_f1)

        mlflow.end_run()

    else:
        # Split data into train and validation sets
        train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids_tensor, labels_tensor, random_state=42, test_size=0.2)
        train_masks, validation_masks, _, _ = train_test_split(attention_masks, input_ids_tensor, random_state=42, test_size=0.2)

        # Print occurrences of each code before SMOTE
        code_occurrences_before_sampling = defaultdict(int)
        for code in train_labels.cpu().numpy():
            code_occurrences_before_sampling[code] += 1

        print("Code occurrences in the training data:")
        for code, count in code_occurrences_before_sampling.items():
            print(f"{code}: {count}")

        if oversample_method:
                # Apply SMOTE to training data for selected labels
            if oversample_labels is not None:
                oversample_indices = [i for i, label in enumerate(train_labels) if label in oversample_labels]
                train_inputs_oversample, train_labels_oversample = train_inputs[oversample_indices], train_labels[oversample_indices]
            else:
                train_inputs_oversample, train_labels_oversample = train_inputs, train_labels
    
            if oversample_method == "smote":
                oversampler = SMOTE(random_state=42)
            elif oversample_method == "adasyn":
                oversampler = ADASYN(sampling_strategy="auto", random_state=42)
            if oversample_labels==None:
                train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs.cpu().numpy(), train_labels.cpu().numpy())
                train_inputs = torch.tensor(train_inputs_oversample).to(device)
                train_labels = torch.tensor(train_labels_oversample).to(device)
                train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs]
            else:
                train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs_oversample.cpu().numpy(), train_labels_oversample.cpu().numpy())
                train_inputs_oversample = torch.tensor(train_inputs_oversample).to(device)
                train_labels_oversample = torch.tensor(train_labels_oversample).to(device)
                train_inputs = torch.cat((train_inputs, train_inputs_oversample), dim=0)
                train_labels = torch.cat((train_labels, train_labels_oversample), dim=0)
                train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs]

            # Print occurrences of each code after SMOTE
            code_occurrences_after_smote = defaultdict(int)
            for code in train_labels.cpu().numpy():
                code_occurrences_after_smote[code] += 1

            print("Code occurrences in the training data after Oversample:")
            for code, count in code_occurrences_after_smote.items():
                print(f"{code}: {count}")
    
        # Convert to PyTorch dataset
        train_data = TensorDataset(train_inputs, torch.tensor(train_masks), train_labels)
        validation_data = TensorDataset(validation_inputs, torch.tensor(validation_masks).to(device), validation_labels)

        # Create data loaders
        train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)
        validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)

        # Train and evaluate model
        precision, recall, f1 = train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, 1, batch_size, oversample_method, cross_validation) 
        
        mlflow.end_run()

    return

def brennan_perediger_kappa(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    n = np.sum(cm)
    pa = np.trace(cm) / n
    pe = np.sum(np.sum(cm, axis=0) * np.sum(cm, axis=1)) / (n * n)
    kappa = (pa - pe) / (1 - pe)
    return kappa

def train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, fold, batch_size, oversample_method, cross_validation,):
    total_steps = len(train_dataloader) * num_epochs

    model = BertForSequenceClassification.from_pretrained('bert-large-uncased', num_labels=4).to(device)
    #model = RobertaForSequenceClassification.from_pretrained('roberta-large', num_labels=3).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    precision = 0
    recall = 0
    f1 = 0
    precision_dict = defaultdict(float)
    recall_dict = defaultdict(float)
    f1_dict = defaultdict(float)

    for epoch in range(num_epochs):
        print(f"Running Epoch: {epoch+1}")
        model.train()
        total_loss = 0

        for batch in train_dataloader:
            optimizer.zero_grad()
            inputs, masks, labels = [b.to(device) for b in batch]
            outputs = model(inputs, attention_mask=masks, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        avg_train_loss = total_loss / len(train_dataloader)

        validation_labels_all = []
        validation_preds_all = []

        eval_start_time= time.time()

        for batch in validation_dataloader:
            inputs, masks, labels = [b.to(device) for b in batch]
            with torch.no_grad():
                outputs = model(inputs, attention_mask=masks)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)

            validation_labels_all.extend(labels.cpu().numpy())
            validation_preds_all.extend(preds.cpu().numpy())

        eval_end_time = time.time()
        eval_runtime = eval_end_time - eval_start_time

        # Calculate precision, recall, and F1 score for each label

        for label in range(4):
            precision_dict[label] = precision_score(validation_labels_all, validation_preds_all, labels=[label], average=None, zero_division=0)
            recall_dict[label] = recall_score(validation_labels_all, validation_preds_all, labels=[label], average=None)
            f1_dict[label] = f1_score(validation_labels_all, validation_preds_all, labels=[label], average=None)

            # Calculate overall precision, recall, and F1 score
            precision = precision_score(validation_labels_all, validation_preds_all, average="weighted", zero_division=0)
            recall = recall_score(validation_labels_all, validation_preds_all, average="weighted")
            f1 = f1_score(validation_labels_all, validation_preds_all, average="weighted")

            kappa = brennan_perediger_kappa(validation_labels_all, validation_preds_all)

        avg_val_loss = total_loss / len(validation_dataloader)

        print(f"Precision_epoch{epoch}: {precision}")
        print(f"Recall_epoch{epoch}: {recall}")
        print(f"F1_epoch{epoch}: {f1}")
        print(f"Kappa_epoch{epoch}: {kappa}")
        print(f"AvgTrainLoss_epoch{epoch}: {avg_train_loss}")
        print(f"AvgValLoss_epoch{epoch}: {avg_val_loss}")
        print(f"Eval_Runtime_epoch{epoch}: {eval_runtime}")
        
    with mlflow.start_run():
        # Log precision, recall, and F1 score for each label to MLFlow
        for label in range(4):
            mlflow.log_metric(f"ToreLabel_{label}_precision_fold{fold}", precision_dict[label])
            mlflow.log_metric(f"ToreLabel_{label}_recall_fold{fold}", recall_dict[label])
            mlflow.log_metric(f"ToreLabel_{label}_f1_fold{fold}", f1_dict[label])
            #mlflow.log_artifact(f'confusion_matrix_{fold}')

        # Log overall precision, recall, and F1 score to MLFlow
        mlflow.log_metric(f"precision_fold{fold}", precision)
        mlflow.log_metric(f"recall_fold{fold}", recall)
        mlflow.log_metric(f"f1_fold{fold}", f1)
        mlflow.log_metric(f"kappa_fold{fold}", kappa)
        mlflow.log_param("num_epochs", num_epochs)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("smote", oversample_method)
        mlflow.log_param("cross_validation", cross_validation)

    return precision, recall, f1


train_model("E:/BERT/uvl-tore-classifier-bert/src/data/datasets/smartage/SmartAgeTore.json", num_epochs=1, learning_rate=3e-05, batch_size=64, oversample_method=False, oversample_labels=None, cross_validation=False)


Using device: cuda
Code occurrences in the training data:
2: 455
0: 900
1: 1671
3: 39


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running Epoch: 1
Precision_epoch0: 0.4776051266835982
Recall_epoch0: 0.5619295958279009
F1_epoch0: 0.5105051702455878
Kappa_epoch0: 0.25074573058338523
AvgTrainLoss_epoch0: 1.0611148873964946
AvgValLoss_epoch0: 4.2444595495859785
Eval_Runtime_epoch0: 69.19323396682739


In [1]:
 #Questions as Classification Feature:

In [1]:
import numpy as np
from collections import defaultdict
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import json
import mlflow
from imblearn.over_sampling import SMOTE, ADASYN

def train_model(data_path, num_epochs, learning_rate, batch_size, oversample_method=False, oversample_labels=None, cross_validation=False):
    # Set MLFlow experiment name
    mlflow.set_experiment("SentenceClass")

    # Check if CUDA is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Using device:', device)

    # Load BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')

    # Read the JSON file
    def read_json(file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data

    data = read_json(data_path)

    # Extract sentences, questions, and labels
    sentences = [token['name'] for token in data['tokens']]
    questions = [token['question'] for token in data['tokens']]
    labels = [code['tore'] for code in data['codes']]
    label_mapping = {'0': 0, 'Domain Level': 1, 'Interaction Level': 2, 'System Level': 3}
    labels = [label_mapping[label] if label in label_mapping else label for label in labels]

    # Concatenate questions with sentences
    combined_texts = [f"Question: {q} Answer: {s}" for q, s in zip(questions, sentences)]

    # Tokenize input sentences
    tokenized_texts = [tokenizer.tokenize(text) for text in combined_texts]

    # Convert tokens to input IDs
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]

    # Pad input sequences
    max_len = max(len(x) for x in input_ids)
    padded_input_ids = [x + [tokenizer.pad_token_id] * (max_len - len(x)) for x in input_ids]

    # Convert to PyTorch tensors
    input_ids_tensor = torch.tensor(padded_input_ids).to(device)
    labels_tensor = torch.tensor(labels).to(device)

    # Create attention masks
    attention_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in padded_input_ids]

    if cross_validation:
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        fold = 1
        avg_precision = 0
        avg_recall = 0
        avg_f1 = 0

        for train_index, val_index in kf.split(input_ids_tensor):
            print(f"Fold {fold}")

            if oversample_method:
                # Apply SMOTE to training data for selected labels
                if oversample_labels is not None:
                    oversample_indices = [i for i, label in enumerate(labels_tensor[train_index]) if label in oversample_labels]
                    train_inputs_oversample, train_labels_oversample = input_ids_tensor[train_index][oversample_indices], labels_tensor[train_index][oversample_indices]
                else:
                    train_inputs_oversample, train_labels_oversample = input_ids_tensor[train_index], labels_tensor[train_index]

                if oversample_method == "smote":
                    oversampler = SMOTE(random_state=42)
                elif oversample_method == "adasyn":
                    oversampler = ADASYN(random_state=42)
                if oversample_labels==None:
                    train_inputs_smote, train_labels_smote = oversampler.fit_resample(input_ids_tensor[train_index], labels_tensor[train_index])
                    train_inputs_tensor = torch.tensor(train_inputs_smote).to(device)
                    train_labels_tensor = torch.tensor(train_labels_smote).to(device)
                else:
                    train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs_oversample.cpu().numpy(), train_labels_oversample.cpu().numpy())
                    train_inputs_oversample = torch.tensor(train_inputs_oversample).to(device)
                    train_labels_oversample = torch.tensor(train_labels_oversample).to(device)
                    train_inputs_tensor = torch.cat((input_ids_tensor[train_index], train_inputs_oversample), dim=0)
                    train_labels_tensor = torch.cat((labels_tensor[train_index], train_labels_oversample), dim=0)

                # Print occurrences of each code after SMOTE
                code_occurrences_after_smote = defaultdict(int)
                for code in train_labels_tensor.cpu().numpy():
                    code_occurrences_after_smote[code] += 1

                print("Code occurrences in the training data after Oversample:")
                for code, count in code_occurrences_after_smote.items():
                    print(f"{code}: {count}")
            else:
                train_inputs_tensor = input_ids_tensor[train_index]
                train_labels_tensor = labels_tensor[train_index]

            train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs_tensor]

            train_data = TensorDataset(train_inputs_tensor, torch.tensor(train_masks), train_labels_tensor)
            train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)

            validation_inputs, validation_masks = input_ids_tensor[val_index], torch.tensor(attention_masks)[val_index]
            validation_labels = labels_tensor[val_index]

            validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
            validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)

            precision, recall, f1 = train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, fold, batch_size, smote, cross_validation)

            avg_precision += precision
            avg_recall += recall
            avg_f1 += f1

            fold += 1

        avg_precision /= 5
        avg_recall /= 5
        avg_f1 /= 5

        print(f"Average Precision across 5 folds: {avg_precision}")
        print(f"Average Recall across 5 folds: {avg_recall}")
        print(f"Average F1 Score across 5 folds: {avg_f1}")

        mlflow.log_metric("avg_precision", avg_precision)
        mlflow.log_metric("avg_recall", avg_recall)
        mlflow.log_metric("avg_f1", avg_f1)

        mlflow.end_run()

    else:
        # Split data into train and validation sets
        train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids_tensor, labels_tensor, random_state=42, test_size=0.2)
        train_masks, validation_masks, _, _ = train_test_split(attention_masks, input_ids_tensor, random_state=42, test_size=0.2)

        # Print occurrences of each code before SMOTE
        code_occurrences_before_sampling = defaultdict(int)
        for code in train_labels.cpu().numpy():
            code_occurrences_before_sampling[code] += 1

        print("Code occurrences in the training data:")
        for code, count in code_occurrences_before_sampling.items():
            print(f"{code}: {count}")

        if oversample_method:
            # Apply SMOTE to training data for selected labels
            if oversample_labels is not None:
                oversample_indices = [i for i, label in enumerate(train_labels) if label in oversample_labels]
                train_inputs_oversample, train_labels_oversample = train_inputs[oversample_indices], train_labels[oversample_indices]
            else:
                train_inputs_oversample, train_labels_oversample = train_inputs, train_labels

            if oversample_method == "smote":
                oversampler = SMOTE(random_state=42)
            elif oversample_method == "adasyn":
                oversampler = ADASYN(sampling_strategy="auto", random_state=42)
            if oversample_labels == None:
                train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs.cpu().numpy(), train_labels.cpu().numpy())
                train_inputs = torch.tensor(train_inputs_oversample).to(device)
                train_labels = torch.tensor(train_labels_oversample).to(device)
                train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs]
            else:
                train_inputs_oversample, train_labels_oversample = oversampler.fit_resample(train_inputs_oversample.cpu().numpy(), train_labels_oversample.cpu().numpy())
                train_inputs_oversample = torch.tensor(train_inputs_oversample).to(device)
                train_labels_oversample = torch.tensor(train_labels_oversample).to(device)
                train_inputs = torch.cat((train_inputs, train_inputs_oversample), dim=0)
                train_labels = torch.cat((train_labels, train_labels_oversample), dim=0)
                train_masks = [[float(i != tokenizer.pad_token_id) for i in ii] for ii in train_inputs]

            # Print occurrences of each code after SMOTE
            code_occurrences_after_smote = defaultdict(int)
            for code in train_labels.cpu().numpy():
                code_occurrences_after_smote[code] += 1

            print("Code occurrences in the training data after Oversample:")
            for code, count in code_occurrences_after_smote.items():
                print(f"{code}: {count}")

        # Convert to tensors
        train_masks = torch.tensor(train_masks)
        validation_masks = torch.tensor(validation_masks)

        # Create DataLoaders
        train_data = TensorDataset(train_inputs, train_masks, train_labels)
        train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)

        validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
        validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)

        train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, None, batch_size, oversample_method, cross_validation)

def train_and_evaluate_model(train_dataloader, validation_dataloader, num_epochs, learning_rate, device, fold, batch_size, smote, cross_validation):
    model = BertForSequenceClassification.from_pretrained('bert-large-uncased', num_labels=4)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-8)

    total_steps = len(train_dataloader) * num_epochs

    training_stats = []

    for epoch_i in range(0, num_epochs):
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, num_epochs))
        print('Training...')

        model.train()

        total_loss = 0

        for step, batch in enumerate(train_dataloader):
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            model.zero_grad()

            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs.loss
            logits = outputs.logits

            total_loss += loss.item()

            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()

        avg_train_loss = total_loss / len(train_dataloader)

        print("")
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("")
        print("Running Validation...")

        model.eval()

        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0

        true_labels, predictions = [], []

        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            with torch.no_grad():
                outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

            logits = outputs.logits

            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            predictions.append(logits)
            true_labels.append(label_ids)

        flat_predictions = [item for sublist in predictions for item in sublist]
        flat_true_labels = [item for sublist in true_labels for item in sublist]

        flat_predictions = np.argmax(flat_predictions, axis=1)

        precision = precision_score(flat_true_labels, flat_predictions, average='weighted')
        recall = recall_score(flat_true_labels, flat_predictions, average='weighted')
        f1 = f1_score(flat_true_labels, flat_predictions, average='weighted')

        print("  Precision: {0:.2f}".format(precision))
        print("  Recall: {0:.2f}".format(recall))
        print("  F1: {0:.2f}".format(f1))

        training_stats.append({
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Precision': precision,
            'Recall': recall,
            'F1': f1
        })

        mlflow.log_metric("train_loss_epoch", avg_train_loss, step=epoch_i)
        mlflow.log_metric("precision_fold1", precision, step=epoch_i)
        mlflow.log_metric("recall_fold1", recall, step=epoch_i)
        mlflow.log_metric("f1_fold1", f1, step=epoch_i)

        if fold is not None:
            mlflow.log_metric(f"precision_fold_{fold}_epoch_{epoch_i + 1}", precision)
            mlflow.log_metric(f"recall_fold_{fold}_epoch_{epoch_i + 1}", recall)
            mlflow.log_metric(f"f1_fold_{fold}_epoch_{epoch_i + 1}", f1)

    print("")
    print("Training complete!")

    return precision, recall, f1

# Example usage
data_path = 'C:/Users/mjand/Seafile/Meine Bibliothek/SmartAge/SmartAgeToreWithQuestions.json'
train_model(data_path, num_epochs=12, learning_rate=3e-05, batch_size=64, oversample_method=False, oversample_labels=None, cross_validation=False)


Using device: cuda




Code occurrences in the training data:
2: 455
0: 900
1: 1671
3: 39


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training...

  Average training loss: 1.10

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.34
  Recall: 0.51
  F1: 0.35

Training...

  Average training loss: 1.02

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.54
  Recall: 0.56
  F1: 0.46

Training...

  Average training loss: 0.89

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.55
  Recall: 0.67
  F1: 0.61

Training...

  Average training loss: 0.73

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.72
  Recall: 0.75
  F1: 0.73

Training...

  Average training loss: 0.54

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.73
  Recall: 0.76
  F1: 0.74

Training...

  Average training loss: 0.43

Running Validation...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  Precision: 0.75
  Recall: 0.75
  F1: 0.73

Training...

  Average training loss: 0.32

Running Validation...
  Precision: 0.78
  Recall: 0.76
  F1: 0.76

Training...

  Average training loss: 0.22

Running Validation...
  Precision: 0.78
  Recall: 0.78
  F1: 0.77

Training...

  Average training loss: 0.16

Running Validation...
  Precision: 0.78
  Recall: 0.77
  F1: 0.77

Training...

  Average training loss: 0.14

Running Validation...
  Precision: 0.78
  Recall: 0.76
  F1: 0.77

Training...

  Average training loss: 0.09

Running Validation...
  Precision: 0.78
  Recall: 0.76
  F1: 0.77

Training...

  Average training loss: 0.08

Running Validation...
  Precision: 0.77
  Recall: 0.77
  F1: 0.77

Training complete!
