This is the code of "BERT Error Detection for STPA" (BEDS) Pipeline.

This Python Notebook was created and evaluated in Google Colab Pro (A100 GPU).

This code can be found in the GitHub repository:
https://github.com/andreyokamura-unicamp/BEDS-Pipeline

The BERT models for each Pipeline step trained in this notebook can be found in
https://huggingface.co/andreyunic23


Corresponding Author: A.T. Okamura

In [1]:
# !pip install datasets
# !pip install evaluate
# !pip install sentence-transformers==3.1.1
# !pip install transformers==4.45.2

In [2]:
import time
import pandas as pd
import numpy as np
from torch.nn import functional as F
import torch
from datasets import Dataset

from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import ConfusionMatrixDisplay

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoTokenizer, DataCollatorWithPadding
import evaluate

from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    losses,
)

In [3]:
# Setting seed variables

random_state = 0

In [4]:
# Define input file name here:
file_name = 'stpa-dataset.csv'

# Uncomment 4 lines below if using Google Drive
# from google.colab import drive
# drive.mount('/content/drive')
# drive_path = '/content/drive/MyDrive/Colab Notebooks/'
# file_name = drive_path+file_name

df = pd.read_csv(file_name, index_col=False, delimiter=',')

# remove unnecessary columns for training
df.drop(['domain', 'year', 'title', 'source', 'slide'], inplace=True, axis=1)
print(df.head())

                                            sentence       label validity  \
0  A collision between the ACROBOTER robotic plat...  constraint    valid   
1   A non‐patient is injured or killed by radiation.        loss  invalid   
2  A nonpatient is injured or killed in the proce...        loss  invalid   
3  A pair of controlled aircraft violates minimum...      hazard    valid   
4  A person or worker is standing/working under o...      hazard  invalid   

      faults  
0      valid  
1  condition  
2  condition  
3      valid  
4    rewrite  


#Explore the dataset

In [5]:
test_df = df.copy()
print(test_df[['sentence','faults']])

                                               sentence     faults
0     A collision between the ACROBOTER robotic plat...      valid
1      A non‐patient is injured or killed by radiation.  condition
2     A nonpatient is injured or killed in the proce...  condition
3     A pair of controlled aircraft violates minimum...      valid
4     A person or worker is standing/working under o...    rewrite
...                                                 ...        ...
1079    Avoid flight in altitude below permitted limit.        not
1080            System is capable of defending threats.        not
1081                     Verify if the runway is empty.        not
1082       Sudden braking olny in emergency situations.        not
1083        Vehicle overtaking only when road is empty.        not

[1084 rows x 2 columns]


In [6]:
# Dataset overview
df.head()

# Class distribution of each target column
print(df.label.value_counts())
print('\n')
print(df.validity.value_counts())
print('\n')
print(df.faults.value_counts())

label
hazard        424
constraint    369
loss          291
Name: count, dtype: int64


validity
valid      780
invalid    304
Name: count, dtype: int64


faults
valid        780
rewrite      122
not           77
condition     60
accident      45
Name: count, dtype: int64


In [7]:
# Distribution of valid and invalid sentences for each class
out = df.groupby('label')['validity'].value_counts(normalize=False)
print(out)
print('\n')

# Length of the longest sentence in the dataset
print(df.sentence.str.len().max())

label       validity
constraint  valid       301
            invalid      68
hazard      valid       287
            invalid     137
loss        valid       192
            invalid      99
Name: count, dtype: int64


270


In [8]:
# Generate list of valid examples for sentence similarity calculation (for use in execution example)

valid_loss_reference = df[df.label.isin(['loss']) & df.validity.isin(['valid'])]['sentence'].drop_duplicates()
valid_hazard_reference = df[df.label.isin(['hazard']) & df.validity.isin(['valid'])]['sentence'].drop_duplicates()
valid_constraint_reference = df[df.label.isin(['constraint']) & df.validity.isin(['valid'])]['sentence'].drop_duplicates()

valid_loss_reference.to_csv('valid_loss_reference.csv',index=False, header=False, sep=',')
valid_hazard_reference.to_csv('valid_hazard_reference.csv',index=False, header=False, sep=',')
valid_constraint_reference.to_csv('valid_constraint_reference.csv',index=False, header=False, sep=',')

In [9]:
step1_labels = ['loss', 'hazard', 'constraint']
step2_labels = ['valid', 'invalid']
step3_labels = ['rewrite', 'not', 'condition', 'accident', 'valid']


# Convert target labels to int as input for BERT prediction target
def label_to_int(df, name, labels):
  aux_df = df.copy()
  for i in range(len(aux_df[name])):
    aux_df.loc[i,name] = labels.index(aux_df.loc[i,name])
  return aux_df

In [10]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
  acc = accuracy_score(labels, preds)
  return {
  'accuracy': acc,
  'f1': f1,
  'precision': precision,
  'recall': recall
  }

# Path to save model
path = '/content/drive/MyDrive/Colab Notebooks/models/BEDS5/'
model_name = "google-bert/bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [11]:
# Rename target column, convert label to int and convert to Dataset class
def format_dataset(df):
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)
  return Dataset.from_pandas(df)

# Transform input into embedding
def tokenize_function(examples):
    return tokenizer(examples['sentence'], truncation=True, padding='max_length', max_length=512, return_tensors='pt')


def train_model(train_dataset, test_dataset, num_labels):

  train_dataset = format_dataset(train_dataset)
  test_dataset = format_dataset(test_dataset)

  train_dataset = train_dataset.map(tokenize_function, batched=True)
  test_dataset = test_dataset.map(tokenize_function, batched=True)

  model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=num_labels)

  training_args = TrainingArguments(
      output_dir="beds_model",
      learning_rate=2e-5,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      num_train_epochs=12,
      weight_decay=0.01,
      eval_strategy="epoch",
      save_strategy="epoch",
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=train_dataset,
      eval_dataset=test_dataset,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )

  trainer.train()


  eval = trainer.evaluate()

  return model, eval

In [12]:
aux_df = df.copy()
aux_df.reset_index(drop=True, inplace=True)

aux_df = label_to_int(aux_df, 'label', step1_labels)      # Convert Step 1 Labels
aux_df = label_to_int(aux_df, 'validity', step2_labels) # Convert Step 2 Labels
aux_df = label_to_int(aux_df, 'faults', step3_labels)      # Convert Step 3 Labels


In [13]:
# Pipeline fine-tuning and evaluation

# Create lists to save models and evaluation results obtained in each fold
model_list_step1 = []
model_list_step2_loss = []
model_list_step2_hazard = []
model_list_step2_constraint = []
model_list_step3_loss = []
model_list_step3_hazard = []
model_list_step3_constraint = []

eval_list_step1 = []
eval_list_step2_loss = []
eval_list_step2_hazard = []
eval_list_step2_constraint = []
eval_list_step3_loss = []
eval_list_step3_hazard = []
eval_list_step3_constraint = []

time_list_step1 = []
time_list_step2_loss = []
time_list_step2_hazard = []
time_list_step2_constraint = []
time_list_step3_loss = []
time_list_step3_hazard = []
time_list_step3_constraint = []


skf = StratifiedKFold(n_splits=5, random_state=random_state, shuffle=True)

for i, (train_index, val_index) in enumerate(skf.split(aux_df['sentence'].to_list(), aux_df['label'].to_list())):
    train_df = aux_df.iloc[train_index]
    val_df = aux_df.iloc[val_index]

    print('# Fold: ', i+1)

    # Step 1
    print('# Step 1')
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df[['sentence','label']], val_df[['sentence','label']], 3)
    end = time.perf_counter()
    time_list_step1.append(end-start)
    model_list_step1.append(model_step)
    eval_list_step1.append(eval_step)


    # Step 2 - Loss
    print('# Step 2 - Loss')
    train_df_loss = train_df[train_df.label.isin([0])] # 'loss' == 0
    val_df_loss = val_df[val_df.label.isin([0])] # 'loss' == 0
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_loss[['sentence','validity']], val_df_loss[['sentence','validity']], 2)
    end = time.perf_counter()
    time_list_step2_loss.append(end-start)
    model_list_step2_loss.append(model_step)
    eval_list_step2_loss.append(eval_step)

    # Step 2 - Hazard
    print('# Step 2 - Hazard')
    train_df_hazard = train_df[train_df.label.isin([1])] # 'hazard' == 1
    val_df_hazard = val_df[val_df.label.isin([1])] # 'hazard' == 1
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_hazard[['sentence','validity']], val_df_hazard[['sentence','validity']], 2)
    end = time.perf_counter()
    time_list_step2_hazard.append(end-start)
    model_list_step2_hazard.append(model_step)
    eval_list_step2_hazard.append(eval_step)

    # Step 2 - Constraint
    print('# Step 2 - Constraint')
    train_df_constraint = train_df[train_df.label.isin([2])] # 'constraint' == 2
    val_df_constraint = val_df[val_df.label.isin([2])] # 'constraint' == 2
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_constraint[['sentence','validity']], val_df_constraint[['sentence','validity']], 2)
    end = time.perf_counter()
    time_list_step2_constraint.append(end-start)
    model_list_step2_constraint.append(model_step)
    eval_list_step2_constraint.append(eval_step)


    # Step 3 - Loss
    print('# Step 3 - Loss')
    train_df_loss = train_df[train_df.label.isin([0]) & train_df.validity.isin([1])] # 'loss' == 0; 'incorrect' == 1
    val_df_loss = val_df[val_df.label.isin([0]) & val_df.validity.isin([1])] # 'loss' == 0; 'incorrect' == 1
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_loss[['sentence','faults']], val_df_loss[['sentence','faults']], 4)
    end = time.perf_counter()
    time_list_step3_loss.append(end-start)
    model_list_step3_loss.append(model_step)
    eval_list_step3_loss.append(eval_step)

    # Step 3 - Hazard
    print('# Step 3 - Hazard')
    train_df_hazard = train_df[train_df.label.isin([1]) & train_df.validity.isin([1])] # 'hazard' == 1; 'incorrect' == 1
    val_df_hazard = val_df[val_df.label.isin([1]) & val_df.validity.isin([1])] # 'hazard' == 1; 'incorrect' == 1
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_hazard[['sentence','faults']], val_df_hazard[['sentence','faults']], 4)
    end = time.perf_counter()
    time_list_step3_hazard.append(end-start)
    model_list_step3_hazard.append(model_step)
    eval_list_step3_hazard.append(eval_step)

    # Step 3 - Constraint
    print('# Step3 - Constraint')
    train_df_constraint = train_df[train_df.label.isin([2]) & train_df.validity.isin([1])] # 'constraint' == 2; 'incorrect' == 1
    val_df_constraint = val_df[val_df.label.isin([2]) & val_df.validity.isin([1])] # 'constraint' == 2; 'incorrect' == 1
    start = time.perf_counter()
    model_step, eval_step = train_model(train_df_constraint[['sentence','faults']], val_df_constraint[['sentence','faults']], 2)
    end = time.perf_counter()
    time_list_step3_constraint.append(end-start)
    model_list_step3_constraint.append(model_step)
    eval_list_step3_constraint.append(eval_step)

# Fold:  1
# Step 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33ma213119[0m ([33ma213119-unicamp[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.302529,0.898618,0.895763,0.893942,0.898538
2,No log,0.245116,0.917051,0.915381,0.912885,0.919701
3,No log,0.391034,0.912442,0.911228,0.911184,0.921256
4,No log,0.326722,0.930876,0.9299,0.927885,0.934534
5,No log,0.293121,0.935484,0.934253,0.931743,0.939038
6,No log,0.390574,0.926267,0.925282,0.924458,0.934846
7,No log,0.37357,0.930876,0.929773,0.927611,0.936942
8,No log,0.343469,0.926267,0.924724,0.922315,0.92937
9,No log,0.362745,0.930876,0.92959,0.927119,0.935117
10,0.104500,0.364386,0.935484,0.934253,0.931743,0.939038


# Step 2 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/233 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.6083,0.672414,0.541407,0.824074,0.586957
2,No log,0.483867,0.810345,0.79252,0.816667,0.78323
3,No log,0.36017,0.844828,0.826174,0.873512,0.811801
4,No log,0.361484,0.87931,0.864802,0.916667,0.847826
5,No log,0.404943,0.896552,0.885526,0.926829,0.869565
6,No log,0.683328,0.810345,0.774956,0.880435,0.76087
7,No log,0.442473,0.896552,0.885526,0.926829,0.869565
8,No log,0.487544,0.896552,0.885526,0.926829,0.869565
9,No log,0.479607,0.896552,0.885526,0.926829,0.869565
10,No log,0.461763,0.896552,0.885526,0.926829,0.869565


# Step 2 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.54663,0.752941,0.662507,0.691176,0.650956
2,No log,0.541704,0.741176,0.67216,0.677489,0.668033
3,No log,0.58009,0.670588,0.639394,0.64059,0.669399
4,No log,0.742545,0.658824,0.634327,0.641862,0.673839
5,No log,0.797388,0.741176,0.680669,0.680669,0.680669
6,No log,0.884389,0.752941,0.673137,0.690989,0.663593
7,No log,1.095388,0.682353,0.654835,0.656813,0.690232
8,No log,1.084014,0.729412,0.690909,0.68399,0.710383
9,No log,1.165843,0.705882,0.670082,0.665501,0.693989
10,No log,1.208947,0.705882,0.670082,0.665501,0.693989


# Step 2 - Constraint


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Map:   0%|          | 0/74 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.28258,0.837838,0.455882,0.418919,0.5
2,No log,0.172675,0.932432,0.871304,0.885281,0.858871
3,No log,0.194295,0.945946,0.900538,0.900538,0.900538
4,No log,0.232966,0.932432,0.861267,0.913675,0.825269
5,No log,0.12355,0.959459,0.927805,0.91488,0.942204
6,No log,0.186748,0.972973,0.946609,0.984375,0.916667
7,No log,0.187013,0.972973,0.946609,0.984375,0.916667
8,No log,0.26203,0.945946,0.900538,0.900538,0.900538
9,No log,0.270471,0.945946,0.900538,0.900538,0.900538
10,No log,0.272782,0.945946,0.900538,0.900538,0.900538


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

Map:   0%|          | 0/23 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.2061,0.478261,0.403409,0.5625,0.433929
2,No log,1.09481,0.565217,0.3625,0.301282,0.464286
3,No log,1.082768,0.608696,0.445833,0.551282,0.514286
4,No log,1.056212,0.652174,0.514568,0.5625,0.564286
5,No log,1.021613,0.652174,0.509607,0.485119,0.564286
6,No log,0.972386,0.652174,0.509607,0.485119,0.564286
7,No log,0.944121,0.652174,0.509607,0.485119,0.564286
8,No log,0.901553,0.652174,0.509607,0.485119,0.564286
9,No log,0.888264,0.652174,0.509607,0.485119,0.564286
10,No log,0.888232,0.652174,0.509607,0.485119,0.564286


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Hazard


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/113 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.272803,0.375,0.234615,0.180556,0.34375
2,No log,1.150262,0.5,0.361888,0.347222,0.427083
3,No log,1.023815,0.625,0.522727,0.525,0.552083
4,No log,0.987961,0.625,0.522727,0.525,0.552083
5,No log,0.99015,0.666667,0.556119,0.53869,0.59375
6,No log,1.000102,0.666667,0.560606,0.551282,0.59375
7,No log,0.978174,0.708333,0.590055,0.565341,0.635417
8,No log,0.989204,0.666667,0.556119,0.53869,0.59375
9,No log,1.004254,0.666667,0.560606,0.551282,0.59375
10,No log,0.939633,0.708333,0.590055,0.565341,0.635417


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step3 - Constraint


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.623701,0.833333,0.828571,0.875,0.833333
2,No log,0.520828,0.75,0.733333,0.833333,0.75
3,No log,0.448613,0.833333,0.828571,0.875,0.833333
4,No log,0.392734,1.0,1.0,1.0,1.0
5,No log,0.34042,1.0,1.0,1.0,1.0
6,No log,0.284855,1.0,1.0,1.0,1.0
7,No log,0.239035,1.0,1.0,1.0,1.0
8,No log,0.213471,1.0,1.0,1.0,1.0
9,No log,0.195735,1.0,1.0,1.0,1.0
10,No log,0.181797,1.0,1.0,1.0,1.0


# Fold:  2
# Step 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.335353,0.940092,0.936199,0.941509,0.93259
2,No log,0.17814,0.9447,0.941656,0.943418,0.940162
3,No log,0.168552,0.949309,0.946357,0.949242,0.944084
4,No log,0.179112,0.963134,0.961714,0.959511,0.964976
5,No log,0.220211,0.953917,0.950758,0.957875,0.94618
6,No log,0.200375,0.958525,0.956826,0.955018,0.959229
7,No log,0.195027,0.958525,0.956369,0.957228,0.955578
8,No log,0.215258,0.958525,0.95583,0.961495,0.951927
9,No log,0.204294,0.958525,0.956369,0.957228,0.955578
10,0.130600,0.20943,0.963134,0.961325,0.961325,0.961325


# Step 2 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/233 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.441772,0.827586,0.737319,0.901961,0.705882
2,No log,0.349087,0.87931,0.851771,0.858631,0.845768
3,No log,0.291275,0.913793,0.889943,0.918831,0.870158
4,No log,0.307736,0.896552,0.875179,0.875179,0.875179
5,No log,0.297907,0.896552,0.875179,0.875179,0.875179
6,No log,0.299501,0.896552,0.875179,0.875179,0.875179
7,No log,0.280415,0.931034,0.919444,0.908232,0.934003
8,No log,0.286067,0.913793,0.897707,0.891667,0.904591
9,No log,0.316563,0.913793,0.900787,0.886842,0.921808
10,No log,0.313872,0.931034,0.919444,0.908232,0.934003


# Step 2 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.577193,0.694118,0.575,0.696667,0.589394
2,No log,0.506778,0.764706,0.709699,0.769486,0.69697
3,No log,0.515821,0.776471,0.739558,0.764727,0.728788
4,No log,0.543741,0.823529,0.798419,0.816667,0.787879
5,No log,0.611781,0.811765,0.76776,0.839967,0.748485
6,No log,0.565464,0.8,0.775656,0.784163,0.769697
7,No log,0.634531,0.811765,0.793939,0.793939,0.793939
8,No log,0.730795,0.811765,0.782886,0.805669,0.771212
9,No log,0.747834,0.823529,0.805314,0.807882,0.80303
10,No log,0.755921,0.823529,0.805314,0.807882,0.80303


# Step 2 - Constraint


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Map:   0%|          | 0/74 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.259017,0.851351,0.459854,0.425676,0.5
2,No log,0.075956,0.986486,0.972253,0.992188,0.954545
3,No log,0.029023,1.0,1.0,1.0,1.0
4,No log,0.012247,1.0,1.0,1.0,1.0
5,No log,0.003597,1.0,1.0,1.0,1.0
6,No log,0.001891,1.0,1.0,1.0,1.0
7,No log,0.001262,1.0,1.0,1.0,1.0
8,No log,0.001039,1.0,1.0,1.0,1.0
9,No log,0.000909,1.0,1.0,1.0,1.0
10,No log,0.000833,1.0,1.0,1.0,1.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/82 [00:00<?, ? examples/s]

Map:   0%|          | 0/17 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.032754,0.647059,0.593407,0.767196,0.655556
2,No log,0.925597,0.588235,0.511111,0.466667,0.6
3,No log,0.839008,0.647059,0.541667,0.484848,0.666667
4,No log,0.765489,0.823529,0.811966,0.875,0.833333
5,No log,0.694304,0.941176,0.937322,0.952381,0.933333
6,No log,0.689666,0.941176,0.937322,0.952381,0.933333
7,No log,0.567185,0.941176,0.937322,0.952381,0.933333
8,No log,0.547406,0.941176,0.937322,0.952381,0.933333
9,No log,0.529077,0.941176,0.937322,0.952381,0.933333
10,No log,0.527721,0.941176,0.937322,0.952381,0.933333


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/107 [00:00<?, ? examples/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.272146,0.233333,0.125,0.194444,0.092105
2,No log,1.093757,0.666667,0.396899,0.436111,0.398246
3,No log,1.005847,0.666667,0.396899,0.436111,0.398246
4,No log,0.879828,0.7,0.451852,0.462302,0.447368
5,No log,0.860434,0.7,0.436508,0.468599,0.431579
6,No log,0.76923,0.7,0.448943,0.451852,0.447368
7,No log,0.728688,0.8,0.530713,0.518519,0.547368
8,No log,0.760379,0.7,0.436508,0.468599,0.431579
9,No log,0.700543,0.733333,0.46592,0.484472,0.464912
10,No log,0.661816,0.733333,0.477193,0.47619,0.480702


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step3 - Constraint


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.611448,0.545455,0.47619,0.75,0.583333
2,No log,0.500629,0.818182,0.816667,0.857143,0.833333
3,No log,0.38917,1.0,1.0,1.0,1.0
4,No log,0.302898,1.0,1.0,1.0,1.0
5,No log,0.252685,1.0,1.0,1.0,1.0
6,No log,0.207282,1.0,1.0,1.0,1.0
7,No log,0.177075,1.0,1.0,1.0,1.0
8,No log,0.151332,1.0,1.0,1.0,1.0
9,No log,0.128247,1.0,1.0,1.0,1.0
10,No log,0.115302,1.0,1.0,1.0,1.0


# Fold:  3
# Step 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.341821,0.907834,0.906263,0.915922,0.900398
2,No log,0.195281,0.917051,0.916067,0.921046,0.912476
3,No log,0.233211,0.917051,0.916558,0.916319,0.917952
4,No log,0.275537,0.930876,0.929775,0.938579,0.92424
5,No log,0.334282,0.9447,0.944071,0.958763,0.936005
6,No log,0.339167,0.9447,0.944071,0.958763,0.936005
7,No log,0.344174,0.935484,0.934512,0.945074,0.928162
8,No log,0.348259,0.940092,0.939277,0.951797,0.932083
9,No log,0.363077,0.940092,0.939277,0.951797,0.932083
10,0.122700,0.37213,0.9447,0.944071,0.958763,0.936005


# Step 2 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/233 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.557624,0.62069,0.422101,0.807018,0.521739
2,No log,0.380443,0.827586,0.809211,0.843615,0.797516
3,No log,0.264958,0.862069,0.859394,0.856631,0.870807
4,No log,0.200267,0.931034,0.92795,0.92795,0.92795
5,No log,0.164106,0.931034,0.92795,0.92795,0.92795
6,No log,0.148379,0.965517,0.963975,0.963975,0.963975
7,No log,0.204667,0.931034,0.928922,0.924848,0.935404
8,No log,0.195894,0.948276,0.946346,0.943627,0.949689
9,No log,0.21943,0.931034,0.926768,0.93565,0.920497
10,No log,0.250829,0.948276,0.946346,0.943627,0.949689


# Step 2 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.527197,0.764706,0.477887,0.880952,0.52381
2,No log,0.492807,0.717647,0.642356,0.636667,0.65253
3,No log,0.424105,0.858824,0.788557,0.835714,0.762277
4,No log,0.496335,0.835294,0.771154,0.781499,0.762649
5,No log,0.547729,0.847059,0.783715,0.80141,0.770461
6,No log,0.736092,0.811765,0.747024,0.747024,0.747024
7,No log,0.955353,0.776471,0.712889,0.70526,0.723586
8,No log,0.932175,0.8,0.735396,0.731962,0.739211
9,No log,0.914801,0.823529,0.758934,0.763462,0.754836
10,No log,0.94062,0.823529,0.758934,0.763462,0.754836


# Step 2 - Constraint


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Map:   0%|          | 0/74 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.403011,0.783784,0.439394,0.391892,0.5
2,No log,0.217522,0.905405,0.831545,0.946154,0.78125
3,No log,0.204625,0.918919,0.860025,0.953125,0.8125
4,No log,0.217162,0.932432,0.886746,0.960317,0.84375
5,No log,0.14912,0.959459,0.935671,0.97541,0.90625
6,No log,0.238247,0.959459,0.935671,0.97541,0.90625
7,No log,0.227122,0.959459,0.935671,0.97541,0.90625
8,No log,0.243989,0.959459,0.935671,0.97541,0.90625
9,No log,0.271214,0.959459,0.935671,0.97541,0.90625
10,No log,0.285986,0.945946,0.911905,0.967742,0.875


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

Map:   0%|          | 0/23 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.174894,0.434783,0.292763,0.270833,0.330357
2,No log,1.065454,0.521739,0.386513,0.552083,0.397321
3,No log,0.998297,0.565217,0.408333,0.429487,0.433036
4,No log,0.947906,0.695652,0.527592,0.5625,0.540179
5,No log,0.883925,0.869565,0.666369,0.65873,0.683036
6,No log,0.763077,0.913043,0.69958,0.686508,0.714286
7,No log,0.693386,0.913043,0.697619,0.683036,0.714286
8,No log,0.691053,0.869565,0.680952,0.683036,0.683036
9,No log,0.664231,0.869565,0.680952,0.683036,0.683036
10,No log,0.63242,0.869565,0.680952,0.683036,0.683036


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/116 [00:00<?, ? examples/s]

Map:   0%|          | 0/21 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.225288,0.52381,0.291667,0.375,0.333333
2,No log,1.127213,0.52381,0.291667,0.375,0.333333
3,No log,1.019566,0.761905,0.575,0.666667,0.583333
4,No log,0.936072,0.714286,0.53953,0.65625,0.533333
5,No log,0.91954,0.666667,0.507246,0.582418,0.533333
6,No log,0.883055,0.761905,0.622925,0.63141,0.641667
7,No log,0.917753,0.666667,0.499126,0.572917,0.533333
8,No log,0.908139,0.761905,0.622925,0.63141,0.641667
9,No log,0.8836,0.761905,0.622925,0.63141,0.641667
10,No log,0.898539,0.714286,0.582784,0.588068,0.616667


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step3 - Constraint


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.611854,0.875,0.875,0.875,0.875
2,No log,0.535428,0.75,0.733333,0.833333,0.75
3,No log,0.444806,0.9375,0.937255,0.944444,0.9375
4,No log,0.402682,0.9375,0.937255,0.944444,0.9375
5,No log,0.373453,0.875,0.873016,0.9,0.875
6,No log,0.330874,0.9375,0.937255,0.944444,0.9375
7,No log,0.29224,0.9375,0.937255,0.944444,0.9375
8,No log,0.270297,0.9375,0.937255,0.944444,0.9375
9,No log,0.256782,0.9375,0.937255,0.944444,0.9375
10,No log,0.247823,0.9375,0.937255,0.944444,0.9375


# Fold:  4
# Step 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/867 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.335922,0.898618,0.897766,0.900463,0.896206
2,No log,0.195428,0.926267,0.924452,0.929404,0.920902
3,No log,0.208566,0.9447,0.941621,0.951086,0.935928
4,No log,0.257375,0.935484,0.93414,0.939128,0.93057
5,No log,0.216868,0.9447,0.942945,0.948514,0.938996
6,No log,0.287292,0.940092,0.939685,0.938556,0.941794
7,No log,0.271218,0.940092,0.939514,0.939535,0.939969
8,No log,0.26904,0.949309,0.948578,0.955046,0.944161
9,No log,0.276707,0.935484,0.934399,0.937052,0.932396
10,0.117200,0.274635,0.940092,0.939102,0.942857,0.936317


# Step 2 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/233 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.480679,0.758621,0.632246,0.7025,0.620536
2,No log,0.465825,0.775862,0.759796,0.761649,0.825893
3,No log,0.331271,0.862069,0.827381,0.827381,0.827381
4,No log,0.296302,0.827586,0.791966,0.783333,0.803571
5,No log,0.369594,0.844828,0.821416,0.806306,0.854167
6,No log,0.402273,0.844828,0.821416,0.806306,0.854167
7,No log,0.60614,0.827586,0.804318,0.790404,0.842262
8,No log,0.613963,0.827586,0.804318,0.790404,0.842262
9,No log,0.706318,0.827586,0.804318,0.790404,0.842262
10,No log,0.705909,0.827586,0.804318,0.790404,0.842262


# Step 2 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/339 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.567589,0.635294,0.388489,0.317647,0.5
2,No log,0.420057,0.776471,0.749263,0.76369,0.741637
3,No log,0.333004,0.870588,0.865834,0.860302,0.884409
4,No log,0.422697,0.823529,0.802049,0.8191,0.792413
5,No log,0.443814,0.823529,0.805314,0.81391,0.799283
6,No log,0.630377,0.811765,0.76776,0.860294,0.748805
7,No log,0.629133,0.8,0.771542,0.797473,0.760155
8,No log,0.747355,0.8,0.775656,0.791395,0.767025
9,No log,0.730445,0.835294,0.810026,0.846073,0.794803
10,No log,0.762028,0.8,0.775656,0.791395,0.767025


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 2 - Constraint


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Map:   0%|          | 0/74 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.356547,0.783784,0.439394,0.391892,0.5
2,No log,0.212044,0.932432,0.892785,0.928752,0.866379
3,No log,0.122457,0.972973,0.960129,0.960129,0.960129
4,No log,0.196236,0.945946,0.916384,0.939286,0.897629
5,No log,0.165817,0.959459,0.938792,0.949718,0.928879
6,No log,0.242079,0.959459,0.938792,0.949718,0.928879
7,No log,0.252611,0.959459,0.938792,0.949718,0.928879
8,No log,0.242489,0.959459,0.938792,0.949718,0.928879
9,No log,0.254837,0.959459,0.938792,0.949718,0.928879
10,No log,0.256905,0.959459,0.938792,0.949718,0.928879


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/83 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.26308,0.25,0.186364,0.142857,0.270833
2,No log,1.226057,0.4375,0.439286,0.5625,0.520833
3,No log,1.212614,0.5,0.521429,0.568182,0.604167
4,No log,1.203364,0.375,0.286364,0.21875,0.4375
5,No log,1.152563,0.4375,0.388095,0.319643,0.520833
6,No log,1.151909,0.5,0.475649,0.44375,0.604167
7,No log,1.136018,0.5625,0.530952,0.482143,0.6875
8,No log,1.126723,0.5625,0.530952,0.482143,0.6875
9,No log,1.111339,0.5625,0.530952,0.482143,0.6875
10,No log,1.102638,0.5625,0.530952,0.482143,0.6875


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Hazard


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/106 [00:00<?, ? examples/s]

Map:   0%|          | 0/31 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.164112,0.580645,0.292929,0.423913,0.261905
2,No log,1.055037,0.580645,0.313589,0.317857,0.309524
3,No log,0.912388,0.741935,0.405882,0.385526,0.440476
4,No log,0.847235,0.774194,0.426067,0.406944,0.452381
5,No log,0.739199,0.83871,0.43968,0.410354,0.47619
6,No log,0.689933,0.83871,0.43968,0.410354,0.47619
7,No log,0.691124,0.870968,0.460606,0.436141,0.488095
8,No log,0.66737,0.83871,0.43968,0.410354,0.47619
9,No log,0.661816,0.83871,0.43968,0.410354,0.47619
10,No log,0.65044,0.83871,0.43968,0.410354,0.47619


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step3 - Constraint


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.576051,0.8125,0.768116,0.884615,0.75
2,No log,0.468305,0.875,0.854545,0.916667,0.833333
3,No log,0.411595,0.875,0.854545,0.916667,0.833333
4,No log,0.344331,0.875,0.854545,0.916667,0.833333
5,No log,0.302137,0.875,0.854545,0.916667,0.833333
6,No log,0.301226,0.875,0.854545,0.916667,0.833333
7,No log,0.301587,0.875,0.854545,0.916667,0.833333
8,No log,0.286738,0.875,0.854545,0.916667,0.833333
9,No log,0.275327,0.875,0.854545,0.916667,0.833333
10,No log,0.262364,0.875,0.854545,0.916667,0.833333


# Fold:  5
# Step 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/868 [00:00<?, ? examples/s]

Map:   0%|          | 0/216 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.302907,0.944444,0.942833,0.944133,0.941694
2,No log,0.10852,0.972222,0.970828,0.975,0.967783
3,No log,0.120326,0.953704,0.953157,0.950828,0.958636
4,No log,0.088706,0.967593,0.966811,0.966498,0.967178
5,No log,0.202641,0.953704,0.952899,0.950705,0.958636
6,No log,0.135922,0.958333,0.957025,0.955396,0.959241
7,No log,0.195127,0.953704,0.952542,0.950427,0.956955
8,No log,0.147181,0.967593,0.96642,0.96573,0.967178
9,No log,0.163489,0.962963,0.961882,0.959921,0.964891
10,0.130100,0.161967,0.962963,0.961714,0.960482,0.96321


# Step 2 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/232 [00:00<?, ? examples/s]

Map:   0%|          | 0/59 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.547995,0.728814,0.58156,0.854545,0.6
2,No log,0.440842,0.830508,0.776515,0.897959,0.75
3,No log,0.330476,0.864407,0.834734,0.886508,0.812179
4,No log,0.345589,0.864407,0.834734,0.886508,0.812179
5,No log,0.296127,0.932203,0.920054,0.953488,0.9
6,No log,0.402283,0.864407,0.828488,0.914894,0.8
7,No log,0.342906,0.932203,0.920054,0.953488,0.9
8,No log,0.393621,0.915254,0.898451,0.943182,0.875
9,No log,0.390631,0.932203,0.920054,0.953488,0.9
10,No log,0.40154,0.932203,0.920054,0.953488,0.9


# Step 2 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/340 [00:00<?, ? examples/s]

Map:   0%|          | 0/84 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.603927,0.630952,0.386861,0.315476,0.5
2,No log,0.542823,0.738095,0.693634,0.730159,0.685332
3,No log,0.523937,0.75,0.733333,0.731971,0.734936
4,No log,0.566104,0.77381,0.768259,0.768182,0.787279
5,No log,0.742762,0.75,0.745638,0.75,0.768411
6,No log,0.863778,0.785714,0.777647,0.774008,0.790018
7,No log,0.981842,0.785714,0.775401,0.771429,0.783323
8,No log,1.025063,0.77381,0.761613,0.758235,0.767194
9,No log,1.07125,0.785714,0.775401,0.771429,0.783323
10,No log,1.098432,0.77381,0.761613,0.758235,0.767194


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 2 - Constraint


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/296 [00:00<?, ? examples/s]

Map:   0%|          | 0/73 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.320782,0.821918,0.451128,0.410959,0.5
2,No log,0.176797,0.945205,0.892962,0.96875,0.846154
3,No log,0.173611,0.958904,0.922587,0.97619,0.884615
4,No log,0.289564,0.945205,0.892962,0.96875,0.846154
5,No log,0.156127,0.972603,0.953205,0.953205,0.953205
6,No log,0.229595,0.958904,0.922587,0.97619,0.884615
7,No log,0.254622,0.958904,0.922587,0.97619,0.884615
8,No log,0.23875,0.958904,0.922587,0.97619,0.884615
9,No log,0.273434,0.958904,0.922587,0.97619,0.884615
10,No log,0.276964,0.958904,0.922587,0.97619,0.884615


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Loss


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/79 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.177011,0.55,0.469281,0.463636,0.508333
2,No log,1.140348,0.5,0.395238,0.5,0.466667
3,No log,1.119435,0.5,0.339367,0.269886,0.458333
4,No log,1.077586,0.55,0.447222,0.532738,0.520833
5,No log,1.039106,0.7,0.633523,0.583333,0.708333
6,No log,1.014169,0.65,0.562937,0.494048,0.666667
7,No log,0.996523,0.55,0.47013,0.433333,0.583333
8,No log,0.962227,0.65,0.562937,0.494048,0.666667
9,No log,0.944004,0.6,0.52013,0.458333,0.604167
10,No log,0.932151,0.65,0.571023,0.520833,0.645833


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step 3 - Hazard


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/106 [00:00<?, ? examples/s]

Map:   0%|          | 0/31 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.163967,0.580645,0.350678,0.459627,0.409722
2,No log,0.96979,0.741935,0.514858,0.555,0.535714
3,No log,0.829337,0.83871,0.659722,0.638889,0.686508
4,No log,0.713277,0.83871,0.659722,0.638889,0.686508
5,No log,0.648878,0.83871,0.659722,0.638889,0.686508
6,No log,0.614332,0.83871,0.659722,0.638889,0.686508
7,No log,0.592641,0.870968,0.684454,0.660294,0.722222
8,No log,0.576445,0.870968,0.684454,0.660294,0.722222
9,No log,0.549808,0.870968,0.67973,0.661184,0.700397
10,No log,0.546785,0.83871,0.659722,0.638889,0.686508


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Step3 - Constraint


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={df.columns[0]:'sentence', df.columns[1]:'labels'}, inplace=True)


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.59779,0.769231,0.763636,0.785714,0.833333
2,No log,0.458918,0.846154,0.783333,0.909091,0.75
3,No log,0.389502,0.846154,0.783333,0.909091,0.75
4,No log,0.30229,1.0,1.0,1.0,1.0
5,No log,0.243889,1.0,1.0,1.0,1.0
6,No log,0.202299,1.0,1.0,1.0,1.0
7,No log,0.170406,1.0,1.0,1.0,1.0
8,No log,0.147147,1.0,1.0,1.0,1.0
9,No log,0.127741,1.0,1.0,1.0,1.0
10,No log,0.115551,1.0,1.0,1.0,1.0


In [14]:
def get_eval_index(eval_list, time_list):
  accuracy_list = []
  precision_list = []
  recall_list = []
  f1_list = []
  score_sum = []
  for eval in eval_list:
    print(eval)
    accuracy_list.append(eval['eval_accuracy'])
    precision_list.append(eval['eval_precision'])
    recall_list.append(eval['eval_recall'])
    f1_list.append(eval['eval_f1'])
    score_sum.append(eval['eval_accuracy']+eval['eval_precision']+eval['eval_recall']+eval['eval_f1'])
  print('###### Average: ######')
  print('Accuracy:   ', np.mean(accuracy_list))
  print('Precision:  ', np.mean(precision_list))
  print('Recall:     ', np.mean(recall_list))
  print('F1-Score:   ', np.mean(f1_list))
  print('Fine-tuning:', np.mean(time_list))

  print('Highest sum of scores index:', score_sum.index(max(score_sum)))
  return score_sum.index(max(score_sum))

In [15]:
#Step 4

model_step4 = SentenceTransformer("all-mpnet-base-v2")

sentence_list = df['sentence'].to_list()
labels_list = [0] * len(sentence_list)
df_step4 = Dataset.from_dict({
    "sentence1": sentence_list,
    "sentence2": sentence_list,
    "label": labels_list,
})

loss_step4 = losses.ContrastiveTensionLossInBatchNegatives(model=model_step4)

args_step4 = SentenceTransformerTrainingArguments(
    output_dir="beds-all-mpnet-base-v2",
    num_train_epochs=12,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    save_strategy="epoch",

)

s_trainer = SentenceTransformerTrainer(
    model=model_step4,
    args=args_step4,
    train_dataset=df_step4,
    loss=loss_step4
)
s_trainer.train()

#model_step4.push_to_hub('beds_step4')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Step,Training Loss
500,0.0658


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

TrainOutput(global_step=816, training_loss=0.04325545418496225, metrics={'train_runtime': 121.6586, 'train_samples_per_second': 106.922, 'train_steps_per_second': 6.707, 'total_flos': 0.0, 'train_loss': 0.04325545418496225, 'epoch': 12.0})

In [16]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
# Show training results and save best models

# Step 1

print('# Step 1')
best_index = get_eval_index(eval_list_step1, time_list_step1)
#model_list_step1[best_index].push_to_hub("beds_step1")

# Step 2

print('# Step 2 - Loss')
best_index = get_eval_index(eval_list_step2_loss, time_list_step2_loss)
#model_list_step2_loss[best_index].push_to_hub("beds_step2_loss")

print('# Step 2 - Hazard')
best_index = get_eval_index(eval_list_step2_hazard, time_list_step2_hazard)
#model_list_step2_hazard[best_index].push_to_hub("beds_step2_hazard")

print('# Step 2 - Constraint')
best_index = get_eval_index(eval_list_step2_constraint, time_list_step2_constraint)
#model_list_step2_constraint[best_index].push_to_hub("beds_step2_constraint")

# Step 3

print('# Step 3 - Loss')
best_index = get_eval_index(eval_list_step3_loss, time_list_step3_loss)
#model_list_step3_loss[best_index].push_to_hub("beds_step3_loss")

print('# Step 3 - Hazard')
best_index = get_eval_index(eval_list_step3_hazard, time_list_step3_hazard)
#model_list_step3_hazard[best_index].push_to_hub("beds_step3_hazard")

print('# Step 3 - Constraint')
best_index = get_eval_index(eval_list_step3_constraint, time_list_step3_constraint)
#model_list_step3_constraint[best_index].push_to_hub("beds_step3_constraint")

# Step 1
{'eval_loss': 0.3690735399723053, 'eval_accuracy': 0.9354838709677419, 'eval_f1': 0.9342533256967483, 'eval_precision': 0.9317428583646938, 'eval_recall': 0.9390384299106409, 'eval_runtime': 1.5844, 'eval_samples_per_second': 136.964, 'eval_steps_per_second': 8.836, 'epoch': 12.0}
{'eval_loss': 0.2094995677471161, 'eval_accuracy': 0.9585253456221198, 'eval_f1': 0.956369183829138, 'eval_precision': 0.9572283421732627, 'eval_recall': 0.9555780933062881, 'eval_runtime': 1.5675, 'eval_samples_per_second': 138.436, 'eval_steps_per_second': 8.931, 'epoch': 12.0}
{'eval_loss': 0.38085705041885376, 'eval_accuracy': 0.9447004608294931, 'eval_f1': 0.9440713607380274, 'eval_precision': 0.9587628865979382, 'eval_recall': 0.936004970487729, 'eval_runtime': 1.5508, 'eval_samples_per_second': 139.929, 'eval_steps_per_second': 9.028, 'epoch': 12.0}
{'eval_loss': 0.2705400586128235, 'eval_accuracy': 0.9447004608294931, 'eval_f1': 0.9438275029822177, 'eval_precision': 0.948851148851149, 'eval_r