# <center>Fine-Tuning BERT using Skin Cancer Data</center>

The code in this notebook is adapted from: https://colab.research.google.com/drive/1ywsvwO6thOVOrfagjjfuxEf6xVRxbUNO#scrollTo=IUM0UA1qJaVB

Skin cancer data (big and little) can be found in the wiki: https://knowledge.depaul.edu/display/DNLP/Tasks+and+Data

For this notebook, TensorFlow 1.15 is required

Date: 24 August 2020
This is a collaboration between Keith Cochran and Clayton Cohn to classify Skin Cancer essays. It builds on the work of Simon Hughes on causal reasoning chains (Doctoral Dissertation: "Automatic Inference of Causal Reasoning Chains from Student Essays", 2019).

In [1]:
import tensorflow as tf
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from pytorch_pretrained_bert import BertTokenizer, BertConfig
from pytorch_pretrained_bert import BertAdam, BertForSequenceClassification
from tqdm import tqdm, trange
import pandas as pd
import io
import os
from pytorch_pretrained_bert.convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from skopt import dump
from skopt import gp_minimize
from skopt import load
from skopt.plots import plot_evaluations
from skopt.plots import plot_objective
from skopt.space import Categorical
from skopt.space import Integer
from skopt.space import Real

# Plot styling. The default colormap is set through rcParams rather than plt.set_cmap(),
# because set_cmap() touches the "current figure" and so leaves an empty figure as the
# output of this cell.
sns.set(style="whitegrid")
plt.rcParams["image.cmap"] = "viridis"

# If using Google Colab, uncomment this line to make matplotlib inline
# %matplotlib inline

# Record the library versions this run was executed with.
print("Torch version {}".format(torch.__version__))
print("Tensorflow version {}".format(tf.__version__))
print('pandas version {}.'.format(pd.__version__))
print('numpy version {}.'.format(np.__version__))

# Get date and time
import datetime
import pytz


Using TensorFlow backend.


Torch version 1.5.1
Tensorflow version 1.15.0
pandas version 1.0.5.
numpy version 1.18.5.


<Figure size 432x288 with 0 Axes>

## <center>To use the GPU, do the following</center>

<table><tr><th>Environment</th><th>Instruction</th></tr><tr><td>Colab</td><td>Edit->Notebook Settings->Hardware Accelerator and select GPU</td></tr>
    <tr><td>ML PC</td><td>Device is found using the provided libraries</td></tr></table>



In [2]:
# Stop immediately unless TensorFlow reports a GPU under the expected device name.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# PyTorch-side handles: the number of visible GPUs and the device that tensors and
# models are moved to later in the notebook.
n_gpu = torch.cuda.device_count()
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Cuda Device: {}'.format(torch.cuda.get_device_name(0)))

Found GPU at: /device:GPU:0
Cuda Device: GeForce RTX 2080 Ti


## <center>Manual Parameters</center>

| Hyper Parameter | Recommended Values         
| :- | :-------------
|EPOCHS| 2, 3, 4
|BATCH_SIZE| 16, 32
|MAX_LEN|Length of longest sentence
|LEARNING_RATE|2e-5, 3e-5, 5e-5
|WARMUP|.1
|MODEL_PATH| The path to the model to use (i.e. 'bert-base-uncased')
|COST_SENSITIVITY|0 if unused
|KFOLD|0 or a value for kfold cross-validation


In [3]:
# Search space for hyper-parameter optimisation (skopt dimensions).
dimensions = [
    Real(low=2e-5, high=5e-5, prior='log-uniform', name='learning_rate'),
]

# Hyper-parameters. EPOCHS and BATCH_SIZE are lists: the main loop runs every combination.
EPOCHS = [2, 3, 4]
BATCH_SIZE = [16, 32]
MAX_LEN = 128
# Learning rate recorded in the stats file; 2e-5 is the first recommended value in the
# table above and the value the optimizer has been using.
LEARNING_RATE = 2e-5
WARMUP = 0.1

# Pre-trained models: MODEL_PATH[i] is the directory under MODEL_PATH_PREFIX for MODEL_NAME[i].
MODEL_PATH_PREFIX = 'pre-trained_models/'
MODEL_PATH = ['bert_base_uncased', 
              'scibert_scivocab_uncased', 
              'biobert_v1.1_pubmed']
MODEL_NAME = ['BERT', 'SCIBERT', 'BIOBERT']

# Input data and output statistics directories.
DATA_PATH = "data"
STATS_PATH = "stats"

# Bookkeeping values written verbatim into the stats file.
PRETRAINING_MODEL_ID = "none"
PRETRAINING_MODEL_TYPE = "none"
COST_SENSITIVITY = 0
KFOLD = 0

# Parallel lists: index i describes one data set (type key, human-readable note, stats file).
DATA_TYPE =  ['skin', 'coral']
NOTES =      ['Skin Cancer', 'Coral Bleaching']
STATS_FILE = ['skin_cancer_fine_tuning_stats.csv', 'coral_bleaching_fine_tuning_stats.csv']


Make sure PyTorch is installed - will use with Hugging Face Transformers
<br>Hugging Face library currently accepted as most powerful PyTorch interface with BERT



In [None]:
# Install
# NOTE: the first code cell of this notebook already imports pytorch_pretrained_bert, so on a
# fresh environment this install has to be run before that import cell can succeed.
!pip install pytorch-pretrained-bert pytorch-nlp

In [None]:
# Select which data set this run works on (index into DATA_TYPE).
DATA_TYPE_TO_USE = DATA_TYPE[0]

if DATA_TYPE_TO_USE == "skin":
    DATA_NAME = "EBA1415-SkinCancer-big-sentences.tsv"
elif DATA_TYPE_TO_USE == "coral":
    DATA_NAME = "EBA1415-CoralBleaching-big-sentences.tsv"
else:
    # Fail here with a clear message instead of trying to read a file with an empty name.
    raise ValueError(
        "DATA_TYPE_TO_USE must be set to either 'coral' or 'skin', got {!r}".format(DATA_TYPE_TO_USE))

# The skin file is read with its first row treated as a header (replaced by `names`);
# the coral file is read as header-less.
h = 0 if DATA_TYPE_TO_USE == "skin" else None
df = pd.read_csv(DATA_PATH + "/" + DATA_NAME, delimiter='\t', header=h, names=['essay', 'relation', 's_num', 'sentence'])
df.head(10)

Must transform relation labels to binary labels.

In [None]:
# Causal relations ("cause,effect" code pairs) that count as valid for each data set.
coral_relations = [
                   "1,2", "1,3", "1,4", "1,5", "1,5B", "1,14", "1,6", "1,7", "1,50",
                   "2,3", "2,4", "2,5", "2,5B", "2,14", "2,6", "2,7", "2,50",
                   "3,4", "3,5", "3,5B", "3,14", "3,6", "3,7", "3,50",
                   "4,5", "4,5B", "4,14", "4,6", "4,7", "4,50",
                   "5,5B", "5,14", "5,6", "5,7", "5,50",
                   "5B,14", "5B,6", "5B,7", "5B,50",
                   "11,12", "11,13", "11,14", "11,6", "11,7", "11,50",
                   "12,13", "12,14", "12,6", "12,7", "12,50",
                   "13,14", "13,6", "13,7","13,50",
                   "14,6", "14,7", "14,50",
                   "6,7", "6,50",
                   "7,50"
                  ]
print("{} unique coral bleaching relations".format(len(coral_relations)))

skin_relations = [
                  "1,2", "1,3", "1,4", "1,5", "1,6", "1,50",
                  "2,3", "2,4", "2,5", "2,6", "2,50",
                  "3,4", "3,5", "3,6", "3,50",
                  "4,5", "4,6", "4,50",
                  "5,6", "5,50",
                  "11,12", "11,6", "11,50",
                  "12,6", "12,50",
                  "6,50"     
                 ]

print("{} unique skin cancer relations".format(len(skin_relations)))

# Pick the relation list for the data set actually loaded. The comparison must use
# DATA_TYPE_TO_USE (a string); DATA_TYPE is the list of all supported types and never
# equals "coral" or "skin", which would label every sentence 0.
if DATA_TYPE_TO_USE == "coral":
    valid_relations = set(coral_relations)
elif DATA_TYPE_TO_USE == "skin":
    valid_relations = set(skin_relations)
else:
    valid_relations = set()

# A relation string is split on "-": a first field of "O" means "no relation"; otherwise
# fields 1 and 2 are joined as "a,b" and looked up in the valid list.
binary_labels = []
for rel in df.relation:
    chain = str(rel).split("-")
    is_valid = chain[0] != "O" and len(chain) >= 3 and (chain[1] + "," + chain[2]) in valid_relations
    binary_labels.append(1 if is_valid else 0)

# Same index as df so it can be assigned straight onto a copy of the frame.
relations_pd = pd.Series(binary_labels, index=df.index, name="relation")

df_binary = df.copy(deep=True)
df_binary.head(10)

In [None]:
# Overwrite the raw relation strings with the 0/1 labels computed in the previous cell.
df_binary["relation"] = relations_pd
df_binary.head(10)

Next, we must address the issue that some sentences have multiple relations. This could be a problem if a sentence has one valid relation and one invalid one (the same sentence will be labeled True in one instance and False in another instance). To correct this, we will remove the duplicate instances and define each sentence to be True if it contains *at least one* causal relation.

The parse was provided by @TrentonMcKinney on StackOverflow:
https://stackoverflow.com/questions/63697275/regex-string-for-different-versions/63697498#63697498

In [None]:
# Split the textual s_num on '.'; rows whose second piece is anything other than '0'
# are the ones treated as repeated instances of a sentence.
s_num_parts = df_binary.s_num.astype(str).str.split('.', expand=True)
is_repeated_instance = s_num_parts[1] != '0'
df_duplicate_sentences = df_binary[is_repeated_instance]
df_duplicate_sentences.head(25)

Now that the duplicates are isolated, they need to be evaluated. If there is at least one relation, one copy of the sentence will be kept as true. If there are no relations, one copy will be kept as false.

In [None]:
# Collapse each run of repeated sentence instances into a single row of df_binary.
#
# Rows of df_duplicate_sentences are grouped into consecutive runs that share the same
# essay and the same s_num part before the '.'. For every run with more than one row, the
# first row is kept and labelled 1 if any row of the run is labelled 1 (0 otherwise); the
# remaining rows are dropped from df_binary.
# NOTE(review): this only looks at the rows isolated in df_duplicate_sentences (s_num part
# after the '.' != '0'); confirm against the data that no '.0' row belongs to the same run.
dup_keys = (df_duplicate_sentences.essay.astype(str) + "/" +
            df_duplicate_sentences.s_num.astype(str).str.split('.').str[0])
# A new run starts wherever the key differs from the previous row's key.
run_ids = (dup_keys != dup_keys.shift()).cumsum()

drop_list = []
for _, run in df_duplicate_sentences.groupby(run_ids, sort=False):
    run_indices = list(run.index)
    if len(run_indices) < 2:
        continue

    left, right = run_indices[0], run_indices[1:]
    has_relation = bool((run.relation == 1).any())
    # Write through df_binary.loc[row, column]: assigning via df.loc[row].relation only
    # changes a temporary copy and never reaches the frame.
    df_binary.loc[left, 'relation'] = 1 if has_relation else 0
    drop_list += right

# errors="ignore" keeps the cell re-runnable after the rows have already been dropped.
df_binary.drop(drop_list, inplace=True, errors="ignore")
df_binary.head(25)

Data is prepped and cleaned at this point. Next is implementation. Extract sentences and labels from DataFrame. Must also add special [CLS] and [SEP] tokens for BERT.

Tokenize sentences for BERT.
For each tokenized input sentence, we need to create:

1. input ids:
    a sequence of integers identifying each input token to its index number 
    in the BERT tokenizer vocabulary

2. segment mask: (optional) a sequence of 1s and 0s used to identify whether the input is one 
    sentence or two sentences long. For one sentence inputs, this is simply a sequence of 0s. 
    For two sentence inputs, there is a 0 for each token of the first sentence, followed by a 
    1 for each token of the second sentence

3. attention mask: (optional) 
    a sequence of 1s and 0s, with 1s for all input tokens and 0s for all padding tokens 

4. labels: based on the labels from the data set

Additionally, we will drop any sentence whose token count is greater than MAX_LEN.

In [None]:
def tokenize_sentences(tokenizer, sentences, labels=None, max_len=None):
    """Tokenize sentences and build padded id rows, labels and attention masks.

    Sentences with more than ``max_len`` tokens are dropped, together with their labels.

    Args:
        tokenizer: object providing ``tokenize(str)`` and ``convert_tokens_to_ids(list)``.
        sentences: sequence of sentence strings (special tokens already added by the caller).
        labels: one label per sentence, in the same order. Defaults to
            ``df_binary.relation.values`` so existing two-argument calls keep working;
            pass it explicitly for any sentence set that does not come from ``df_binary``.
        max_len: maximum token count and padded row width. Defaults to ``MAX_LEN``.

    Returns:
        (input_ids, labels, attention_masks):
        ``input_ids`` is an int64 array of shape (kept, max_len), zero-padded on the right;
        ``labels`` is a list with the labels of the kept sentences;
        ``attention_masks`` is a list of float lists, 1.0 where the id is > 0, else 0.0.

    Raises:
        ValueError: if ``labels`` and ``sentences`` have different lengths.
    """
    if max_len is None:
        max_len = MAX_LEN
    if labels is None:
        labels = df_binary.relation.values

    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    if len(labels) != len(tokenized_texts):
        # Indexing mismatched labels by position would silently attach wrong labels.
        raise ValueError("Got {0} labels for {1} sentences".format(len(labels), len(tokenized_texts)))
    if tokenized_texts:
        print ("First sentence tokenized: ", tokenized_texts[0])

    # Decide once which sentences survive, so labels and ids cannot drift apart.
    kept = [i for i, tokens in enumerate(tokenized_texts) if len(tokens) <= max_len]
    print("Removed {0} sentences greater than {1}".format(len(tokenized_texts) - len(kept), max_len))
    kept_labels = [labels[i] for i in kept]

    # Convert BERT tokens to vocabulary ids and right-pad with 0. Every kept sentence
    # already fits in max_len, so no truncation is ever needed.
    input_ids = np.zeros((len(kept), max_len), dtype=np.int64)
    for row, i in enumerate(kept):
        ids = tokenizer.convert_tokens_to_ids(tokenized_texts[i])
        input_ids[row, :len(ids)] = ids

    # Attention mask: 1.0 for real tokens, 0.0 for padding (id 0).
    attention_masks = [[float(token_id > 0) for token_id in seq] for seq in input_ids]

    return input_ids, kept_labels, attention_masks

In [None]:
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    """Return the fraction of rows in ``preds`` whose arg-max column equals the label.

    ``preds`` holds one row of scores per example; ``labels`` is an array that is
    flattened before comparison.
    """
    truth = labels.flatten()
    predicted = np.argmax(preds, axis=1).flatten()
    n_correct = np.sum(predicted == truth)
    return n_correct / len(truth)

In [None]:
def getLastModelNumber(stats_file, stats_dir=None):
    """Return the run number stored in the last row of a stats CSV, or -1.

    Args:
        stats_file: file name of the stats CSV.
        stats_dir: directory containing it. Defaults to ``STATS_PATH``.

    Returns:
        The integer in the first comma-separated field of the last non-blank line.
        -1 if the file cannot be opened, has no non-blank lines, or that field is not
        an integer (for example when the file holds only the header row).
    """
    if stats_dir is None:
        stats_dir = STATS_PATH
    try:
        with open(os.path.join(stats_dir, stats_file), "r") as f:
            # Ignore blank lines so a trailing newline does not hide the last real row.
            rows = [line for line in f if line.strip()]
        return int(rows[-1].split(',')[0])
    except (OSError, IndexError, ValueError):
        # Only the expected "no usable row" cases map to -1; anything else propagates.
        return -1

In [None]:
# Column order of the per-run stats CSV; every appended row must follow it.
_STATS_COLUMNS = [
    "number", "datetime", "bert_model", "hugging_face", "max_len", "epochs", "batch_size",
    "optimizer", "learning_rate", "warmup", "pretraining_model_id", "pretraining_model_type",
    "cost_sensitivity", "accuracy", "macro_prec", "macro_recall", "macro_f1", "macro_support",
    "weighted_prec", "weighted_recall", "weighted_f1", "weighted_support", "kfold", "notes",
]


def _timestamp_ct(moment):
    """Format a datetime as 'YYYY-MM-DD HH:MM CT' for the stats file."""
    return moment.strftime("%Y-%m-%d %H:%M") + " CT"


def _binarize_relations(relations, valid_relations):
    """Map raw relation strings to 1 (valid causal relation) or 0, as done for the training data."""
    valid = set(valid_relations)
    labels = []
    for rel in relations:
        chain = str(rel).split("-")
        is_valid = chain[0] != "O" and len(chain) >= 3 and (chain[1] + "," + chain[2]) in valid
        labels.append(1 if is_valid else 0)
    return labels


def _load_test_set(data_type):
    """Read the 'little' file for data_type and return ([CLS]/[SEP]-wrapped sentences, 0/1 labels)."""
    if data_type == "skin":
        data_name = "EBA1415-SkinCancer-little-sentences.tsv"
        valid_relations = skin_relations
    elif data_type == "coral":
        data_name = "EBA1415-CoralBleaching-little-sentences.tsv"
        valid_relations = coral_relations
    else:
        raise ValueError("data_type must be either 'coral' or 'skin', got {!r}".format(data_type))

    # Same header convention as the training-data cell: skin has a header row, coral does not.
    header = 0 if data_type == "skin" else None
    df_test = pd.read_csv(DATA_PATH + "/" + data_name, delimiter='\t', header=header,
                          names=['essay', 'relation', 's_num', 'sentence'])

    # We need to add special tokens at the beginning and end of each sentence for BERT to work properly
    sentences_test = ["[CLS] " + sentence + " [SEP]" for sentence in df_test.sentence.values]
    # NOTE(review): repeated sentence instances are not collapsed here the way they are
    # for the training frame — confirm whether the test file needs the same treatment.
    labels_test = _binarize_relations(df_test.relation.values, valid_relations)
    return sentences_test, labels_test


def _encode_labelled_sentences(tokenizer, sentences, labels):
    """Tokenize, drop sentences longer than MAX_LEN tokens, zero-pad; return (ids, labels, masks) lists."""
    input_ids, kept_labels, attention_masks = [], [], []
    for sentence, label in zip(sentences, labels):
        tokens = tokenizer.tokenize(sentence)
        if len(tokens) > MAX_LEN:
            continue
        ids = tokenizer.convert_tokens_to_ids(tokens)
        padded = ids + [0] * (MAX_LEN - len(ids))
        input_ids.append(padded)
        attention_masks.append([float(token_id > 0) for token_id in padded])
        kept_labels.append(label)
    print("Removed {0} sentences greater than {1}".format(len(sentences) - len(input_ids), MAX_LEN))
    return input_ids, kept_labels, attention_masks


def _make_dataloader(inputs, masks, labels, batch_size, shuffle):
    """Wrap ids/masks/labels in a TensorDataset and return a DataLoader over it."""
    data = TensorDataset(
        # int64 up front, so batches need no per-step dtype conversion.
        torch.tensor(inputs, dtype=torch.long),
        torch.tensor(masks, dtype=torch.float),
        torch.tensor([int(label) for label in labels], dtype=torch.long),
    )
    sampler = RandomSampler(data) if shuffle else SequentialSampler(data)
    return DataLoader(data, sampler=sampler, batch_size=batch_size)


def _ensure_pytorch_weights(path_bert, model_index):
    """Convert the TensorFlow checkpoint in path_bert to pytorch_model.bin if it is not there yet."""
    path_bin = path_bert + 'pytorch_model.bin'
    if os.path.exists(path_bin):
        return
    if model_index == 0:
        model_ckpt = "bert_model.ckpt.data-00000-of-00001"
    else:
        model_ckpt = "model.ckpt-1000000"
    convert_tf_checkpoint_to_pytorch(path_bert + model_ckpt,
                                     path_bert + "bert_config.json",
                                     path_bin)


def _build_optimizer(model, learning_rate, total_steps):
    """Create BertAdam with weight decay disabled for bias / layer-norm parameters."""
    # 'gamma'/'beta' are kept for older parameter naming; 'LayerNorm.weight' covers the
    # current naming ('bias' already matches 'LayerNorm.bias').
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
    named_params = list(model.named_parameters())
    grouped_parameters = [
        {'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    # t_total is passed so that the WARMUP proportion is actually applied to the schedule.
    return BertAdam(grouped_parameters, lr=learning_rate, warmup=WARMUP, t_total=total_steps)


def _train_one_epoch(model, optimizer, dataloader):
    """Run one training pass over dataloader and return the mean batch loss."""
    model.train()
    total_loss, steps = 0.0, 0
    for batch in dataloader:
        b_input_ids, b_input_mask, b_labels = tuple(tensor.to(device) for tensor in batch)
        # Clear out the gradients (by default they accumulate)
        optimizer.zero_grad()
        loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        steps += 1
    return total_loss / max(steps, 1)


def _collect_logits(model, dataloader):
    """Run the model in eval mode over dataloader; return (logits, labels) as numpy arrays."""
    model.eval()
    all_logits, all_labels = [], []
    for batch in dataloader:
        b_input_ids, b_input_mask, b_labels = tuple(tensor.to(device) for tensor in batch)
        # No gradients are needed for evaluation; this saves memory and time.
        with torch.no_grad():
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        all_logits.append(logits.detach().cpu().numpy())
        all_labels.append(b_labels.to('cpu').numpy())
    return np.concatenate(all_logits), np.concatenate(all_labels)


def _summarize_report(report):
    """Return (accuracy, macro, weighted) from a classification_report dict.

    Macro and weighted precision/recall/f1 are recomputed over only those classes whose
    precision, recall and f1 are all non-zero; the weighted support becomes the summed
    support of those classes. If no class qualifies, the report's own averages are kept.
    """
    scored = [v for k, v in report.items()
              if k.isdigit() and float(v['precision']) != 0.0
              and float(v['recall']) != 0.0 and float(v['f1-score']) != 0.0]
    macro = dict(report['macro avg'])
    weighted = dict(report['weighted avg'])
    total_support = sum(v['support'] for v in scored)
    if scored and total_support:
        for metric in ('precision', 'recall', 'f1-score'):
            macro[metric] = sum(v[metric] for v in scored) / len(scored)
            weighted[metric] = sum(v[metric] * v['support'] for v in scored) / total_support
        weighted['support'] = total_support
    return report['accuracy'], macro, weighted


def _append_stats_row(stats_file, row):
    """Append one row to the stats CSV, creating the directory and header line if needed."""
    os.makedirs(STATS_PATH, exist_ok=True)
    stats_path = STATS_PATH + "/" + stats_file
    if not os.path.isfile(stats_path):
        print("{} NOT found - creating".format(stats_file))
        with open(stats_path, "w") as f:
            f.write(",".join(_STATS_COLUMNS) + "\n")
    with open(stats_path, "a") as f:
        f.write(",".join(str(value) for value in row) + "\n")


def train_test_validate_model(epochs, batch_size, notes, data_type, stats_file, sentences, tokenizer,
                              learning_rate=2e-5):
    """Fine-tune each pre-trained model, evaluate it on the held-out file, and log the stats.

    For every entry of MODEL_PATH except index 0 (skipped on purpose, see below) this
    trains a two-class BertForSequenceClassification for ``epochs`` epochs, prints the
    training loss and validation accuracy per epoch, predicts on the 'little' test file
    of ``data_type`` and appends one row to ``stats_file`` under STATS_PATH.

    Args:
        epochs: number of training epochs.
        batch_size: batch size for the training, validation and test loaders.
        notes: free-text note stored in the last stats column.
        data_type: 'skin' or 'coral'; selects the test file and the valid-relation list.
        stats_file: name of the stats CSV inside STATS_PATH.
        sentences: training sentences, already wrapped in [CLS] ... [SEP]. Their labels are
            taken by tokenize_sentences from df_binary, so they must be df_binary's sentences.
        tokenizer: BERT tokenizer used for both training and test sentences.
            NOTE(review): one tokenizer is shared by all models in MODEL_PATH — confirm its
            vocabulary matches each model's vocabulary.
        learning_rate: optimizer learning rate, also recorded in the stats row.

    Raises:
        ValueError: if ``data_type`` is neither 'skin' nor 'coral'.
    """
    # Everything that does not depend on the model is prepared once, before any training;
    # this also rejects a bad data_type before GPU time is spent.
    sentences_test, labels_test = _load_test_set(data_type)
    test_ids, test_labels, test_masks = _encode_labelled_sentences(tokenizer, sentences_test, labels_test)
    prediction_dataloader = _make_dataloader(test_ids, test_masks, test_labels, batch_size, shuffle=False)

    input_ids, labels, attention_masks = tokenize_sentences(tokenizer, sentences)

    # One call splits ids, labels and masks with the same shuffle, keeping them aligned.
    (train_inputs, validation_inputs,
     train_labels, validation_labels,
     train_masks, validation_masks) = train_test_split(
        input_ids, labels, attention_masks, random_state=2018, test_size=0.1)

    train_dataloader = _make_dataloader(train_inputs, train_masks, train_labels, batch_size, shuffle=True)
    validation_dataloader = _make_dataloader(validation_inputs, validation_masks, validation_labels,
                                             batch_size, shuffle=False)
    total_steps = len(train_dataloader) * epochs

    for model_index, model_path in enumerate(MODEL_PATH):
        # having issues with BERT, use the others.
        if model_index == 0:
            continue

        start_date_raw = datetime.datetime.now(tz=pytz.timezone('US/Central'))

        # If the model is not compiled, use PyTorch to create a pytorch_model.bin
        _ensure_pytorch_weights(MODEL_PATH_PREFIX + model_path + '/', model_index)

        # Binary task (relation / no relation): the classification head needs two outputs,
        # not one per training example.
        model = BertForSequenceClassification.from_pretrained(MODEL_PATH_PREFIX + model_path, num_labels=2)
        model.to(device)
        optimizer = _build_optimizer(model, learning_rate, total_steps)

        # trange is a tqdm wrapper around the normal python range
        for _ in trange(epochs, desc="Epoch"):
            train_loss = _train_one_epoch(model, optimizer, train_dataloader)
            print("Train loss: {}".format(train_loss))

            validation_logits, validation_truth = _collect_logits(model, validation_dataloader)
            print("Validation Accuracy: {}".format(flat_accuracy(validation_logits, validation_truth)))

        # *************************** Prediction on test set ***************************
        test_logits, y = _collect_logits(model, prediction_dataloader)
        y_pred = np.argmax(test_logits, axis=1).flatten()

        classification_dict = classification_report(y, y_pred, labels=None, target_names=None,
                                                    sample_weight=None, digits=2, output_dict=True,
                                                    zero_division=1)
        accuracy, macro, weighted = _summarize_report(classification_dict)

        date_raw = datetime.datetime.now(tz=pytz.timezone('US/Central'))
        DATE_TIME = _timestamp_ct(date_raw)
        print(DATE_TIME)

        elapsedTime = date_raw - start_date_raw
        minutes, seconds = divmod(elapsedTime.total_seconds(), 60)
        print("minutes {}: seconds {}".format(minutes, seconds))

        # Next run number: one past the last row already in the stats file (0 for a new file).
        run_number = getLastModelNumber(stats_file) + 1

        _append_stats_row(stats_file, [
            run_number, DATE_TIME, model_path, type(model).__name__, MAX_LEN, epochs, batch_size,
            type(optimizer).__name__, learning_rate, WARMUP, PRETRAINING_MODEL_ID, PRETRAINING_MODEL_TYPE,
            COST_SENSITIVITY, accuracy, macro["precision"], macro["recall"], macro["f1-score"], macro["support"],
            weighted["precision"], weighted["recall"], weighted["f1-score"], weighted["support"], KFOLD, notes,
        ])



In [None]:
# We need to add special tokens at the beginning and end of each sentence for BERT to work properly
sentences = ["[CLS] " + sentence + " [SEP]" for sentence in df_binary.sentence.values]
# NOTE(review): this tokenizer is shared by every model in MODEL_PATH — confirm its
# vocabulary matches the SciBERT / BioBERT checkpoints.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Main processing loop.
# df_binary (and therefore `sentences` and their labels) was built for DATA_TYPE_TO_USE only,
# so the grid search runs for that data set alone; looping over every entry of DATA_TYPE
# would train on one data set while testing and logging under another.
run_index = DATA_TYPE.index(DATA_TYPE_TO_USE)
for epochs in EPOCHS:
    for batch_size in BATCH_SIZE:
        train_test_validate_model(epochs, batch_size, NOTES[run_index], DATA_TYPE_TO_USE,
                                  STATS_FILE[run_index], sentences, tokenizer)
        