### All Easy DA - BERT Large Uncased

#### Un-augmented test set
#### Augment only the training set

#### Get Original Paper Data

In [1]:
# !pip install sklearn
# !pip install ekphrasis
# !pip install transformers
# !pip install spacy
# !python -m spacy download en_core_web_sm

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

import transformers

from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import backend as K
from tensorflow import keras

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

import os

import logging
tf.get_logger().setLevel(logging.ERROR)

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
encoder = LabelEncoder()
encoder.classes_ = np.load('../Data/classes.npy', allow_pickle=True)

In [5]:
tf.__version__

'2.6.0'

In [6]:
transformers.__version__

'4.16.2'

In [7]:
# test, train, dev examples from base notebook

train_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased/All_DA_BERT_base_uncased_train_examples.csv')
dev_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased/All_DA_BERT_base_uncased_dev_examples.csv')
test_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased/All_DA_BERT_base_uncased_test_examples.csv')

In [8]:
train_data_df.head()

Unnamed: 0.1,Unnamed: 0,post_id,text,target1,target2,target3,rationales,final_label,text_combined
0,0,22448349_gab,"['common', 'core', 'weed', 'too', 'much', 'rit...","['Men', 'Women']",['Women'],['None'],[],normal,common core weed too much ritalan chem trails ...
1,1,1178948520201637888_twitter,"['took', 'my', 'nan', 'to', 'the', 'hospital',...",['None'],['None'],['None'],[],normal,took my nan to the hospital for a x ray i turn...
2,2,1482573_gab,"['<user>', 'well', 'not', 'really', 'islam', '...",['Islam'],['Other'],['Islam'],"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,...",offensive,<user> well not really islam does not care for...
3,3,1097184028149587969_twitter,"['<user>', 'france', 'in', '<number>', 'after'...","['Islam', 'Other']",['Islam'],['Islam'],[],normal,<user> france in <number> after muslims take o...
4,4,1089569255111176192_twitter,"['i', 'will', 'not', 'tolerate', 'non', 'arab'...","['Arab', 'Men', 'Women']",['Arab'],"['Arab', 'Islam']","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,...",hatespeech,i will not tolerate non arab women slandering ...


In [9]:
X_train_id = train_data_df['post_id']
X_test_id = test_data_df['post_id']
X_dev_id = dev_data_df['post_id']

In [10]:
y_train = train_data_df['final_label']
y_test = test_data_df['final_label']
y_dev = dev_data_df['final_label']

In [28]:
x_train_df = pd.DataFrame({'post_id' : X_train_id.to_list()})
x_dev_df = pd.DataFrame({'post_id' : X_dev_id.to_list()})
x_test_df = pd.DataFrame({'post_id' : X_test_id.to_list()})

# X_train_df = pd.merge(x_train_df, raw_data_final, how='inner', on='post_id')
# X_dev_df = pd.merge(x_dev_df, raw_data_final, how='inner', on='post_id')
# X_test_df = pd.merge(x_test_df, raw_data_final, how='inner', on='post_id')

X_train_text = train_data_df['text_combined'].to_list()
X_dev_text= dev_data_df['text_combined'].to_list()
X_test_text = test_data_df['text_combined'].to_list()

print(len(X_train_text))
print(len(X_dev_text))
print(len(X_test_text))

15383
1923
1923


### Get Augmented Data

In [29]:
# load augmented datasets generated by EDA
# sr = synonym replacement
# ri = random synonym insertion
# rs = random swap
# rd = random deletion
# dataframe name format: method_number 

sr_1_df = pd.read_csv('../test_data_set/EDA_5_0_7_sr_rest_0_1.csv')
ri_1_df = pd.read_csv('../test_data_set/EDA_5_0_7_ri_rest_0_1.csv')
rs_1_df = pd.read_csv('../test_data_set/EDA_5_0_7_rs_rest_0_1.csv')
rd_1_df = pd.read_csv('../test_data_set/EDA_5_0_7_rd_rest_0_1.csv')
all_1_df = pd.read_csv('../test_data_set/EDA_5_all_0_1s.csv')
all_5_df = pd.read_csv('../test_data_set/EDA_5_all_0_5s.csv')

# remove undecided labeled examples
sr_1_df_filtered = sr_1_df[sr_1_df['final_label'] != 'undecided']
ri_1_df_filtered = ri_1_df[ri_1_df['final_label'] != 'undecided']
rs_1_df_filtered = rs_1_df[rs_1_df['final_label'] != 'undecided']
rd_1_df_filtered = rd_1_df[rd_1_df['final_label'] != 'undecided']
all_1_df_filtered = all_1_df[all_1_df['final_label'] != 'undecided']
all_5_df_filtered = all_5_df[all_5_df['final_label'] != 'undecided']

len(sr_1_df_filtered)

115374

In [30]:
# separate train, dev, test for each set
sr_1_df_train = sr_1_df_filtered[sr_1_df_filtered['post_id'].isin(X_train_id)]
ri_1_df_train = ri_1_df_filtered[ri_1_df_filtered['post_id'].isin(X_train_id)]
rs_1_df_train = rs_1_df_filtered[rs_1_df_filtered['post_id'].isin(X_train_id)]
rd_1_df_train = rd_1_df_filtered[rd_1_df_filtered['post_id'].isin(X_train_id)]
all_1_df_train = all_1_df_filtered[all_1_df_filtered['post_id'].isin(X_train_id)]
all_5_df_train = all_5_df_filtered[all_5_df_filtered['post_id'].isin(X_train_id)]

# select text sets

aug_sr_text = sr_1_df_train['text_str'].to_list()
aug_ri_text = ri_1_df_train['text_str'].to_list()
aug_rs_text = rs_1_df_train['text_str'].to_list()
aug_rd_text = rd_1_df_train['text_str'].to_list()
aug_all_1_text = all_1_df_train['text_str'].to_list()
aug_all_5_text = all_5_df_train['text_str'].to_list()

# select label sets

aug_sr_labels = sr_1_df_train['final_label']
aug_ri_labels = ri_1_df_train['final_label']
aug_rs_labels = rs_1_df_train['final_label']
aug_rd_labels = rd_1_df_train['final_label']
aug_all_1_labels = all_1_df_train['final_label']
aug_all_5_labels = all_5_df_train['final_label']

len(aug_sr_text)

92298

#### Convert labels to one-hot encoding

In [31]:
# convert class label to 1 hot encoding

def convert_to_oh(S):
    '''takes a pandas series of text labels and returns one hot encoding equivalent
    0 = normal, 1 = offensive, 2 = hatespeech
    ''' 
    S_numerical = S.apply(lambda x: 0 if x=='normal' else (1 if x=='offensive' else 2))
    S_oh = keras.utils.to_categorical(S_numerical, num_classes = 3, dtype = 'float32')
    return S_oh
    
# original dataset - train, dev, and train
y_train_orig = convert_to_oh(pd.Series(y_train))
y_dev_orig = convert_to_oh(pd.Series(y_dev))
y_test_orig = convert_to_oh(pd.Series(y_test))

# augmented with sr = 0.1
y_train_aug_sr = convert_to_oh(aug_sr_labels)

# augmented with ri = 0.1
y_train_aug_ri = convert_to_oh(aug_ri_labels)

# augmented with rs = 0.1
y_train_aug_rs = convert_to_oh(aug_rs_labels)

# augmented with rd = 0.1
y_train_aug_rd = convert_to_oh(aug_rd_labels)

# augmented with all = 0.1
y_train_all_1 = convert_to_oh(aug_all_1_labels)

# augmented with all = 0.5
y_train_all_5 = convert_to_oh(aug_all_5_labels)


In [32]:
len(aug_ri_text)

92298

In [33]:
len(y_train_aug_ri)

92298

In [34]:
y_train_aug_ri

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

### BERT Model

In [35]:
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
#bert_model = TFBertModel.from_pretrained('bert-base-uncased')

In [36]:
aug_sr_text

['i dont suppose im come my child them edward white he has edward white watt second and nike not regular contact',
 'i dont think im getting my baby them white he has two white j and nikes not even touched',
 'i think im getting my baby them white he has two white j and nikes not even touched',
 'i dont think im getting my baby them j he has two white white and nikes not even touched',
 'i dont think im getting my baby them white he has two white j and nikes not even touched',
 'i dont think im getting my baby them white he has two white j and nikes not even touched',
 'we arent continue calling ourselves feminists if the rights of all womxn to addressed yes list a sexual offences public cannot but will a trans lesbian bisexual on queer womxn be able to enter their information and the reporting sheet gender forum',
 'we cannot continue feminists ourselves calling if the be of list womxn arent addressed yes to a sexual offences public but all will a trans lesbian bisexual and queer womx

In [37]:
max_length = 128

def bert_tokenize(train_set, dev_set, test_set, max_length):
    
    train = tokenizer(train_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    dev = tokenizer(dev_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    test = tokenizer(test_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    
    return train, dev, test

X_train_orig, X_dev_orig, X_test_orig = bert_tokenize(X_train_text, X_dev_text, X_test_text, max_length)

X_train_aug_sr, X_dev_aug_sr, X_test_aug_sr = bert_tokenize(aug_sr_text, X_dev_text, X_test_text, max_length)

X_train_aug_ri, X_dev_aug_ri, X_test_aug_ri = bert_tokenize(aug_ri_text, X_dev_text, X_test_text, max_length)

X_train_aug_rs, X_dev_aug_rs, X_test_aug_rs = bert_tokenize(aug_rs_text, X_dev_text, X_test_text, max_length)

X_train_aug_rd, X_dev_aug_rd, X_test_aug_rd = bert_tokenize(aug_rd_text, X_dev_text, X_test_text, max_length)

X_train_all_1, X_dev_all_1, X_test_all_1 = bert_tokenize(aug_all_1_text, X_dev_text, X_test_text, max_length)

X_train_all_5, X_dev_all_5, X_test_all_5 = bert_tokenize(aug_all_5_text, X_dev_text, X_test_text, max_length)


In [38]:
#tokenizer.save_pretrained("./Tokenizer_ALL_EDA_BERT_base_uncased")

In [39]:
X_train_orig.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [40]:
X_train_aug_sr.input_ids

<tf.Tensor: shape=(92298, 128), dtype=int32, numpy=
array([[ 101, 1045, 2123, ...,    0,    0,    0],
       [ 101, 1045, 2123, ...,    0,    0,    0],
       [ 101, 1045, 2228, ...,    0,    0,    0],
       ...,
       [ 101, 1996, 3795, ...,    0,    0,    0],
       [ 101, 1996, 3644, ...,    0,    0,    0],
       [ 101, 1996, 3644, ...,    0,    0,    0]])>

In [41]:
X_train_aug_ri.token_type_ids

<tf.Tensor: shape=(92298, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>

In [42]:
X_train_all_1.attention_mask

<tf.Tensor: shape=(92298, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>

In [43]:
X_train_all_5.input_ids

<tf.Tensor: shape=(92298, 128), dtype=int32, numpy=
array([[  101,  1045,  2123, ...,     0,     0,     0],
       [  101,  2893,  2123, ...,     0,     0,     0],
       [  101,  1045,  2123, ...,     0,     0,     0],
       ...,
       [  101,  1996, 10620, ...,     0,     0,     0],
       [  101,  1996,  3644, ...,     0,     0,     0],
       [  101,  1996,  3644, ...,     0,     0,     0]])>

In [44]:
from keras import backend as K

def balanced_recall(y_true, y_pred):
    """This function calculates the balanced recall metric
    recall = TP / (TP + FN)
    """
    recall_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true_class, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        recall_by_class = recall_by_class + recall
    return recall_by_class / y_pred.shape[1]

def balanced_precision(y_true, y_pred):
    """This function calculates the balanced precision metric
    precision = TP / (TP + FP)
    """
    precision_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred_class, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        precision_by_class = precision_by_class + precision
    # return average balanced metric for each class
    return precision_by_class / y_pred.shape[1]

def balanced_f1_score(y_true, y_pred):
    """This function calculates the F1 score metric"""
    precision = balanced_precision(y_true, y_pred)
    recall = balanced_recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

In [45]:
def create_classification_model(bert_model, hidden_size = 5, 
                                train_layers = -1, 
                                optimizer=tf.keras.optimizers.Adam()):
    """
    Build a simple classification model with BERT. Let's keep it simple and don't add dropout, layer norms, etc.
    """

    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                  'token_type_ids': token_type_ids,
                  'attention_mask': attention_mask}


    #restrict training to the train_layers outer transformer layers
    if not train_layers == -1:

            retrain_layers = []

            for retrain_layer_number in range(train_layers):

                layer_code = '_' + str(11 - retrain_layer_number)
                retrain_layers.append(layer_code)

            for w in bert_model.weights:
                if not any([x in w.name for x in retrain_layers]):
                    w._trainable = False


    bert_out = bert_model(bert_inputs)
    
    net = bert_out[0]
    
    classification_token = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(net)
    
    dropout1 = tf.keras.layers.Dropout(0.4, name="dropout1")(classification_token)
    
    hidden = tf.keras.layers.Dense(hidden_size, name='hidden_layer')(dropout1)
    
    dropout2 = tf.keras.layers.Dropout(0.4, name="dropout2")(hidden)

    classification = tf.keras.layers.Dense(3, activation='sigmoid',name='classification_layer')(dropout2)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], 
                                          outputs=[classification])
    
    METRICS = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), 
               balanced_recall, 
               balanced_precision, 
               balanced_f1_score,
               tf.keras.metrics.AUC(curve='ROC', name="auc_roc")]
    
    
    classification_model.compile(optimizer=optimizer,
                            loss=tf.keras.losses.CategoricalCrossentropy(),
                            metrics= METRICS)


    return classification_model




#     classification_model.compile(optimizer=optimizer,
#                             loss=tf.keras.losses.CategoricalCrossentropy(),
#                             metrics=tf.keras.metrics.CategoricalAccuracy('accuracy'))

In [49]:
def fine_tune_BERT(x_train, x_dev, x_test, y_train, y_dev, y_test, name, learning_rate = 5e-05, 
                   epsilon=1e-08, train_layers = -1, epochs = 10, batch_size = 16):
    ''' Fine tunes BERT base uncased with given data, allows your to set some hyperparameters
        returns test set accuracy, f1 score, and AUC_ROC score
    '''
    try:
        del classification_model
    except:
        pass

    try:
        del bert_model
    except:
        pass
    
    tf.keras.backend.clear_session()
    bert_model = TFBertModel.from_pretrained('bert-large-uncased')

    # early stopping callback
    
    earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', 
                                                      patience = 4,
                                                      restore_best_weights = True)
    
    # Create a callback that saves the model's weights
    
    path_name = './Saved_Models/EDA_larg_uncased/' + name + '/' + name

    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=path_name, 
                                                     save_weights_only=True,
                                                     verbose=1,
                                                     monitor='val_accuracy',
                                                     save_best_only=True)
    
    # create classification model
    classification_model = create_classification_model(bert_model, 
                                                       optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=epsilon),
                                                       train_layers=train_layers)    
    
    model_fit = classification_model.fit([x_train.input_ids, x_train.token_type_ids, x_train.attention_mask],
                         y_train,
                         validation_data=([x_dev.input_ids, x_dev.token_type_ids, x_dev.attention_mask],
                         y_dev),
                        epochs=epochs,
                        batch_size=batch_size,
                        callbacks = [earlystop_callback, cp_callback])
    
    y_preds_array = classification_model.predict([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask])

    # convert to predicted one-hot encoding

    from keras.utils.np_utils import to_categorical
    y_preds = to_categorical(np.argmax(y_preds_array, 1), dtype = "int64")

    # convert back to labels

    y_test_cat = np.argmax(y_test, axis=1)
    y_preds_cat = np.argmax(y_preds, axis=1)
    
    # calculate metrics
    Accuracy = accuracy_score(y_test_cat, y_preds_cat)

    Macro_F1 = f1_score(y_test_cat, y_preds_cat, average='macro')

    ROC_AUC = roc_auc_score(y_test, y_preds, multi_class='ovo',average='macro')
    
    metrics_history = model_fit.history
    
    return Accuracy, Macro_F1, ROC_AUC, metrics_history

In [52]:
%%time
# original data set
Accuracy_orig, Macro_F1_orig, ROC_AUC_orig, metrics_orig = fine_tune_BERT(X_train_orig, X_dev_orig, X_test_orig, 
                                                            y_train_orig, y_dev_orig, y_test_orig, 'orig_data_large',
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.46698, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.46698 to 0.52262, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.52262 to 0.56838, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.56838 to 0.60530, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.60530 to 0.62090, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.62090 to 0.63495, saving model to ./Saved_Models/EDA_larg_uncased/orig_data_large\orig_data_large
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.63495 to 0.63755, saving model to ./S

In [53]:
%%time
# augmented with sr = 0.1
Accuracy_aug_sr, Macro_F1_aug_sr, ROC_AUC_aug_sr, metrics_sr = fine_tune_BERT(X_train_aug_sr, X_dev_aug_sr, X_test_aug_sr, 
                                                            y_train_aug_sr, y_dev_orig, y_test_orig, 'EDA_sr_large', 
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.62611, saving model to ./Saved_Models/EDA_larg_uncased/EDA_sr_large\EDA_sr_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.62611 to 0.65783, saving model to ./Saved_Models/EDA_larg_uncased/EDA_sr_large\EDA_sr_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.65783 to 0.66771, saving model to ./Saved_Models/EDA_larg_uncased/EDA_sr_large\EDA_sr_large
Epoch 4/30

Epoch 00004: val_accuracy did not improve from 0.66771
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.66771 to 0.67499, saving model to ./Saved_Models/EDA_larg_uncased/EDA_sr_large\EDA_sr_large
Epoch 6/30

Epoch 00006: val_accuracy did not improve from 0.67499
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.67499
Epoch 8/30

Epoch 00008: val_accuracy improved from 0.67499 to 0.67655, saving model to ./Saved_Models/EDA_larg_uncased/EDA_sr_large\EDA_sr_large
Epoch 9/30

Epoch 00009: val_accuracy improved from 0.67655 to 0.68071, 

In [64]:
%%time
# augmented with ri = 0.1
Accuracy_aug_ri, Macro_F1_aug_ri, ROC_AUC_aug_ri, metrics_ri = fine_tune_BERT(X_train_aug_ri, X_dev_aug_ri, X_test_aug_ri, 
                                                            y_train_aug_ri, y_dev_orig, y_test_orig, 'EDA_ri_large', 
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.57150, saving model to ./Saved_Models/EDA_larg_uncased/EDA_ri_large\EDA_ri_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.57150 to 0.64691, saving model to ./Saved_Models/EDA_larg_uncased/EDA_ri_large\EDA_ri_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.64691 to 0.66511, saving model to ./Saved_Models/EDA_larg_uncased/EDA_ri_large\EDA_ri_large
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.66511 to 0.67447, saving model to ./Saved_Models/EDA_larg_uncased/EDA_ri_large\EDA_ri_large
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.67447 to 0.68851, saving model to ./Saved_Models/EDA_larg_uncased/EDA_ri_large\EDA_ri_large
Epoch 6/30

Epoch 00006: val_accuracy did not improve from 0.68851
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.68851
Epoch 8/30

Epoch 00008: val_accuracy did not improve from 0.68851
Epoch 9/30

Epoch 00009: val_accuracy improved from 0.68851 to 0.69163, 

In [61]:
%%time
# augmented with rs = 0.1
Accuracy_aug_rs, Macro_F1_aug_rs, ROC_AUC_aug_rs, metrics_rs = fine_tune_BERT(X_train_aug_rs, X_dev_aug_rs, X_test_aug_rs, 
                                                            y_train_aug_rs, y_dev_orig, y_test_orig, 'EDA_rs_large',
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.61986, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.61986 to 0.65783, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.65783 to 0.67499, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.67499 to 0.67863, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.67863 to 0.68019, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.68019 to 0.68279, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_large\EDA_rs_large
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.68279 to 0.68903, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rs_

In [65]:
%%time
# augmented with rd = 0.1
Accuracy_aug_rd, Macro_F1_aug_rd, ROC_AUC_aug_rd, metrics_rd = fine_tune_BERT(X_train_aug_rd, X_dev_aug_rd, X_test_aug_rd, 
                                                            y_train_aug_rd, y_dev_orig, y_test_orig, 'EDA_rd_large',
                                                            learning_rate =5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.53978, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.53978 to 0.60010, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.60010 to 0.65003, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.65003 to 0.66511, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.66511 to 0.67395, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.67395 to 0.67655, saving model to ./Saved_Models/EDA_larg_uncased/EDA_rd_large\EDA_rd_large
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.67655
Epoch 8/30

Epoch 00008: val_accuracy improved from 0.67655 

In [66]:
%%time
# augmented with all = 0.1
Accuracy_aug_all_1, Macro_F1_aug_all_1, ROC_AUC_aug_all_1, metrics_all_1 = fine_tune_BERT(X_train_all_1, X_dev_all_1, X_test_all_1, 
                                                            y_train_all_1, y_dev_orig, y_test_orig, 'EDA_all_1_lar',
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.59022, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.59022 to 0.64795, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.64795 to 0.65263, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.65263 to 0.65627, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.65627 to 0.66875, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.66875 to 0.67083, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_1_lar\EDA_all_1_lar
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.67083 to 0.67551, saving model to ./Saved_Models/EDA_larg_unc

In [62]:
%%time
# augmented with all = 0.5
Accuracy_aug_all_5, Macro_F1_aug_all_5, ROC_AUC_aug_all_5, metrics_all_5 = fine_tune_BERT(X_train_all_5, X_dev_all_5, X_test_all_5, 
                                                            y_train_all_5, y_dev_orig, y_test_orig, 'EDA_all_5_large',
                                                            learning_rate = 5e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-large-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-large-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.59594, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.59594 to 0.65367, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.65367 to 0.65939, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.65939 to 0.67031, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 5/30

Epoch 00005: val_accuracy did not improve from 0.67031
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.67031 to 0.67083, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.67083 to 0.67551, saving model to ./Saved_Models/EDA_larg_uncased/EDA_all_5_large\EDA_all_5_large
Epoch 8/30

Epoch 00008:

In [59]:
Accuracy_orig, Macro_F1_orig, ROC_AUC_orig, metrics_orig

(0.6963078523140925,
 0.684745493480006,
 0.7639715463789166,
 {'loss': [1.2350019216537476,
   1.1044189929962158,
   1.039764165878296,
   0.9939859509468079,
   0.9481862187385559,
   0.9214847087860107,
   0.9025112390518188,
   0.8812642097473145,
   0.863515317440033,
   0.8507573008537292,
   0.8308382034301758,
   0.8259435892105103,
   0.8188992142677307,
   0.8026054501533508,
   0.7927019596099854,
   0.7887464165687561],
  'accuracy': [0.3722940981388092,
   0.42689982056617737,
   0.48534095287323,
   0.5266202688217163,
   0.557173490524292,
   0.5732951760292053,
   0.588441789150238,
   0.5985178351402283,
   0.6113892197608948,
   0.6141844987869263,
   0.6318013668060303,
   0.6394071578979492,
   0.6373919248580933,
   0.6470129489898682,
   0.6535786390304565,
   0.6547487378120422],
  'balanced_recall': [0.5611453652381897,
   0.5756309628486633,
   0.6212756037712097,
   0.6626681089401245,
   0.7011670470237732,
   0.7143974900245667,
   0.7278687953948975,
   0.

In [60]:
Accuracy_aug_sr, Macro_F1_aug_sr, ROC_AUC_aug_sr, metrics_sr

(0.6989079563182528,
 0.6758681084374049,
 0.7669763868547642,
 {'loss': [1.0434372425079346,
   0.8761473298072815,
   0.8293533325195312,
   0.7980645298957825,
   0.7752760648727417,
   0.7560654282569885,
   0.7403826117515564,
   0.7245800495147705,
   0.7037743330001831,
   0.6819196939468384,
   0.6545854210853577,
   0.6159468293190002,
   0.5733673572540283],
  'accuracy': [0.47498321533203125,
   0.5964376330375671,
   0.6250298023223877,
   0.6467528939247131,
   0.6612386107444763,
   0.6696678400039673,
   0.6778694987297058,
   0.6866670846939087,
   0.6957789063453674,
   0.7086068987846375,
   0.721229076385498,
   0.7429630160331726,
   0.7596264481544495],
  'balanced_recall': [0.40563106536865234,
   0.5316692590713501,
   0.5047776103019714,
   0.49748098850250244,
   0.5195180773735046,
   0.5215157866477966,
   0.5363613963127136,
   0.5456236004829407,
   0.5514771938323975,
   0.5644664168357849,
   0.5748149752616882,
   0.5796377658843994,
   0.600847005844116

In [None]:
# Accuracy_aug_ri, Macro_F1_aug_ri, ROC_AUC_aug_ri, metrics_ri

In [None]:
# Accuracy_aug_rs, Macro_F1_aug_rs, ROC_AUC_aug_rs, metrics_rs

In [None]:
# Accuracy_aug_rd, Macro_F1_aug_rd, ROC_AUC_aug_rd, metrics_rd

In [None]:
# Accuracy_aug_all_1, Macro_F1_aug_all_1, ROC_AUC_aug_all_1, metrics_all_1

In [None]:
# Accuracy_aug_all_5, Macro_F1_aug_all_5, ROC_AUC_aug_all_5, metrics_all_5

In [67]:
trial_name_list = ['Original Data', 'Augmented SR 0.1', 'Augmented RI 0.1', 
                   'Augmented RS 0.1', 'Augmented RD 0.1', 'Augmented All 0.1', 'Augmented All 0.5']

acc_list = [Accuracy_orig, Accuracy_aug_sr, Accuracy_aug_ri, Accuracy_aug_rs, 
            Accuracy_aug_rd, Accuracy_aug_all_1, Accuracy_aug_all_5]

macro_f1_list = [Macro_F1_orig, Macro_F1_aug_sr, Macro_F1_aug_ri, Macro_F1_aug_rs, 
                 Macro_F1_aug_rd, Macro_F1_aug_all_1, Macro_F1_aug_all_5]

roc_auc_list = [ROC_AUC_orig, ROC_AUC_aug_sr, ROC_AUC_aug_ri, ROC_AUC_aug_rs, 
                ROC_AUC_aug_rd, ROC_AUC_aug_all_1, ROC_AUC_aug_all_5]

In [68]:
result_dict = {'Trial Name' : trial_name_list, 'Test Accuracy Score' : acc_list, 
               'Test Macro F1 Score' : macro_f1_list, 'Test ROC AUC Score' : roc_auc_list}

In [69]:
results_df = pd.DataFrame(result_dict)

results_df

Unnamed: 0,Trial Name,Test Accuracy Score,Test Macro F1 Score,Test ROC AUC Score
0,Original Data,0.696308,0.684745,0.763972
1,Augmented SR 0.1,0.698908,0.675868,0.766976
2,Augmented RI 0.1,0.699428,0.687185,0.767257
3,Augmented RS 0.1,0.683827,0.675714,0.758049
4,Augmented RD 0.1,0.695788,0.682732,0.764686
5,Augmented All 0.1,0.679147,0.666963,0.754397
6,Augmented All 0.5,0.704108,0.688736,0.769812


In [70]:
results_df.to_csv('./Saved_Models/EDA_base_uncased/All_DA_BERT_large_uncased.csv')

In [71]:
metrics_org_df = pd.DataFrame(metrics_orig)

metrics_org_df

Unnamed: 0,loss,accuracy,balanced_recall,balanced_precision,balanced_f1_score,auc_roc,val_loss,val_accuracy,val_balanced_recall,val_balanced_precision,val_balanced_f1_score,val_auc_roc
0,1.235002,0.372294,0.561145,0.344326,0.426264,0.528549,1.05253,0.466979,0.64404,0.382419,0.479387,0.605221
1,1.104419,0.4269,0.575631,0.362161,0.44417,0.571692,1.002272,0.522621,0.661262,0.412192,0.507172,0.655486
2,1.039764,0.485341,0.621276,0.391535,0.479963,0.623192,0.933015,0.568383,0.71593,0.443293,0.546874,0.714712
3,0.993986,0.52662,0.662668,0.418151,0.512337,0.665657,0.889838,0.605304,0.733001,0.486161,0.583323,0.743536
4,0.948186,0.557173,0.701167,0.439137,0.539609,0.700025,0.858946,0.620905,0.751207,0.494459,0.595131,0.762966
5,0.921485,0.573295,0.714397,0.458471,0.557984,0.719144,0.843118,0.634945,0.757525,0.503643,0.603786,0.77
6,0.902511,0.588442,0.727869,0.468315,0.569419,0.731918,0.817379,0.637546,0.782428,0.525686,0.627578,0.782069
7,0.881264,0.598518,0.739718,0.48254,0.583515,0.744793,0.81267,0.647426,0.762763,0.51891,0.616365,0.786421
8,0.863515,0.611389,0.732371,0.489688,0.586395,0.7501,0.794866,0.657826,0.753669,0.554886,0.638208,0.789241
9,0.850757,0.614184,0.737044,0.498026,0.593907,0.757297,0.794397,0.662507,0.753497,0.559534,0.641306,0.790277


In [72]:
metrics_list = [metrics_orig, metrics_sr, metrics_ri, metrics_rs, metrics_rd, metrics_all_1, metrics_all_5]
name_list = ['fit_metrics_orig.csv', 'fit_metrics_sr.csv', 'fit_metrics_ri.csv', 'fit_metrics_rs.csv', 'fit_metrics_rd.csv', 'fit_metrics_all_1.csv', 'fit_metrics_all_5.csv']

i = 0
for m in metrics_list:
    df = pd.DataFrame(m)
    df.to_csv(name_list[i])
    i += 1

        