### All Easy DA - BERT Base Uncased

#### Un-augmented test set
#### Augment only the training set - 2 augmented examples per original

#### Get Original Paper Data

In [1]:
# !pip install sklearn
# !pip install ekphrasis
# !pip install transformers
# !pip install spacy
# !python -m spacy download en_core_web_sm

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

import transformers

from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import backend as K
from tensorflow import keras

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

import os

import logging
tf.get_logger().setLevel(logging.ERROR)

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
encoder = LabelEncoder()
encoder.classes_ = np.load('../Data/classes.npy', allow_pickle=True)

In [5]:
tf.__version__

'2.6.0'

In [6]:
transformers.__version__

'4.16.2'

In [7]:
# test, train, dev examples from base notebook

train_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased_5aug/All_DA_BERT_base_uncased_train_examples.csv')
dev_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased_5aug/All_DA_BERT_base_uncased_dev_examples.csv')
test_data_df = pd.read_csv('./Saved_Models/EDA_base_uncased_5aug/All_DA_BERT_base_uncased_test_examples.csv')

In [8]:
train_data_df.head()

Unnamed: 0.1,Unnamed: 0,post_id,text,target1,target2,target3,rationales,final_label,text_combined
0,0,22448349_gab,"['common', 'core', 'weed', 'too', 'much', 'rit...","['Men', 'Women']",['Women'],['None'],[],normal,common core weed too much ritalan chem trails ...
1,1,1178948520201637888_twitter,"['took', 'my', 'nan', 'to', 'the', 'hospital',...",['None'],['None'],['None'],[],normal,took my nan to the hospital for a x ray i turn...
2,2,1482573_gab,"['<user>', 'well', 'not', 'really', 'islam', '...",['Islam'],['Other'],['Islam'],"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,...",offensive,<user> well not really islam does not care for...
3,3,1097184028149587969_twitter,"['<user>', 'france', 'in', '<number>', 'after'...","['Islam', 'Other']",['Islam'],['Islam'],[],normal,<user> france in <number> after muslims take o...
4,4,1089569255111176192_twitter,"['i', 'will', 'not', 'tolerate', 'non', 'arab'...","['Arab', 'Men', 'Women']",['Arab'],"['Arab', 'Islam']","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,...",hatespeech,i will not tolerate non arab women slandering ...


In [9]:
X_train_id = train_data_df['post_id']
X_test_id = test_data_df['post_id']
X_dev_id = dev_data_df['post_id']

In [10]:
y_train = train_data_df['final_label']
y_test = test_data_df['final_label']
y_dev = dev_data_df['final_label']

In [11]:
x_train_df = pd.DataFrame({'post_id' : X_train_id.to_list()})
x_dev_df = pd.DataFrame({'post_id' : X_dev_id.to_list()})
x_test_df = pd.DataFrame({'post_id' : X_test_id.to_list()})

# X_train_df = pd.merge(x_train_df, raw_data_final, how='inner', on='post_id')
# X_dev_df = pd.merge(x_dev_df, raw_data_final, how='inner', on='post_id')
# X_test_df = pd.merge(x_test_df, raw_data_final, how='inner', on='post_id')

X_train_text = train_data_df['text_combined'].to_list()
X_dev_text= dev_data_df['text_combined'].to_list()
X_test_text = test_data_df['text_combined'].to_list()

print(len(X_train_text))
print(len(X_dev_text))
print(len(X_test_text))

15383
1923
1923


### Get Augmented Data

In [12]:
# load augmented datasets generated by EDA
# sr = synonym replacement
# ri = random synonym insertion
# rs = random swap
# rd = random deletion
# dataframe name format: method_number 

sr_1_df = pd.read_csv('../test_data_set/EDA_2_0_7_sr_rest_0_1.csv')
ri_1_df = pd.read_csv('../test_data_set/EDA_2_0_7_ri_rest_0_1.csv')
rs_1_df = pd.read_csv('../test_data_set/EDA_2_0_7_rs_rest_0_1.csv')
rd_1_df = pd.read_csv('../test_data_set/EDA_2_0_7_rd_rest_0_1.csv')
all_1_df = pd.read_csv('../test_data_set/EDA_2_all_0_1s.csv')
all_5_df = pd.read_csv('../test_data_set/EDA_2_all_0_5s.csv')

# remove undecided labeled examples
sr_1_df_filtered = sr_1_df[sr_1_df['final_label'] != 'undecided']
ri_1_df_filtered = ri_1_df[ri_1_df['final_label'] != 'undecided']
rs_1_df_filtered = rs_1_df[rs_1_df['final_label'] != 'undecided']
rd_1_df_filtered = rd_1_df[rd_1_df['final_label'] != 'undecided']
all_1_df_filtered = all_1_df[all_1_df['final_label'] != 'undecided']
all_5_df_filtered = all_5_df[all_5_df['final_label'] != 'undecided']

len(sr_1_df_filtered)

57687

In [13]:
# separate train, dev, test for each set
sr_1_df_train = sr_1_df_filtered[sr_1_df_filtered['post_id'].isin(X_train_id)]
ri_1_df_train = ri_1_df_filtered[ri_1_df_filtered['post_id'].isin(X_train_id)]
rs_1_df_train = rs_1_df_filtered[rs_1_df_filtered['post_id'].isin(X_train_id)]
rd_1_df_train = rd_1_df_filtered[rd_1_df_filtered['post_id'].isin(X_train_id)]
all_1_df_train = all_1_df_filtered[all_1_df_filtered['post_id'].isin(X_train_id)]
all_5_df_train = all_5_df_filtered[all_5_df_filtered['post_id'].isin(X_train_id)]

# select text sets

aug_sr_text = sr_1_df_train['text_str'].to_list()
aug_ri_text = ri_1_df_train['text_str'].to_list()
aug_rs_text = rs_1_df_train['text_str'].to_list()
aug_rd_text = rd_1_df_train['text_str'].to_list()
aug_all_1_text = all_1_df_train['text_str'].to_list()
aug_all_5_text = all_5_df_train['text_str'].to_list()

# select label sets

aug_sr_labels = sr_1_df_train['final_label']
aug_ri_labels = ri_1_df_train['final_label']
aug_rs_labels = rs_1_df_train['final_label']
aug_rd_labels = rd_1_df_train['final_label']
aug_all_1_labels = all_1_df_train['final_label']
aug_all_5_labels = all_5_df_train['final_label']

len(aug_sr_text)

46149

#### Convert labels to one-hot encoding

In [14]:
# convert class label to 1 hot encoding

def convert_to_oh(S):
    '''takes a pandas series of text labels and returns one hot encoding equivalent
    0 = normal, 1 = offensive, 2 = hatespeech
    ''' 
    S_numerical = S.apply(lambda x: 0 if x=='normal' else (1 if x=='offensive' else 2))
    S_oh = keras.utils.to_categorical(S_numerical, num_classes = 3, dtype = 'float32')
    return S_oh
    
# original dataset - train, dev, and train
y_train_orig = convert_to_oh(pd.Series(y_train))
y_dev_orig = convert_to_oh(pd.Series(y_dev))
y_test_orig = convert_to_oh(pd.Series(y_test))

# augmented with sr = 0.1
y_train_aug_sr = convert_to_oh(aug_sr_labels)

# augmented with ri = 0.1
y_train_aug_ri = convert_to_oh(aug_ri_labels)

# augmented with rs = 0.1
y_train_aug_rs = convert_to_oh(aug_rs_labels)

# augmented with rd = 0.1
y_train_aug_rd = convert_to_oh(aug_rd_labels)

# augmented with all = 0.1
y_train_all_1 = convert_to_oh(aug_all_1_labels)

# augmented with all = 0.5
y_train_all_5 = convert_to_oh(aug_all_5_labels)


In [15]:
len(aug_ri_text)

46149

In [16]:
len(y_train_aug_ri)

46149

In [17]:
y_train_aug_ri

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

### BERT Model

In [18]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#bert_model = TFBertModel.from_pretrained('bert-base-uncased')

In [19]:
aug_sr_text

['i dont think im getting my baby them white he has two white and nikes not even touched',
 'i dont think im getting my baby them white he has two white j and nikes not even affected touched',
 'i dont think im getting my baby them white he has two white j and nikes not even touched',
 'we cannot proceed foretell ourselves womens rightist if the compensate of all womxn arent turn to yes to a intimate offensive activity world lean but will a trans tribade bisexual person and gay womxn be able bodied to go into their info on the reportage sheet of paper sexuality meeting place',
 'we cannot continue calling ourselves feminists if the rights of all womxn arent addressed yes to a sexual public list but will a trans lesbian bisexual and queer be able to enter their information the reporting sheet gender forum',
 'we cannot continue calling ourselves feminists if the rights of all womxn arent addressed yes to a sexual offences public list but will a trans lesbian bisexual and queer womxn be 

In [20]:
max_length = 128

def bert_tokenize(train_set, dev_set, test_set, max_length):
    
    train = tokenizer(train_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    dev = tokenizer(dev_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    test = tokenizer(test_set, max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
    
    return train, dev, test

X_train_orig, X_dev_orig, X_test_orig = bert_tokenize(X_train_text, X_dev_text, X_test_text, max_length)

X_train_aug_sr, X_dev_aug_sr, X_test_aug_sr = bert_tokenize(aug_sr_text, X_dev_text, X_test_text, max_length)

X_train_aug_ri, X_dev_aug_ri, X_test_aug_ri = bert_tokenize(aug_ri_text, X_dev_text, X_test_text, max_length)

X_train_aug_rs, X_dev_aug_rs, X_test_aug_rs = bert_tokenize(aug_rs_text, X_dev_text, X_test_text, max_length)

X_train_aug_rd, X_dev_aug_rd, X_test_aug_rd = bert_tokenize(aug_rd_text, X_dev_text, X_test_text, max_length)

X_train_all_1, X_dev_all_1, X_test_all_1 = bert_tokenize(aug_all_1_text, X_dev_text, X_test_text, max_length)

X_train_all_5, X_dev_all_5, X_test_all_5 = bert_tokenize(aug_all_5_text, X_dev_text, X_test_text, max_length)


In [21]:
#tokenizer.save_pretrained("./Tokenizer_ALL_EDA_BERT_base_uncased")

In [22]:
X_train_orig.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [23]:
X_train_aug_sr.input_ids

<tf.Tensor: shape=(46149, 128), dtype=int32, numpy=
array([[  101,  1045,  2123, ...,     0,     0,     0],
       [  101,  1045,  2123, ...,     0,     0,     0],
       [  101,  1045,  2123, ...,     0,     0,     0],
       ...,
       [  101,  1996,  3644, ...,     0,     0,     0],
       [  101,  1996, 18414, ...,     0,     0,     0],
       [  101,  1996,  3644, ...,     0,     0,     0]])>

In [24]:
X_train_aug_ri.token_type_ids

<tf.Tensor: shape=(46149, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>

In [25]:
X_train_all_1.attention_mask

<tf.Tensor: shape=(46149, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>

In [26]:
X_train_all_5.input_ids

<tf.Tensor: shape=(46149, 128), dtype=int32, numpy=
array([[  101,  2228,  2123, ...,     0,     0,     0],
       [  101,  1045,  2292, ...,     0,     0,     0],
       [  101,  1045,  2123, ...,     0,     0,     0],
       ...,
       [  101,  1996, 18414, ...,     0,     0,     0],
       [  101,  2637,  3644, ...,     0,     0,     0],
       [  101,  1996,  3644, ...,     0,     0,     0]])>

In [27]:
from keras import backend as K

def balanced_recall(y_true, y_pred):
    """This function calculates the balanced recall metric
    recall = TP / (TP + FN)
    """
    recall_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true_class, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        recall_by_class = recall_by_class + recall
    return recall_by_class / y_pred.shape[1]

def balanced_precision(y_true, y_pred):
    """This function calculates the balanced precision metric
    precision = TP / (TP + FP)
    """
    precision_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred_class, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        precision_by_class = precision_by_class + precision
    # return average balanced metric for each class
    return precision_by_class / y_pred.shape[1]

def balanced_f1_score(y_true, y_pred):
    """This function calculates the F1 score metric"""
    precision = balanced_precision(y_true, y_pred)
    recall = balanced_recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

In [28]:
def create_classification_model(bert_model, hidden_size = 5, 
                                train_layers = -1, 
                                optimizer=tf.keras.optimizers.Adam()):
    """
    Build a simple classification model with BERT. Let's keep it simple and don't add dropout, layer norms, etc.
    """

    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                  'token_type_ids': token_type_ids,
                  'attention_mask': attention_mask}


    #restrict training to the train_layers outer transformer layers
    if not train_layers == -1:

            retrain_layers = []

            for retrain_layer_number in range(train_layers):

                layer_code = '_' + str(11 - retrain_layer_number)
                retrain_layers.append(layer_code)

            for w in bert_model.weights:
                if not any([x in w.name for x in retrain_layers]):
                    w._trainable = False


    bert_out = bert_model(bert_inputs)
    
    net = bert_out[0]
    
    classification_token = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(net)
    
    dropout1 = tf.keras.layers.Dropout(0.4, name="dropout1")(classification_token)
    
    hidden = tf.keras.layers.Dense(hidden_size, name='hidden_layer')(dropout1)
    
    dropout2 = tf.keras.layers.Dropout(0.4, name="dropout2")(hidden)

    classification = tf.keras.layers.Dense(3, activation='sigmoid',name='classification_layer')(dropout2)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], 
                                          outputs=[classification])
    
    METRICS = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), 
               balanced_recall, 
               balanced_precision, 
               balanced_f1_score,
               tf.keras.metrics.AUC(curve='ROC', name="auc_roc")]
    
    
    classification_model.compile(optimizer=optimizer,
                            loss=tf.keras.losses.CategoricalCrossentropy(),
                            metrics= METRICS)


    return classification_model




#     classification_model.compile(optimizer=optimizer,
#                             loss=tf.keras.losses.CategoricalCrossentropy(),
#                             metrics=tf.keras.metrics.CategoricalAccuracy('accuracy'))

In [29]:
def fine_tune_BERT(x_train, x_dev, x_test, y_train, y_dev, y_test, name, learning_rate = 5e-05, 
                   epsilon=1e-08, train_layers = -1, epochs = 10, batch_size = 16):
    ''' Fine tunes BERT base uncased with given data, allows your to set some hyperparameters
        returns test set accuracy, f1 score, and AUC_ROC score
    '''
    try:
        del classification_model
    except:
        pass

    try:
        del bert_model
    except:
        pass
    
    tf.keras.backend.clear_session()
    bert_model = TFBertModel.from_pretrained('bert-base-uncased')

    # early stopping callback
    
    earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', 
                                                      patience = 4,
                                                      restore_best_weights = True)
    
    # Create a callback that saves the model's weights
    
    path_name = './Saved_Models/EDA_b_2aug/' + name + '/' + name

    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=path_name, 
                                                     save_weights_only=True,
                                                     verbose=1,
                                                     monitor='val_accuracy',
                                                     save_best_only=True)
    
    # create classification model
    classification_model = create_classification_model(bert_model, 
                                                       optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=epsilon),
                                                       train_layers=train_layers)    
    
    model_fit = classification_model.fit([x_train.input_ids, x_train.token_type_ids, x_train.attention_mask],
                         y_train,
                         validation_data=([x_dev.input_ids, x_dev.token_type_ids, x_dev.attention_mask],
                         y_dev),
                        epochs=epochs,
                        batch_size=batch_size,
                        callbacks = [earlystop_callback, cp_callback])
    
    y_preds_array = classification_model.predict([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask])

    # convert to predicted one-hot encoding

    from keras.utils.np_utils import to_categorical
    y_preds = to_categorical(np.argmax(y_preds_array, 1), dtype = "int64")

    # convert back to labels

    y_test_cat = np.argmax(y_test, axis=1)
    y_preds_cat = np.argmax(y_preds, axis=1)
    
    # calculate metrics
    Accuracy = accuracy_score(y_test_cat, y_preds_cat)

    Macro_F1 = f1_score(y_test_cat, y_preds_cat, average='macro')

    ROC_AUC = roc_auc_score(y_test, y_preds, multi_class='ovo',average='macro')
    
    metrics_history = model_fit.history
    
    return Accuracy, Macro_F1, ROC_AUC, metrics_history

In [30]:
%%time
# original data set
Accuracy_orig, Macro_F1_orig, ROC_AUC_orig, metrics_orig = fine_tune_BERT(X_train_orig, X_dev_orig, X_test_orig, 
                                                            y_train_orig, y_dev_orig, y_test_orig, 'orig_data_2aug',
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.55382, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.55382 to 0.57722, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.57722 to 0.60738, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.60738 to 0.62663, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.62663 to 0.63599, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.63599 to 0.64483, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.64483 to 0.65263, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_


Epoch 00017: val_accuracy improved from 0.67499 to 0.67967, saving model to ./Saved_Models/EDA_b_2aug/orig_data_2aug\orig_data_2aug
Epoch 18/30

Epoch 00018: val_accuracy did not improve from 0.67967
Epoch 19/30

Epoch 00019: val_accuracy did not improve from 0.67967
Epoch 20/30

Epoch 00020: val_accuracy did not improve from 0.67967
Epoch 21/30

Epoch 00021: val_accuracy did not improve from 0.67967
Wall time: 17min 9s


In [31]:
%%time
# augmented with sr = 0.1
Accuracy_aug_sr, Macro_F1_aug_sr, ROC_AUC_aug_sr, metrics_sr = fine_tune_BERT(X_train_aug_sr, X_dev_aug_sr, X_test_aug_sr, 
                                                            y_train_aug_sr, y_dev_orig, y_test_orig, 'EDA_sr_2aug', 
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.59386, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.59386 to 0.62923, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.62923 to 0.64847, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.64847 to 0.66303, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.66303 to 0.66719, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.66719 to 0.67083, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.67083 to 0.68487, saving model to ./Saved_Models/EDA_b_2aug/EDA_sr_2aug\EDA_sr_2aug
Epoch 8/30

Epoch 00008: val_accuracy

In [32]:
%%time
# augmented with ri = 0.1
Accuracy_aug_ri, Macro_F1_aug_ri, ROC_AUC_aug_ri, metrics_ri = fine_tune_BERT(X_train_aug_ri, X_dev_aug_ri, X_test_aug_ri, 
                                                            y_train_aug_ri, y_dev_orig, y_test_orig, 'EDA_ri_2aug', 
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.60842, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.60842 to 0.64483, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.64483 to 0.65107, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.65107 to 0.66355, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 5/30

Epoch 00005: val_accuracy did not improve from 0.66355
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.66355 to 0.67395, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.67395
Epoch 8/30

Epoch 00008: val_accuracy improved from 0.67395 to 0.67759, saving model to ./Saved_Models/EDA_b_2aug/EDA_ri_2aug\EDA_ri_2aug
Epoch 9/30

Epoch 00009: val_accuracy imp

In [33]:
%%time
# augmented with rs = 0.1
Accuracy_aug_rs, Macro_F1_aug_rs, ROC_AUC_aug_rs, metrics_rs = fine_tune_BERT(X_train_aug_rs, X_dev_aug_rs, X_test_aug_rs, 
                                                            y_train_aug_rs, y_dev_orig, y_test_orig, 'EDA_rs_2aug',
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.60062, saving model to ./Saved_Models/EDA_b_2aug/EDA_rs_2aug\EDA_rs_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.60062 to 0.63599, saving model to ./Saved_Models/EDA_b_2aug/EDA_rs_2aug\EDA_rs_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.63599 to 0.64951, saving model to ./Saved_Models/EDA_b_2aug/EDA_rs_2aug\EDA_rs_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.64951 to 0.65419, saving model to ./Saved_Models/EDA_b_2aug/EDA_rs_2aug\EDA_rs_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.65419 to 0.67551, saving model to ./Saved_Models/EDA_b_2aug/EDA_rs_2aug\EDA_rs_2aug
Epoch 6/30

Epoch 00006: val_accuracy did not improve from 0.67551
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.67551
Epoch 8/30

Epoch 00008: val_accuracy did not improve from 0.67551
Epoch 9/30

Epoch 00009: val_accuracy improved from 0.67551 to 0.68019, saving model to ./Saved_Models/EDA_b_2au

In [34]:
%%time
# augmented with rd = 0.1
Accuracy_aug_rd, Macro_F1_aug_rd, ROC_AUC_aug_rd, metrics_rd = fine_tune_BERT(X_train_aug_rd, X_dev_aug_rd, X_test_aug_rd, 
                                                            y_train_aug_rd, y_dev_orig, y_test_orig, 'EDA_rd_2aug',
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.58034, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.58034 to 0.62142, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.62142 to 0.64327, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.64327 to 0.65679, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.65679 to 0.66303, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.66303 to 0.66459, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.66459 to 0.66927, saving model to ./Saved_Models/EDA_b_2aug/EDA_rd_2aug\EDA_rd_2aug
Epoch 8/30

Epoch 00008: val_accuracy

In [35]:
%%time
# augmented with all = 0.1
Accuracy_aug_all_1, Macro_F1_aug_all_1, ROC_AUC_aug_all_1, metrics_all_1 = fine_tune_BERT(X_train_all_1, X_dev_all_1, X_test_all_1, 
                                                            y_train_all_1, y_dev_orig, y_test_orig, 'EDA_all_1_2aug',
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.59750, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.59750 to 0.64015, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.64015 to 0.65731, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.65731 to 0.66095, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.66095 to 0.66719, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 6/30

Epoch 00006: val_accuracy improved from 0.66719 to 0.67343, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_2aug
Epoch 7/30

Epoch 00007: val_accuracy improved from 0.67343 to 0.67759, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_1_2aug\EDA_all_1_

In [36]:
%%time
# augmented with all = 0.5
Accuracy_aug_all_5, Macro_F1_aug_all_5, ROC_AUC_aug_all_5, metrics_all_5 = fine_tune_BERT(X_train_all_5, X_dev_all_5, X_test_all_5, 
                                                            y_train_all_5, y_dev_orig, y_test_orig, 'EDA_all_5_2aug',
                                                            learning_rate = 2e-05, epsilon=1e-08, 
                                                            train_layers = 1, epochs = 30, batch_size = 64)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Epoch 1/30

Epoch 00001: val_accuracy improved from -inf to 0.56006, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch 2/30

Epoch 00002: val_accuracy improved from 0.56006 to 0.62559, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch 3/30

Epoch 00003: val_accuracy improved from 0.62559 to 0.64015, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch 4/30

Epoch 00004: val_accuracy improved from 0.64015 to 0.65367, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch 5/30

Epoch 00005: val_accuracy improved from 0.65367 to 0.65887, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch 6/30

Epoch 00006: val_accuracy did not improve from 0.65887
Epoch 7/30

Epoch 00007: val_accuracy did not improve from 0.65887
Epoch 8/30

Epoch 00008: val_accuracy improved from 0.65887 to 0.67291, saving model to ./Saved_Models/EDA_b_2aug/EDA_all_5_2aug\EDA_all_5_2aug
Epoch

In [37]:
# Accuracy_orig, Macro_F1_orig, ROC_AUC_orig, metrics_orig

In [38]:
# Accuracy_aug_sr, Macro_F1_aug_sr, ROC_AUC_aug_sr, metrics_sr

In [39]:
# Accuracy_aug_ri, Macro_F1_aug_ri, ROC_AUC_aug_ri, metrics_ri

In [40]:
# Accuracy_aug_rs, Macro_F1_aug_rs, ROC_AUC_aug_rs, metrics_rs

In [41]:
# Accuracy_aug_rd, Macro_F1_aug_rd, ROC_AUC_aug_rd, metrics_rd

In [42]:
# Accuracy_aug_all_1, Macro_F1_aug_all_1, ROC_AUC_aug_all_1, metrics_all_1

In [43]:
# Accuracy_aug_all_5, Macro_F1_aug_all_5, ROC_AUC_aug_all_5, metrics_all_5

In [44]:
trial_name_list = ['Original Data', 'Augmented SR 0.1', 'Augmented RI 0.1', 
                   'Augmented RS 0.1', 'Augmented RD 0.1', 'Augmented All 0.1', 'Augmented All 0.5']

acc_list = [Accuracy_orig, Accuracy_aug_sr, Accuracy_aug_ri, Accuracy_aug_rs, 
            Accuracy_aug_rd, Accuracy_aug_all_1, Accuracy_aug_all_5]

macro_f1_list = [Macro_F1_orig, Macro_F1_aug_sr, Macro_F1_aug_ri, Macro_F1_aug_rs, 
                 Macro_F1_aug_rd, Macro_F1_aug_all_1, Macro_F1_aug_all_5]

roc_auc_list = [ROC_AUC_orig, ROC_AUC_aug_sr, ROC_AUC_aug_ri, ROC_AUC_aug_rs, 
                ROC_AUC_aug_rd, ROC_AUC_aug_all_1, ROC_AUC_aug_all_5]

In [45]:
result_dict = {'Trial Name' : trial_name_list, 'Test Accuracy Score' : acc_list, 
               'Test Macro F1 Score' : macro_f1_list, 'Test ROC AUC Score' : roc_auc_list}

In [46]:
results_df = pd.DataFrame(result_dict)

results_df

Unnamed: 0,Trial Name,Test Accuracy Score,Test Macro F1 Score,Test ROC AUC Score
0,Original Data,0.704108,0.683571,0.766626
1,Augmented SR 0.1,0.706708,0.690633,0.77154
2,Augmented RI 0.1,0.706708,0.678945,0.766378
3,Augmented RS 0.1,0.702028,0.682531,0.767242
4,Augmented RD 0.1,0.704108,0.685222,0.767748
5,Augmented All 0.1,0.700468,0.680099,0.765551
6,Augmented All 0.5,0.709308,0.688697,0.770174


In [48]:
results_df.to_csv('./Saved_Models/EDA_b_2aug/All_DA_BERT_base_uncased_2aug.csv')

In [49]:
metrics_org_df = pd.DataFrame(metrics_orig)

metrics_org_df

Unnamed: 0,loss,accuracy,balanced_recall,balanced_precision,balanced_f1_score,auc_roc,val_loss,val_accuracy,val_balanced_recall,val_balanced_precision,val_balanced_f1_score,val_auc_roc
0,1.302126,0.410323,0.596227,0.379239,0.462897,0.586618,0.946449,0.553822,0.730728,0.449926,0.555386,0.715429
1,1.11246,0.47338,0.666064,0.419393,0.514376,0.653588,0.883785,0.577223,0.760037,0.495105,0.598369,0.749245
2,1.039878,0.503543,0.690905,0.441141,0.538162,0.685419,0.841491,0.607384,0.797062,0.51511,0.624579,0.771678
3,0.989183,0.531756,0.717991,0.455955,0.557389,0.708891,0.82217,0.626625,0.791272,0.524362,0.629554,0.784234
4,0.951159,0.548008,0.730004,0.469056,0.570708,0.725125,0.811679,0.635985,0.795569,0.519307,0.627202,0.792961
5,0.927721,0.565364,0.752061,0.478627,0.584627,0.736992,0.800559,0.644826,0.801874,0.523587,0.632252,0.798544
6,0.905224,0.582396,0.748439,0.484215,0.587644,0.745558,0.789761,0.652626,0.801007,0.533724,0.639368,0.803863
7,0.89955,0.583371,0.761611,0.488783,0.595107,0.749046,0.778341,0.655226,0.827495,0.542153,0.653905,0.799929
8,0.886113,0.590262,0.761964,0.48855,0.59502,0.754307,0.770328,0.657306,0.818399,0.542269,0.651111,0.80703
9,0.868758,0.602223,0.771333,0.498669,0.605378,0.762012,0.766495,0.658866,0.812213,0.55057,0.655185,0.808352


In [50]:
metrics_list = [metrics_orig, metrics_sr, metrics_ri, metrics_rs, metrics_rd, metrics_all_1, metrics_all_5]
name_list = ['fit_metrics_orig.csv', 'fit_metrics_sr.csv', 'fit_metrics_ri.csv', 'fit_metrics_rs.csv', 'fit_metrics_rd.csv', 'fit_metrics_all_1.csv', 'fit_metrics_all_5.csv']

i = 0
for m in metrics_list:
    df = pd.DataFrame(m)
    df.to_csv(name_list[i])
    i += 1

        