# Import Library

In [None]:
!pip install sastrawi --quiet

In [None]:
seed = 42

In [None]:
import os
os.environ['PYTHONHASHSEED']=str(seed)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import transformers
import string
import re
import random
import keras

from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam, SGD
from tqdm import tqdm
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from transformers import logging
from keras.callbacks import EarlyStopping, ModelCheckpoint


# Silence transformers below error level. A preceding set_verbosity_warning()
# call would be overridden immediately, so only the effective level is set.
logging.set_verbosity_error()

# Seed every random number generator used in this notebook.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# assigning it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Read Data

In [None]:
# Load the manually labelled tweets, keep the text and the accident flag,
# and expose the flag under the generic column name `label`.
data = (
    pd.read_csv('/kaggle/input/accident/twitter_label_manual.csv')
    .loc[:, ['full_text', 'is_accident']]
    .rename(columns={'is_accident': 'label'})
)
data

In [None]:
data.isna().sum()

In [None]:
data['label'].value_counts()

# Model

In [None]:
# Checkpoint identifiers handed to the `from_pretrained` calls further down
# (tokenizers and encoder models), one per compared architecture.
pretrained_indobert = 'indolem/indobert-base-uncased'
pretrained_indobertweet = 'indolem/indobertweet-base-uncased'
pretrained_roberta_wiki = 'cahya/roberta-base-indonesian-522M'
pretrained_roberta_oscar = 'flax-community/indonesian-roberta-base'

# Clean Data

In [None]:
# Stop words provided by Sastrawi; `clean_tweet` drops every token found here.
stop_words = StopWordRemoverFactory().get_stop_words()
len(stop_words)

In [None]:
import emoji

def remove_emoji(text):
    """Drop every whitespace-separated token that contains an emoji.

    Args:
        text: Tweet text as ``str``. UTF-8 encoded ``bytes`` are also accepted
            and decoded first.

    Returns:
        The surviving tokens joined by single spaces. Because the text is
        split on whitespace and re-joined, runs of whitespace (newlines
        included) collapse to one space.
    """
    # Python 3 ``str`` has no ``.decode()``; only real byte strings are decoded.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    # ``emoji_count`` is used instead of a membership test against
    # ``emoji.UNICODE_EMOJI``, which current releases of the package no longer
    # provide. NOTE(review): confirm against the installed ``emoji`` version.
    return ' '.join(
        token for token in text.split() if emoji.emoji_count(token) == 0
    )

In [None]:
def clean_tweet(text):
    """Normalise one tweet for tokenization.

    The text is lower-cased, passed through ``remove_emoji``, stripped of line
    breaks, user mentions, links, digits and ASCII punctuation, and finally
    filtered against ``stop_words``.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text with tokens separated by single spaces.
    """
    # (pattern, replacement) pairs, applied in this order.
    substitutions = (
        ('\n', ' '),         # line breaks
        (r'@\w+', ' '),      # user mentions
        (r'http\S+', ' '),   # links
        (r'\d+', ''),        # digit runs
    )

    cleaned = remove_emoji(text.lower())
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Punctuation characters are deleted outright, not replaced by a space.
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = cleaned.translate(punctuation_table)

    kept_tokens = [token for token in cleaned.split() if token not in stop_words]
    return ' '.join(kept_tokens).strip()

In [None]:
# Store the cleaned tweet in `text` and discard the raw column.
data['text'] = data['full_text'].apply(clean_tweet)
data = data.drop(columns='full_text')
data

In [None]:
from statistics import mean

# Character lengths of the cleaned tweets only. Taking per-column maxima over
# the whole frame would mix the label column into the figure and report a
# number that is neither an average nor a maximum of the text lengths.
text_lengths = data['text'].astype('str').str.len()
max_len = text_lengths.max()
print(f'Average text length: {text_lengths.mean()}')
print(f'Maximum text length: {max_len}')

In [None]:
# Two-stage split: hold out 20% of the rows for testing, then take 10% of the
# remaining rows as validation data. Both calls use `seed` as random_state.
train_valid, test_data = train_test_split(data, test_size=0.2, random_state=seed)
train_data, validation_data = train_test_split(train_valid, test_size=0.1, random_state=seed)
print(f'Train data size: {train_data.shape}')
print(f'Validation data size: {validation_data.shape}')
print(f'Test data size: {test_data.shape}')

# Tokenizer

In [None]:
def encode(tokenizer, data, max_length=128):
    """Tokenize the ``text`` column into fixed-length id and mask arrays.

    Args:
        tokenizer: Callable invoked once per text as
            ``tokenizer(text, max_length=..., padding='max_length',
            truncation=True)``; its result must provide ``input_ids`` and
            ``attention_mask`` entries.
        data: Table-like object whose ``data['text']`` yields the texts.
        max_length: Length every sequence is padded or truncated to. The
            default of 128 matches the input width used by the model builders
            in this file.

    Returns:
        A tuple ``(input_ids, attention_masks)`` of NumPy arrays holding one
        row per text.
    """
    input_ids = []
    attention_masks = []

    for text in data['text']:
        # Padding alone does not shorten long inputs: without truncation a
        # text with more than `max_length` tokens would come back longer than
        # the other rows and the resulting array would be ragged.
        encoded = tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True,
        )

        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])

    return np.array(input_ids), np.array(attention_masks)

# Training-Related Functions

In [None]:
def save_history(history, lr, batch, model):
    """Write the per-epoch metrics of one training run to a CSV file.

    The file is stored as ``results/history/{model}_batch={batch}_lr={lr}.csv``
    with one row per epoch and one column per recorded metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists.
        lr: Learning rate of the run; only used in the file name.
        batch: Batch size of the run; only used in the file name.
        model: Model display name; only used in the file name.
    """
    path = 'results/history'

    # exist_ok removes the check-then-create race and makes repeat calls a no-op.
    os.makedirs(path, exist_ok=True)

    history_df = pd.DataFrame(history.history)
    history_df.to_csv(f'{path}/{model}_batch={batch}_lr={lr}.csv')
        

In [None]:
def save_graph(history, lr, batch, model):
    """Save train-versus-validation curves of one run as PNG files.

    One figure per metric (accuracy, loss, F1-score, precision, recall) is
    written to ``results/graph/{model}_batch={batch}_lr={lr}/``.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists and also holds the ``val_``-prefixed
            counterpart of every metric.
        lr: Learning rate of the run; used in titles and file names.
        batch: Batch size of the run; used in titles and file names.
        model: Model display name; used in titles and file names.

    Raises:
        KeyError: If an expected metric is missing from ``history.history``.
    """
    # (key in history.history, label used in the title and on the y-axis)
    metrics = (
        ('accuracy', 'Accuracy'),
        ('loss', 'Loss'),
        ('f1', 'F1-Score'),
        ('precision', 'Precision'),
        ('recall', 'Recall'),
    )

    run_name = f'{model}_batch={batch}_lr={lr}'
    path = f'results/graph/{run_name}'
    os.makedirs(path, exist_ok=True)

    for key, label in metrics:
        train_values = history.history[key]
        validation_values = history.history[f'val_{key}']

        # Epochs are numbered from 1 on the x-axis.
        plt.plot(range(1, len(train_values) + 1), train_values, label='Train')
        plt.plot(range(1, len(validation_values) + 1), validation_values, label='Validation')
        plt.title(f'{model} {label} Curves\nBatch Size = {batch}, Learning Rate = {lr}')
        plt.ylabel(label)
        plt.xlabel('Epoch')
        plt.legend()
        plt.savefig(f'{path}/{run_name}_{key}.png', facecolor='white', dpi=300)
        # Close the figure so the next metric starts on an empty canvas.
        plt.close()

In [None]:
from keras import backend as K

def recall(y_true, y_pred):
    """Return true positives divided by actual positives for the given tensors.

    Both products and labels are clipped to [0, 1] and rounded before being
    summed; ``K.epsilon()`` in the denominator guards against division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision(y_true, y_pred):
    """Return true positives divided by predicted positives for the given tensors.

    Products and predictions are clipped to [0, 1] and rounded before being
    summed; ``K.epsilon()`` in the denominator guards against division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1(y_true, y_pred):
    """Return the harmonic mean of ``precision`` and ``recall`` for the given tensors.

    ``K.epsilon()`` is added to the denominator so the result stays finite
    when both precision and recall are zero.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    ratio = (prec * rec) / (prec + rec + K.epsilon())
    return 2 * ratio

# IndoBERT

In [None]:
from transformers import BertTokenizer

indobert_tokenizer  = BertTokenizer.from_pretrained(pretrained_indobert)

In [None]:
train_input_ids, train_attention_masks = encode(indobert_tokenizer, train_data)
validation_input_ids, validation_attention_masks = encode(indobert_tokenizer, validation_data)
test_input_ids, test_attention_masks = encode(indobert_tokenizer, test_data)

In [None]:
def indobert(model, dropout, learning_rate):
    """Put a binary-classification head on top of a pretrained encoder.

    Args:
        model: Encoder called as ``model([input_ids, attention_masks])``.
            Element ``[1]`` of its output feeds the head (presumably the
            pooled output of a ``TFBertModel`` - confirm).
        dropout: Rate of the dropout layer between the two dense layers.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled Keras model taking ``[input_ids, attention_masks]``
        (each 128 int32 values per example) and emitting one sigmoid output.
    """
    token_ids = tf.keras.Input(shape=(128,), dtype='int32')
    token_mask = tf.keras.Input(shape=(128,), dtype='int32')

    encoded = model([token_ids, token_mask])[1]

    hidden = tf.keras.layers.Dense(32, activation='relu')(encoded)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)

    classifier = tf.keras.models.Model(inputs=[token_ids, token_mask], outputs=probability)
    classifier.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', f1, precision, recall],
    )

    return classifier

In [None]:
from transformers import TFBertModel

indobert_pretrained_model = TFBertModel.from_pretrained(pretrained_indobert, from_pt=True)

In [None]:
indobert_model = indobert(indobert_pretrained_model, dropout=0.1, learning_rate=5e-5)
indobert_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the IndoBERT architecture diagram into the illustration folder.
path = 'illustration'

# exist_ok replaces the separate existence check and its race window.
os.makedirs(path, exist_ok=True)

plot_model(indobert_model, to_file=f'{path}/IndoBERT.png', expand_nested=True, show_shapes=True)

In [None]:
del(indobert_pretrained_model)
del(indobert_model)
keras.backend.clear_session()

In [None]:
train_targets = train_data['label'].values
validation_targets = validation_data['label'].values
test_targets = test_data['label'].values

In [None]:
# Hyper-parameter grid: every batch size is combined with every learning rate.
learing_rate = [5e-5, 3e-5, 2e-5]
batch = [16, 32]

# Parallel result lists; each run appends exactly one entry to every list.
batch_for_test_result = []
learing_rate_for_test_result = []
test_loss = []
test_accuracy = []
test_f1 = []
test_precision = []
test_recall = []

# One callback instance is shared by all runs of the grid.
early_stopping = EarlyStopping(monitor='val_f1', patience=3, verbose=0, mode='max')

os.makedirs('results/models', exist_ok=True)

for b in batch:
    for lr in learing_rate:
        print(f'Training IndoBERT Model (Batch Size = {b}, Learning Rate = {lr})')

        # Each run starts from a freshly loaded pretrained encoder.
        indobert_pretrained_model = TFBertModel.from_pretrained(pretrained_indobert, from_pt=True)
        indobert_model = indobert(indobert_pretrained_model, dropout=0.1, learning_rate=lr)

        # Single source of truth for the checkpoint written during training
        # and read back before testing.
        weights_path = f'results/models/indobert_batch={b}_lr={lr}.hdf5'

        save_best = ModelCheckpoint(
            weights_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_f1',
            mode='max'
        )

        history = indobert_model.fit(
            [train_input_ids, train_attention_masks],
            train_targets,
            validation_data=([validation_input_ids, validation_attention_masks], validation_targets),
            callbacks=[early_stopping, save_best],
            epochs=10,
            batch_size=b,
            verbose=1
        )

        save_graph(history, lr, b, 'IndoBERT')
        save_history(history, lr, b, 'IndoBERT')

        print(f'Testing IndoBERT Model (Batch Size = {b}, Learning Rate = {lr})')

        # Evaluate the checkpoint with the best val_f1, not the last epoch.
        indobert_model.load_weights(weights_path)

        evaluation = indobert_model.evaluate(
            [test_input_ids, test_attention_masks],
            test_targets,
            batch_size=b,
            verbose=1
        )

        # `evaluation` is ordered as loss followed by the compiled metrics:
        # accuracy, f1, precision, recall.
        batch_for_test_result.append(b)
        learing_rate_for_test_result.append(lr)
        test_loss.append(evaluation[0])
        test_accuracy.append(evaluation[1])
        test_f1.append(evaluation[2])
        test_precision.append(evaluation[3])
        test_recall.append(evaluation[4])

        # Drop the references and reset Keras state before the next run.
        del indobert_pretrained_model
        del indobert_model
        del history
        del evaluation
        keras.backend.clear_session()


test_result_df = pd.DataFrame({
    'Training Batch Size': batch_for_test_result,
    'Training Learning Rate': learing_rate_for_test_result,
    'Test Loss': test_loss,
    'Test Accuracy': test_accuracy,
    'Test F1-Score': test_f1,
    'Test Precision': test_precision,
    'Test Recall': test_recall,
})

path = 'results/evaluation'
os.makedirs(path, exist_ok=True)

test_result_df.to_csv(f'{path}/IndoBERT.csv', index=False)

# IndoBERTweet

In [None]:
from transformers import BertTokenizer

indobertweet_tokenizer  = BertTokenizer.from_pretrained(pretrained_indobertweet)

In [None]:
train_input_ids, train_attention_masks = encode(indobertweet_tokenizer, train_data)
validation_input_ids, validation_attention_masks = encode(indobertweet_tokenizer, validation_data)
test_input_ids, test_attention_masks = encode(indobertweet_tokenizer, test_data)

In [None]:
def indobertweet(model, dropout, learning_rate):
    """Attach a sigmoid classification head to a pretrained encoder.

    Args:
        model: Encoder called as ``model([input_ids, attention_masks])``.
            Element ``[1]`` of its output is used as the sentence
            representation (presumably the pooled output - confirm).
        dropout: Rate of the dropout layer that follows the hidden dense layer.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled Keras model with two 128-wide int32 inputs and a single
        sigmoid output.
    """
    ids_in = tf.keras.Input(shape=(128,), dtype='int32')
    mask_in = tf.keras.Input(shape=(128,), dtype='int32')

    encoder_outputs = model([ids_in, mask_in])
    features = encoder_outputs[1]

    features = tf.keras.layers.Dense(32, activation='relu')(features)
    features = tf.keras.layers.Dropout(dropout)(features)
    prediction = tf.keras.layers.Dense(1, activation='sigmoid')(features)

    network = tf.keras.models.Model(inputs=[ids_in, mask_in], outputs=prediction)
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', f1, precision, recall],
    )

    return network

In [None]:
from transformers import TFBertModel

indobertweet_pretrained_model = TFBertModel.from_pretrained(pretrained_indobertweet, from_pt=True)

In [None]:
indobertweet_model = indobertweet(indobertweet_pretrained_model, dropout=0.1, learning_rate=5e-5)
indobertweet_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the IndoBERTweet architecture diagram into the illustration folder.
path = 'illustration'

# exist_ok replaces the separate existence check and its race window.
os.makedirs(path, exist_ok=True)

plot_model(indobertweet_model, to_file=f'{path}/IndoBERTweet.png', expand_nested=True, show_shapes=True)

In [None]:
del(indobertweet_pretrained_model)
del(indobertweet_model)
keras.backend.clear_session()

In [None]:
train_targets = train_data['label'].values
validation_targets = validation_data['label'].values
test_targets = test_data['label'].values

In [None]:
# Hyper-parameter grid: every batch size is combined with every learning rate.
learing_rate = [5e-5, 3e-5, 2e-5]
batch = [16, 32]

# Parallel result lists; each run appends exactly one entry to every list.
batch_for_test_result = []
learing_rate_for_test_result = []
test_loss = []
test_accuracy = []
test_f1 = []
test_precision = []
test_recall = []

# One callback instance is shared by all runs of the grid.
early_stopping = EarlyStopping(monitor='val_f1', patience=3, verbose=0, mode='max')

os.makedirs('results/models', exist_ok=True)

for b in batch:
    for lr in learing_rate:
        print(f'Training IndoBERTweet Model (Batch Size = {b}, Learning Rate = {lr})')

        # Each run starts from a freshly loaded pretrained encoder.
        indobertweet_pretrained_model = TFBertModel.from_pretrained(pretrained_indobertweet, from_pt=True)
        indobertweet_model = indobertweet(indobertweet_pretrained_model, dropout=0.1, learning_rate=lr)

        # Single source of truth for the checkpoint written during training
        # and read back before testing.
        weights_path = f'results/models/indobertweet_batch={b}_lr={lr}.hdf5'

        save_best = ModelCheckpoint(
            weights_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_f1',
            mode='max'
        )

        history = indobertweet_model.fit(
            [train_input_ids, train_attention_masks],
            train_targets,
            validation_data=([validation_input_ids, validation_attention_masks], validation_targets),
            callbacks=[early_stopping, save_best],
            epochs=10,
            batch_size=b,
            verbose=1
        )

        save_graph(history, lr, b, 'IndoBERTweet')
        save_history(history, lr, b, 'IndoBERTweet')

        print(f'Testing IndoBERTweet Model (Batch Size = {b}, Learning Rate = {lr})')

        # Evaluate the checkpoint with the best val_f1, not the last epoch.
        indobertweet_model.load_weights(weights_path)

        evaluation = indobertweet_model.evaluate(
            [test_input_ids, test_attention_masks],
            test_targets,
            batch_size=b,
            verbose=1
        )

        # `evaluation` is ordered as loss followed by the compiled metrics:
        # accuracy, f1, precision, recall.
        batch_for_test_result.append(b)
        learing_rate_for_test_result.append(lr)
        test_loss.append(evaluation[0])
        test_accuracy.append(evaluation[1])
        test_f1.append(evaluation[2])
        test_precision.append(evaluation[3])
        test_recall.append(evaluation[4])

        # Drop the references and reset Keras state before the next run.
        del indobertweet_pretrained_model
        del indobertweet_model
        del history
        del evaluation
        keras.backend.clear_session()


test_result_df = pd.DataFrame({
    'Training Batch Size': batch_for_test_result,
    'Training Learning Rate': learing_rate_for_test_result,
    'Test Loss': test_loss,
    'Test Accuracy': test_accuracy,
    'Test F1-Score': test_f1,
    'Test Precision': test_precision,
    'Test Recall': test_recall,
})

path = 'results/evaluation'
os.makedirs(path, exist_ok=True)

test_result_df.to_csv(f'{path}/IndoBERTweet.csv', index=False)

# RoBERTa Wiki

In [None]:
from transformers import RobertaTokenizer

roberta_wiki_tokenizer  = RobertaTokenizer.from_pretrained(pretrained_roberta_wiki)

In [None]:
train_input_ids, train_attention_masks = encode(roberta_wiki_tokenizer, train_data)
validation_input_ids, validation_attention_masks = encode(roberta_wiki_tokenizer, validation_data)
test_input_ids, test_attention_masks = encode(roberta_wiki_tokenizer, test_data)

In [None]:
def robertawiki(model, dropout, learning_rate):
    """Build and compile a binary classifier around a pretrained encoder.

    Args:
        model: Encoder called as ``model([input_ids, attention_masks])``.
            Element ``[1]`` of its output feeds the head (presumably the
            pooled output of a ``TFRobertaModel`` - confirm).
        dropout: Rate of the dropout layer between the two dense layers.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled Keras model; inputs are two int32 tensors of width 128,
        the output is a single sigmoid unit.
    """
    sequence_ids = tf.keras.Input(shape=(128,), dtype='int32')
    sequence_mask = tf.keras.Input(shape=(128,), dtype='int32')

    pooled = model([sequence_ids, sequence_mask])[1]

    dense_out = tf.keras.layers.Dense(32, activation='relu')(pooled)
    dropped = tf.keras.layers.Dropout(dropout)(dense_out)
    score = tf.keras.layers.Dense(1, activation='sigmoid')(dropped)

    wrapped = tf.keras.models.Model(inputs=[sequence_ids, sequence_mask], outputs=score)
    wrapped.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', f1, precision, recall],
    )

    return wrapped

In [None]:
from transformers import TFRobertaModel

robertawiki_pretrained_model = TFRobertaModel.from_pretrained(pretrained_roberta_wiki)

In [None]:
robertawiki_model = robertawiki(robertawiki_pretrained_model, dropout=0.1, learning_rate=5e-5)
robertawiki_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the RoBERTa Wiki architecture diagram into the illustration folder.
path = 'illustration'

# exist_ok replaces the separate existence check and its race window.
os.makedirs(path, exist_ok=True)

plot_model(robertawiki_model, to_file=f'{path}/Indonesian RoBERTa Wiki.png', expand_nested=True, show_shapes=True)

In [None]:
del(robertawiki_pretrained_model)
del(robertawiki_model)
keras.backend.clear_session()

In [None]:
train_targets = train_data['label'].values
validation_targets = validation_data['label'].values
test_targets = test_data['label'].values

In [None]:
# Hyper-parameter grid: every batch size is combined with every learning rate.
learing_rate = [1e-5, 2e-5, 3e-5]
batch = [16, 32]

# Parallel result lists; each run appends exactly one entry to every list.
batch_for_test_result = []
learing_rate_for_test_result = []
test_loss = []
test_accuracy = []
test_f1 = []
test_precision = []
test_recall = []

# One callback instance is shared by all runs of the grid.
early_stopping = EarlyStopping(monitor='val_f1', patience=3, verbose=0, mode='max')

os.makedirs('results/models', exist_ok=True)

for b in batch:
    for lr in learing_rate:
        print(f'Training RoBERTa Wiki Model (Batch Size = {b}, Learning Rate = {lr})')

        # Each run starts from a freshly loaded pretrained encoder.
        robertawiki_pretrained_model = TFRobertaModel.from_pretrained(pretrained_roberta_wiki)
        robertawiki_model = robertawiki(robertawiki_pretrained_model, dropout=0.1, learning_rate=lr)

        # Single source of truth for the checkpoint written during training
        # and read back before testing.
        weights_path = f'results/models/robertawiki_batch={b}_lr={lr}.hdf5'

        save_best = ModelCheckpoint(
            weights_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_f1',
            mode='max'
        )

        history = robertawiki_model.fit(
            [train_input_ids, train_attention_masks],
            train_targets,
            validation_data=([validation_input_ids, validation_attention_masks], validation_targets),
            callbacks=[early_stopping, save_best],
            epochs=4,
            batch_size=b,
            verbose=1
        )

        save_graph(history, lr, b, 'RoBERTa Wiki')
        save_history(history, lr, b, 'RoBERTa Wiki')

        print(f'Testing RoBERTa Wiki Model (Batch Size = {b}, Learning Rate = {lr})')

        # Evaluate the checkpoint with the best val_f1, not the last epoch.
        robertawiki_model.load_weights(weights_path)

        evaluation = robertawiki_model.evaluate(
            [test_input_ids, test_attention_masks],
            test_targets,
            batch_size=b,
            verbose=1
        )

        # `evaluation` is ordered as loss followed by the compiled metrics:
        # accuracy, f1, precision, recall.
        batch_for_test_result.append(b)
        learing_rate_for_test_result.append(lr)
        test_loss.append(evaluation[0])
        test_accuracy.append(evaluation[1])
        test_f1.append(evaluation[2])
        test_precision.append(evaluation[3])
        test_recall.append(evaluation[4])

        # Drop the references and reset Keras state before the next run.
        del robertawiki_pretrained_model
        del robertawiki_model
        del history
        del evaluation
        keras.backend.clear_session()


test_result_df = pd.DataFrame({
    'Training Batch Size': batch_for_test_result,
    'Training Learning Rate': learing_rate_for_test_result,
    'Test Loss': test_loss,
    'Test Accuracy': test_accuracy,
    'Test F1-Score': test_f1,
    'Test Precision': test_precision,
    'Test Recall': test_recall,
})

path = 'results/evaluation'
os.makedirs(path, exist_ok=True)

test_result_df.to_csv(f'{path}/RoBERTa Wiki.csv', index=False)

# RoBERTa OSCAR

In [None]:
from transformers import RobertaTokenizer

roberta_oscar_tokenizer  = RobertaTokenizer.from_pretrained(pretrained_roberta_oscar)

In [None]:
train_input_ids, train_attention_masks = encode(roberta_oscar_tokenizer, train_data)
validation_input_ids, validation_attention_masks = encode(roberta_oscar_tokenizer, validation_data)
test_input_ids, test_attention_masks = encode(roberta_oscar_tokenizer, test_data)

In [None]:
def robertaoscar(model, dropout, learning_rate):
    """Assemble a compiled binary classifier from a pretrained encoder.

    Args:
        model: Encoder called as ``model([input_ids, attention_masks])``.
            Element ``[1]`` of its output is passed to the dense head
            (presumably the pooled output - confirm).
        dropout: Rate of the dropout layer that follows the hidden dense layer.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled Keras model mapping two 128-wide int32 inputs to one
        sigmoid output.
    """
    ids_input = tf.keras.Input(shape=(128,), dtype='int32')
    mask_input = tf.keras.Input(shape=(128,), dtype='int32')
    model_inputs = [ids_input, mask_input]

    representation = model([ids_input, mask_input])[1]

    head = tf.keras.layers.Dense(32, activation='relu')(representation)
    head = tf.keras.layers.Dropout(dropout)(head)
    head = tf.keras.layers.Dense(1, activation='sigmoid')(head)

    full_model = tf.keras.models.Model(inputs=model_inputs, outputs=head)
    full_model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', f1, precision, recall],
    )

    return full_model

In [None]:
from transformers import TFRobertaModel

robertaoscar_pretrained_model = TFRobertaModel.from_pretrained(pretrained_roberta_oscar, from_pt=True)

In [None]:
robertaoscar_model = robertaoscar(robertaoscar_pretrained_model, dropout=0.1, learning_rate=5e-5)
robertaoscar_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the RoBERTa OSCAR architecture diagram into the illustration folder.
path = 'illustration'

# exist_ok replaces the separate existence check and its race window.
os.makedirs(path, exist_ok=True)

plot_model(robertaoscar_model, to_file=f'{path}/Indonesian RoBERTa OSCAR.png', expand_nested=True, show_shapes=True)

In [None]:
del(robertaoscar_pretrained_model)
del(robertaoscar_model)
keras.backend.clear_session()

In [None]:
train_targets = train_data['label'].values
validation_targets = validation_data['label'].values
test_targets = test_data['label'].values

In [None]:
# Hyper-parameter grid: every batch size is combined with every learning rate.
learing_rate = [1e-5, 2e-5, 3e-5]
batch = [16, 32]

# Parallel result lists; each run appends exactly one entry to every list.
batch_for_test_result = []
learing_rate_for_test_result = []
test_loss = []
test_accuracy = []
test_f1 = []
test_precision = []
test_recall = []

# One callback instance is shared by all runs of the grid.
early_stopping = EarlyStopping(monitor='val_f1', patience=3, verbose=0, mode='max')

os.makedirs('results/models', exist_ok=True)

for b in batch:
    for lr in learing_rate:
        print(f'Training RoBERTa OSCAR Model (Batch Size = {b}, Learning Rate = {lr})')

        # Each run starts from a freshly loaded pretrained encoder.
        robertaoscar_pretrained_model = TFRobertaModel.from_pretrained(pretrained_roberta_oscar, from_pt=True)
        robertaoscar_model = robertaoscar(robertaoscar_pretrained_model, dropout=0.1, learning_rate=lr)

        # Single source of truth for the checkpoint written during training
        # and read back before testing.
        weights_path = f'results/models/robertaoscar_batch={b}_lr={lr}.hdf5'

        save_best = ModelCheckpoint(
            weights_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_f1',
            mode='max'
        )

        history = robertaoscar_model.fit(
            [train_input_ids, train_attention_masks],
            train_targets,
            validation_data=([validation_input_ids, validation_attention_masks], validation_targets),
            callbacks=[early_stopping, save_best],
            epochs=4,
            batch_size=b,
            verbose=1
        )

        save_graph(history, lr, b, 'RoBERTa OSCAR')
        save_history(history, lr, b, 'RoBERTa OSCAR')

        print(f'Testing RoBERTa OSCAR Model (Batch Size = {b}, Learning Rate = {lr})')

        # Evaluate the checkpoint with the best val_f1, not the last epoch.
        robertaoscar_model.load_weights(weights_path)

        evaluation = robertaoscar_model.evaluate(
            [test_input_ids, test_attention_masks],
            test_targets,
            batch_size=b,
            verbose=1
        )

        # `evaluation` is ordered as loss followed by the compiled metrics:
        # accuracy, f1, precision, recall.
        batch_for_test_result.append(b)
        learing_rate_for_test_result.append(lr)
        test_loss.append(evaluation[0])
        test_accuracy.append(evaluation[1])
        test_f1.append(evaluation[2])
        test_precision.append(evaluation[3])
        test_recall.append(evaluation[4])

        # Drop the references and reset Keras state before the next run.
        del robertaoscar_pretrained_model
        del robertaoscar_model
        del history
        del evaluation
        keras.backend.clear_session()


test_result_df = pd.DataFrame({
    'Training Batch Size': batch_for_test_result,
    'Training Learning Rate': learing_rate_for_test_result,
    'Test Loss': test_loss,
    'Test Accuracy': test_accuracy,
    'Test F1-Score': test_f1,
    'Test Precision': test_precision,
    'Test Recall': test_recall,
})

path = 'results/evaluation'
os.makedirs(path, exist_ok=True)

test_result_df.to_csv(f'{path}/RoBERTa OSCAR.csv', index=False)