# Import Packages

In [84]:
import os
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
import matplotlib.pyplot as plt
from transformers import TrainingArguments

import utility.ModelConfig as mc

import sqlite3

import utility.utility as util

# resets import once changes have been applied
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load ModelConfig

In [85]:
"""
path to local
"""
path_cwd = os.getcwd()

# File name of the pickled ModelConfig that this notebook inspects.
_name_config_file = "ModelConfig_roberta-base_HalaJada_FinStmts_ConsUncons_Sliding_English_SeqClass_14_02_24_01_07.pkl"

# Location of that file relative to the working directory
# (it is joined with path_cwd when the file is opened).
path_file_modelconfig = os.path.join("modelconfigs", _name_config_file)

In [86]:
# Deserialize the stored ModelConfig object from <cwd>/modelconfigs/<file>.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only open config files that were produced by this project.
model_config = None
with open(os.path.join(path_cwd, path_file_modelconfig), mode="rb") as config_file:
    model_config = pickle.load(config_file)

# Read each attribute of an instance of ModelConfig

In [87]:
# Name of the base model recorded in the config (a checkpoint id such as 'roberta-base').
model_config.base_model

'roberta-base'

In [88]:
# Timestamp string of the initial training run; the same value is the suffix of
# path_initial_training and of the config file name.
# Presumably formatted DD_MM_YY_HH_MM — TODO confirm.
model_config.timestamp_initial

'14_02_24_01_07'

In [89]:
# Boolean flag stored on the config; presumably controls whether the model's
# classification head is re-initialised — TODO confirm against ModelConfig.
model_config.reset_model_head

False

In [90]:
# Free-text description of the task / run stored with the config.
model_config.task

'Binary Classification _ with study object and hps log history'

In [91]:
# Short key naming the loss function; 'ce' presumably stands for cross-entropy — TODO confirm.
model_config.loss_fct

'ce'

In [92]:
# Boolean flag; presumably True means the dataset is taken from the hub
# (dataset_name_hub) rather than from path_dataset_local — TODO confirm.
model_config.from_hub

True

In [93]:
# Hub dataset identifier in '<owner>/<dataset>' form.
model_config.dataset_name_hub

'HalaJada/FinStmts_ConsUncons_Sliding_English_SeqClass'

In [94]:
# Name of a locally stored dataset; an empty string in this config
# (presumably unused when from_hub is True — TODO confirm).
model_config.dataset_name_local

''

In [95]:
# Relative directory configured for locally stored datasets.
model_config.path_dataset_local

'datasets/'

In [96]:
# Number of target classes configured for the classifier.
model_config.num_labels

2

In [97]:
# Name of the class-weighting scheme; 'rev_prop' presumably means weights
# inversely proportional to class frequency — TODO confirm.
model_config.weight_scheme

'rev_prop'

In [98]:
# List of class weights, one entry per label; presumably derived from
# weight_scheme and applied in the loss — TODO confirm.
model_config.class_weights

[2.170212765957447, 1.8545454545454547]

In [99]:
# Names of the evaluation metrics recorded for this config.
model_config.eval_metrics

['accuracy', 'precision', 'recall', 'f1', 'matthews_correlation']

In [100]:
# Metric key used to pick the best model; it carries the same 'eval_' prefix as
# the metric columns in the training logs shown further down.
model_config.metric_best_model

'eval_matthews_correlation'

In [101]:
# Optimisation direction stored alongside metric_best_model
# (presumably 'maximize' or 'minimize' — TODO confirm the allowed values).
model_config.metric_direction

'maximize'

In [102]:
# Number of hyperparameter-search trials configured for the study.
model_config.num_trials

1

In [103]:
# String setting (e.g. 'unfrozen'); presumably states whether the base model's
# layers were frozen during training — TODO confirm.
model_config.frozen

'unfrozen'

In [104]:
# Best result of the hyperparameter search: a BestRun holding the run id, the
# objective value and the selected hyperparameters.
model_config.best_run

BestRun(run_id='0', objective=0.8478319534691948, hyperparameters={'learning_rate': 7.619126050563358e-06, 'per_device_train_batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 0.07369910091419253, 'warmup_ratio': 0.08071147902065257}, run_summary=None)

In [105]:
# Relative output directory of the initial training run
# (its name ends with timestamp_initial).
model_config.path_initial_training

'training_data/roberta-base/initial_training_14_02_24_01_07'

In [106]:
# NOTE(review): the saved run shows no output for this cell, which is what a
# value of None would produce — confirm whether flag_mv is expected to be set,
# given that evaluation_results_mv is populated further down.
model_config.flag_mv

In [107]:
# DataFrame with the logged loss / eval metrics per epoch of the hyperparameter
# search; the trial_no column identifies the trial each row belongs to.
model_config.hps_log_df

Unnamed: 0,loss,learning_rate,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_matthews_correlation,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,step,trial_no
0,0.3569,6e-06,0.289654,0.905172,0.928571,0.89418,0.911051,0.810251,22.4851,92.861,11.608,1.0,612,0
1,0.164,3e-06,0.272358,0.922414,0.911864,0.948854,0.929991,0.843904,22.4126,93.162,11.645,2.0,1224,0
2,0.106,0.0,0.330242,0.92433,0.91286,0.951499,0.931779,0.847832,22.3287,93.512,11.689,3.0,1836,0


In [108]:
# Name of the hyperparameter study; it also prefixes the study DB file name
# in path_study_db.
model_config.study_name

'test'

In [109]:
# Relative path of the study's SQLite database file (opened with sqlite3 in the next cell).
model_config.path_study_db

'study_dbs/test_roberta-base_HalaJada_FinStmts_ConsUncons_Sliding_English_SeqClass_14_02_24_01_07.db'

In [110]:
# Read every row of the `trial_params` table from the study's SQLite database
# and show it through the project helper.
# NOTE: sqlite3.connect() creates an empty database file if the path does not
# exist, in which case the SELECT below fails with "no such table".
conn = sqlite3.connect(os.path.join(path_cwd, model_config.path_study_db))
try:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM trial_params")
    # Despite its name, `tables` holds the fetched rows (a list of tuples).
    tables = cursor.fetchall()
finally:
    # Always release the database handle, even when the query raises;
    # previously a failure left the connection open for the kernel's lifetime.
    conn.close()

display(util.process_study_db_trial_params(tables))

Unnamed: 0,learning_rate,per_device_train_batch_size,num_train_epochs,weight_decay,warmup_ratio
0,8e-06,16,3.0,0.073699,0.080711


In [111]:
# Timestamp string of the final training run; the same value is the suffix of
# path_final_training and path_trained_model.
model_config.timestamp_final

'14_02_24_07_31'

In [112]:
# Relative output directory of the final training run
# (its name ends with timestamp_final).
model_config.path_final_training

'training_data/roberta-base/final_training_14_02_24_07_31'

In [113]:
# Relative directory where the trained model is stored
# (named '<base_model>_<timestamp_final>').
model_config.path_trained_model

'trained_models/roberta-base_14_02_24_07_31'

In [114]:
# DataFrame with the logged loss / eval metrics per epoch; presumably the log
# of the final training run (path_final_training) — TODO confirm.
model_config.training_log_df

Unnamed: 0,loss,learning_rate,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_matthews_correlation,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,step
1,0.3324,6e-06,0.202017,0.920039,0.952068,0.89805,0.92427,0.841337,22.5138,92.21,11.548,1.0,743
2,0.1549,3e-06,0.240661,0.930154,0.958062,0.911348,0.934121,0.861115,22.6315,91.731,11.488,2.0,1486
3,0.1034,0.0,0.268799,0.937861,0.963788,0.920213,0.941497,0.876382,23.0092,90.225,11.3,3.0,2229


In [115]:
# Dict of evaluation metrics keyed by metric name.
# NOTE(review): in the saved run, precision and recall here (~0.920 / ~0.964) are
# the reverse of eval_precision / eval_recall in the last row of training_log_df
# (~0.964 / ~0.920), while accuracy, f1 and MCC agree. This looks like swapped
# arguments where the results are computed — confirm in the evaluation code.
model_config.evaluation_results

{'accuracy': 0.9378612716763006,
 'precision': 0.9202127659574468,
 'recall': 0.9637883008356546,
 'f1': 0.9414965986394558,
 'matthews_correlation': 0.876381746900537}

In [116]:
# Render the stored confusion matrix as a labelled table.
# Assumes rows are actual classes and columns are predicted classes — TODO confirm
# against the code that fills model_config.confusion_matrix.
# NOTE(review): with this labelling the saved matrix implies precision
# 1038/1077 ≈ 0.964 and recall 1038/1128 ≈ 0.920, the reverse of what
# model_config.evaluation_results reports — check which side is swapped.
pd.DataFrame(
    data=model_config.confusion_matrix,
    index=["Actual Negative", "Actual Positive"],
    columns=["Predicted Negative", "Predicted Positive"],
)

Unnamed: 0,Predicted Negative,Predicted Positive
Actual Negative,909,39
Actual Positive,90,1038


In [117]:
# Dict of evaluation metrics for the "_mv" variant of the evaluation
# (presumably majority voting over sliding windows — TODO confirm).
# NOTE(review): as with evaluation_results, precision and recall look swapped
# relative to confusion_matrix_mv when its rows are read as actual classes.
model_config.evaluation_results_mv

{'accuracy': 0.9682080924855492,
 'precision': 0.9680851063829787,
 'recall': 0.9732620320855615,
 'f1': 0.9706666666666668,
 'matthews_correlation': 0.9359827484868714}

In [118]:
# Render the stored "_mv" confusion matrix as a labelled table.
# Assumes rows are actual classes and columns are predicted classes — TODO confirm
# against the code that fills model_config.confusion_matrix_mv.
# NOTE(review): with this labelling the saved matrix implies precision
# 182/187 ≈ 0.973 and recall 182/188 ≈ 0.968, the reverse of what
# model_config.evaluation_results_mv reports — check which side is swapped.
pd.DataFrame(
    data=model_config.confusion_matrix_mv,
    index=["Actual Negative", "Actual Positive"],
    columns=["Predicted Negative", "Predicted Positive"],
)

Unnamed: 0,Predicted Negative,Predicted Positive
Actual Negative,153,5
Actual Positive,6,182
