In [1]:
import os
import numpy as np
import pandas as pd
# from tqdm.auto import tqdm
from tqdm import tqdm
tqdm.pandas(position=0,leave=True)
import random
import argparse
import logging

import sklearn
from sklearn import metrics
from sklearn.metrics import roc_auc_score, f1_score,average_precision_score
from sklearn.metrics import precision_recall_fscore_support 
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc as auc_score
from sklearn.metrics import roc_curve
from sklearn.preprocessing import LabelEncoder, label_binarize

import textwrap

from datasets import load_dataset, load_metric, concatenate_datasets,DatasetDict,Dataset
from datasets import load_from_disk

import transformers
print("Transformers version is {}".format(transformers.__version__))

import torch
from torch.utils.data import DataLoader, RandomSampler

from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoModelWithLMHead,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    default_data_collator,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    get_linear_schedule_with_warmup,
    get_scheduler
)

import utils

import seaborn as sns
from pylab import rcParams
from matplotlib import pyplot as plt
from matplotlib import rc

sns.set(style="whitegrid",palette='muted',font_scale=1.2)
# rcParams['figure.figsize']=16,10

%config InlineBackend.figure_format="retina"
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


Transformers version is 4.19.0


In [2]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also stores ``seed`` in the ``PYTHONHASHSEED`` environment variable and
    switches cuDNN to deterministic mode with auto-tuning (benchmark) off.

    Args:
        seed: integer seed shared by every generator.
    """
    # Recorded in the environment; it is stored as a string because
    # os.environ only accepts str values.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Host-side generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, then every CUDA device).
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: no autotuner, deterministic algorithms only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [3]:
input_dir="s3://trident-retention-output/"
output_dir="s3://trident-retention-output/multi-class/"

# NOTE(review): read_pickle executes whatever the pickle contains; only load
# objects written by a trusted pipeline.
askunum_text=pd.read_pickle(os.path.join(input_dir,"askunum_text"))

# Normalise the Subtype column in one chain:
#   1. missing -> "", everything to lower-case strings
#   2. re-decode latin1 bytes as cp1252 (repairs mis-decoded characters)
#   3. spell out "/" and "&" -- regex=False is explicit because the default of
#      `regex` differs between pandas versions and these are literal characters
#   4. collapse runs of whitespace into a single space
askunum_text["Subtype"]=(
    askunum_text["Subtype"]
    .fillna("")
    .astype(str)
    .str.lower()
    .progress_apply(lambda x: x.encode("latin1").decode("cp1252"))
    .str.replace("/"," or ",regex=False)
    .str.replace("&"," and ",regex=False)
    .str.replace(r"\s{2,}", " ", regex=True)
)

100%|██████████| 1452978/1452978 [00:02<00:00, 655461.02it/s]


In [4]:
# Keyword lists per target category, forwarded as keyword arguments to
# utils.Sample_Creation (category name -> list of keywords).
kwargs={
    "billing_issue":["bill","billing"],
    "claim_issue":["claim","claims"],
    "eoi_issue":["eoi"],
    "new_plan_admin":["admin","administrator"],
}

sample_class=utils.Sample_Creation(askunum_text, **kwargs)

# 10% of the data each for validation and test.
train_df,val_df,test_df=sample_class.data_creation(val_ratio=0.10, test_ratio=0.10)

  train_df=train_df.append(_train)
  val_df=val_df.append(_val)
  test_df=test_df.append(_test)
  train_df=train_df.append(_train)
  val_df=val_df.append(_val)
  test_df=test_df.append(_test)
  train_df=train_df.append(_train)
  val_df=val_df.append(_val)
  test_df=test_df.append(_test)
  train_df=train_df.append(_train)
  val_df=val_df.append(_val)
  test_df=test_df.append(_test)
  train_df=train_df.append(_train)
  val_df=val_df.append(_val)
  test_df=test_df.append(_test)


In [5]:
def label_distribution(df,col):
    """Tabulate how often each value of ``df[col]`` occurs.

    Missing values are counted as their own row (``dropna=False``).

    Args:
        df: source DataFrame.
        col: name of the column to tabulate.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns ``col``, ``count``
        (absolute frequency) and ``percentage`` (fraction of all rows), ordered
        by descending count.
    """
    counts=df[col].value_counts(dropna=False)
    # rename_axis + reset_index(name=...) yields the same two columns no matter
    # how the installed pandas names the value_counts result (older releases
    # call the reset column "index", pandas >= 2.0 uses the column name and
    # "count"), so no version-specific rename is needed.
    dist=counts.rename_axis(col).reset_index(name="count")
    # Same numbers as value_counts(normalize=True): each count over the total.
    dist["percentage"]=dist["count"]/dist["count"].sum()
    return dist

def style_format(df, col, data_type="Training set"):
    return df.style.format({'count':'{:,}','percentage':'{:.2%}'})\
           .set_caption(f"{data_type} {col} distribution")\
           .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '15px')]}])

In [6]:
# Category distribution of the training split, ordered by category name.
label_train=label_distribution(train_df,col="new_category").sort_values("new_category")
style_format(label_train,col="new_category",  data_type="Training set")

Unnamed: 0,new_category,count,percentage
4,billing_issue,9445,6.23%
0,claim_issue,54270,35.80%
1,eoi_issue,45083,29.74%
3,new_plan_admin,12490,8.24%
2,other-category,30322,20.00%


In [7]:
# Category distribution of the test split, ordered by category name.
label_test=label_distribution(test_df,col="new_category").sort_values("new_category")
style_format(label_test,col="new_category",  data_type="Test set")

Unnamed: 0,new_category,count,percentage
4,billing_issue,1180,6.23%
0,claim_issue,6783,35.80%
1,eoi_issue,5635,29.74%
3,new_plan_admin,1561,8.24%
2,other-category,3790,20.00%


In [None]:
# wrapper = textwrap.TextWrapper(width=150) 
# # Randomly choose some examples.
# for i in range(10):
#     random.seed(101+i)

#     j = random.choice(train_df.index)
#     emails=train_df.loc[j,"TextBody"]
#     subtype=train_df.loc[j,"Subtype"]
#     category=train_df.loc[j,"new_category"]
#     unum_id=train_df.loc[j,"unum_id"]

#     print('')
#     print("*"*80)
#     print(f'*  Full TextBody : unum_id={unum_id}, category={category}, subtype={subtype} *')
#     print("*"*80)
#     print('')
#     # print(j)
#     print(wrapper.fill(emails))
#     print('')
#     print("*"*50)

In [9]:
def _str2bool(value):
    """Convert a command-line token to a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool("False")`` is
    True because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser(description='Model Inference')
parser.add_argument('--gpus', type=int, default=[0,1], nargs='+', help='used gpu')
parser.add_argument("--shuffle_train",  type=_str2bool,default=True,help="shuffle data or not")
parser.add_argument('--val_ratio', type=float, default=0.1)
parser.add_argument('--test_ratio', type=float, default=0.1)
parser.add_argument("--loss_weight", action='store_true', help="weight for unbalance data")
parser.add_argument("--seed",  type=int,default=101,
        help="random seed for np.random.seed, torch.manual_seed and torch.cuda.manual_seed.")

parser.add_argument("--truncation_strategy", type=str, default="head",help="how to truncate the long length email")
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--model_path",type=str,default="/home/ec2-user/SageMaker/retention_model_NLP/v2_src/multi-class/roberta_large_repo")
parser.add_argument("--feature_name", default="TextBody", type=str)
parser.add_argument("--is_train_inference", action="store_true",
                    help="run inference on the training set instead of the test set")

# parse_known_args ignores arguments it does not recognise, so options that
# are not declared above do not abort the cell.
args,_= parser.parse_known_args()

# Notebook-level overrides of the parsed defaults.
args.loss_weight=True
args.batch_size=64

print(args)

Namespace(batch_size=64, feature_name='TextBody', gpus=[0, 1], is_train_inference=False, loss_weight=True, model_path='/home/ec2-user/SageMaker/retention_model_NLP/v2_src/multi-class/roberta_large_repo', seed=101, shuffle_train=True, test_ratio=0.1, truncation_strategy='head', val_ratio=0.1)


In [10]:
# Seed all random generators with the configured seed before the shuffling and
# model loading that follow in this cell.
seed_everything(args.seed)

def cate_2_int_label(df,col,label_map=None):
    """Replace the categories in ``df[col]`` by integer ids in a ``label`` column.

    Args:
        df: source DataFrame. It is not modified; a copy is returned.
        col: name of the categorical column to encode.
        label_map: optional ``{category: id}`` mapping to reuse, e.g. the one
            built from the training split, so that several frames share one
            encoding. When omitted, ids 0..k-1 are assigned to the sorted
            unique values of ``df[col]``.

    Returns:
        ``(encoded_df, label_map)`` where ``encoded_df`` is ``df`` with ``col``
        replaced by its ids and renamed to ``label``.

    Raises:
        ValueError: if ``label_map`` is given and ``df[col]`` contains a value
            that is not one of its keys.
    """
    if label_map is None:
        label_map={value:idx for idx,value in enumerate(sorted(df[col].unique()))}
    else:
        unseen=set(df[col].unique())-set(label_map)
        if unseen:
            raise ValueError(f"values of {col!r} missing from label_map: {sorted(unseen, key=str)}")

    # Work on a copy so the caller's frame keeps its original categories.
    encoded=df.copy()
    encoded[col]=[label_map[value] for value in encoded[col]]
    encoded=encoded.rename(columns={col: 'label'})
    return encoded, label_map

train_df, train_label_map=cate_2_int_label(train_df,col="new_category")
val_df, val_label_map=cate_2_int_label(val_df,col="new_category")
test_df,  test_label_map=cate_2_int_label(test_df,col="new_category")

# Every split derives its own category -> id mapping from the categories it
# contains. If a split lacked a category the ids would silently mean different
# things per split, so stop here instead of reporting wrong metrics.
if not (train_label_map==val_label_map==test_label_map):
    raise ValueError(
        "label encodings differ between splits: "
        f"train={train_label_map}, val={val_label_map}, test={test_label_map}"
    )

# train_df=train_df.sample(n=1000)
# val_df=val_df.sample(n=1000)
# test_df=test_df.sample(n=1000)

hf_train=Dataset.from_pandas(train_df)
hf_val=Dataset.from_pandas(val_df)
hf_test=Dataset.from_pandas(test_df)
# hf_data=DatasetDict({"train":hf_train, "val":hf_val,  "test":hf_test})
# Train and validation rows are merged into a single "train" split.
hf_data=concatenate_datasets([hf_train,  hf_val],split="train")
hf_data=DatasetDict({"train":hf_data, "test":hf_test})

# Drop rows without any text in the feature column.
hf_data=hf_data.filter(lambda x: x[args.feature_name] is not None)

# Number of classes is taken from the labels present in train_df.
train_label=train_df['label'].values.squeeze()
num_classes=np.unique(train_label).shape[0]

tokenizer=AutoTokenizer.from_pretrained(args.model_path)
model=AutoModelForSequenceClassification.from_pretrained(args.model_path, num_labels = num_classes)

print()
print(f"The maximal # input tokens : {tokenizer.model_max_length:,}")
print(f"Vocabulary size : {tokenizer.vocab_size:,}")
print(f"The # of parameters : {sum([p.nelement() for p in model.parameters()]):,}")
print()

# The former untruncated hf_data.map(tokenizer(...)) pass was removed: the
# columns it added are discarded later in this cell, where only
# 'truncated_text' and 'label' are kept, so it only cost time and memory.

max_seq_length=tokenizer.model_max_length
def truncation_text(example):
    """Shorten one example's text to fit the model's maximum input length.

    The text in ``example[args.feature_name]`` is tokenized without special
    tokens, cut down to ``max_seq_length - 2`` token ids (two positions are
    left free) according to ``args.truncation_strategy`` and decoded back to
    a string. Reads the module-level ``tokenizer``, ``args`` and
    ``max_seq_length``.

    Strategies:
        head:  keep the first ids.
        tail:  keep the last ids.
        mixed: keep the first and the last ``(max_seq_length - 2) // 2`` ids.

    Args:
        example: mapping holding the text under ``args.feature_name``.

    Returns:
        ``{"truncated_text": <decoded string>}``.

    Raises:
        NotImplementedError: for an unknown ``args.truncation_strategy``.
    """
    budget=max(max_seq_length - 2, 0)

    # No tokenizer-side truncation: that keeps only the beginning of the text,
    # which would make the "tail" and "mixed" strategies unable to reach the
    # end. The ids are handled as a plain list (no tensor, no squeeze), so
    # empty and single-token texts need no special casing.
    token_ids=list(tokenizer(example[args.feature_name],truncation=False,padding=False,add_special_tokens=False)['input_ids'])
    n_tokens=len(token_ids)

    if args.truncation_strategy=="tail":
        # Explicit start index: a negative slice with budget 0 would keep everything.
        kept_ids=token_ids[max(n_tokens - budget, 0):]
    elif args.truncation_strategy=="head":
        kept_ids=token_ids[:budget]
    elif args.truncation_strategy=="mixed":
        half=budget // 2
        if n_tokens <= budget:
            # Short text: keep it whole rather than duplicating overlapping ends.
            kept_ids=token_ids
        else:
            # Concatenate (not add) the first and last `half` ids.
            kept_ids=token_ids[:half] + token_ids[n_tokens - half:]
    else:
        raise NotImplementedError("Unknown truncation. Supported truncation: tail, head, mixed truncation")

    return {"truncated_text":tokenizer.decode(kept_ids)}

# Add a 'truncated_text' column to every split (one truncation_text call per example).
hf_data=hf_data.map(truncation_text)
columns=hf_data['train'].column_names
columns_to_keep=['truncated_text','label']
# Everything except the truncated text and the label is dropped ...
columns_to_remove=set(columns)-set(columns_to_keep)
hf_data=hf_data.remove_columns(columns_to_remove)
# ... and the truncated text takes over the name of the original feature column.
hf_data=hf_data.rename_column("truncated_text", args.feature_name)

# Shuffle with a fixed seed; select(range(len(...))) keeps every row of the split.
train_data=hf_data['train'].shuffle(seed=101).select(range(len(hf_data["train"])))
# val_data=hf_data['val'].shuffle(seed=101).select(range(len(hf_data["val"])))
test_data=hf_data['test'].shuffle(seed=101).select(range(len(hf_data["test"])))

# Restrict the visible GPUs to the ids given in args.gpus.
# NOTE(review): this is set after torch was imported; presumably it still takes
# effect because CUDA has not been initialised at this point -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in args.gpus)
# print(f"The number of GPUs is {torch.cuda.device_count()}")
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print()
    print('{:<30}{:<10}'.format("The # of availabe GPU(s): ",torch.cuda.device_count()))

    for i in range(torch.cuda.device_count()):
        print('{:<30}{:<10}'.format("GPU Name: ",torch.cuda.get_device_name(i)))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
    
# Round trip through pandas: export both shuffled splits as DataFrames and
# rebuild fresh Dataset objects from them.
train_data.set_format(type="pandas")
df_train=train_data[:]
test_data.set_format(type="pandas")
df_test=test_data[:]

train_data=Dataset.from_pandas(df_train)
test_data=Dataset.from_pandas(df_test)


# utils.Loader_Creation is defined outside this notebook; the object it returns
# is used below as the DataLoader dataset and as the provider of collate_fn.
train_module=utils.Loader_Creation(train_data, tokenizer,args.feature_name)


test_module=utils.Loader_Creation(test_data, tokenizer,args.feature_name)

# df_train is rebuilt from train_data a second time here; reset_format() then
# undoes the pandas output format set on train_data.
train_data.set_format(type="pandas")
df_train=train_data[:]
train_data.reset_format()

# NOTE(review): shuffle=True is hard-coded here; args.shuffle_train is not consulted.
train_dataloader=DataLoader(train_module,
                            shuffle=True,
                            batch_size=args.batch_size,
                            collate_fn=train_module.collate_fn,
                            drop_last=False   # longformer model bug
                           )

# Test batches keep their order (no shuffling).
test_dataloader=DataLoader(test_module,
                            shuffle=False,
                            batch_size=args.batch_size,
                            collate_fn=test_module.collate_fn
                           )

# Report the number of mini-batches per loader.
print()
print('{:<30}{:<10,} '.format("training mini-batch",len(train_dataloader)))
#     print('{:<30}{:<10,} '.format("validation mini-batch",len(valid_dataloader)))
print('{:<30}{:<10,} '.format("test mini-batch",len(test_dataloader)))


# Optional class weights for the loss.
# NOTE(review): train_label comes from train_df only, while the "train" split
# used above also contains the validation rows -- confirm this is intended.
if args.loss_weight:
    train_classes_num, train_classes_weight = utils.get_class_count_and_weight(train_label,num_classes)
    loss_weight=torch.tensor(train_classes_weight).to(device)
else:
    loss_weight=None

100%|██████████| 171/171 [00:01<00:00, 118.05ba/s]
100%|██████████| 19/19 [00:00<00:00, 126.30ba/s]



The maximal # input tokens : 512
Vocabulary size : 50,265
The # of parameters : 355,364,869



  0%|          | 0/171 [00:00<?, ?ba/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1563 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 171/171 [00:31<00:00,  5.41ba/s]
100%|██████████| 19/19 [00:03<00:00,  5.48ba/s]
100%|██████████| 170561/170561 [04:47<00:00, 593.60ex/s]
100%|██████████| 18949/18949 [00:31<00:00, 602.84ex/s]



The # of availabe GPU(s):     2         
GPU Name:                     NVIDIA A10G
GPU Name:                     NVIDIA A10G


100%|██████████| 171/171 [00:26<00:00,  6.34ba/s]
100%|██████████| 19/19 [00:03<00:00,  6.24ba/s]



training mini-batch           2,666      
test mini-batch               297        


### Test Set

In [11]:
# Score either the training or the test loader, depending on --is_train_inference,
# and invert the matching {category: id} map into {id: category}.
_eval_loader, _split_label_map = (
    (train_dataloader, train_label_map) if args.is_train_inference
    else (test_dataloader, test_label_map)
)
y_pred, y_target, losses_tmp=utils.eval_func(_eval_loader,model,device,num_classes=num_classes,loss_weight=loss_weight)
label_map={idx:name for name,idx in _split_label_map.items()}

metrics_dict, roc_auc, pr_auc = utils.model_evaluate(y_target,y_pred)
print()
print("{:<20}{:<10.2%}".format("accuracy", metrics_dict['acc']))
print()

# One printed line per averaging scheme. Each row alternates a caption with the
# metrics_dict key whose value is printed after it.
_row_fmt="{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"
_summary_rows=(
    ("precision(macro):",'prec_macro',"recall(macro):",'recall_macro',
     "f1-score(macro):",'fscore_macro',"ROC-AUC(macro):",'auc_macro_ovo',
     "PR-AUC(macro):",'pr_auc_macro'),
    ("precision(micro):",'prec_micro',"recall(micro):",'recall_micro',
     "f1-score(micro):",'fscore_micro',"ROC-AUC(micro):",'auc_micro',
     "PR-AUC(micro):",'pr_auc_micro'),
    ("precision(weight):",'prec_weighted',"recall(weight):",'recall_weighted',
     "f1-score(weight):",'fscore_weighted',"ROC-AUC(weight):",'auc_weighted_ovo',
     "PR-AUC(weight):",'pr_auc_weighted'),
)
for _row in _summary_rows:
    # Even positions are captions, odd positions are metric keys.
    _cells=[metrics_dict[_item] if _pos % 2 else _item for _pos,_item in enumerate(_row)]
    print(_row_fmt.format(*_cells))

100%|██████████| 297/297 [06:49<00:00,  1.38s/it]


accuracy            91.12%    

precision(macro):   87.75%    recall(macro):  90.63%    f1-score(macro):  89.04%    ROC-AUC(macro):  98.50%    PR-AUC(macro):  94.56%    
precision(micro):   91.12%    recall(micro):  91.12%    f1-score(micro):  91.12%    ROC-AUC(micro):  98.83%    PR-AUC(micro):  96.42%    
precision(weight):  91.27%    recall(weight): 91.12%    f1-score(weight): 91.14%    ROC-AUC(weight): 98.57%    PR-AUC(weight): 95.82%    


In [12]:
print()
print(label_map)

n_classes=len(label_map)
_class_ids=range(n_classes)

report=metrics.classification_report(y_target.squeeze(), y_pred.argmax(axis=1), output_dict=True)

# Per-class rows: the first n_classes rows of the report, extended with the
# per-class ROC/PR AUC values and the readable category name.
table=(
    pd.DataFrame(report).transpose().iloc[:n_classes,:]
    .assign(
        count=lambda frame: frame["support"].astype(int),
        roc_auc=[roc_auc[i] for i in _class_ids],
        pr_auc=[pr_auc[i] for i in _class_ids],
        subtype_type=[label_map[i] for i in _class_ids],
    )
)
table=table[['subtype_type','count','precision','recall','f1-score','roc_auc','pr_auc']]

total=table['count'].sum()

# Append one summary row per averaging scheme below the per-class rows.
_summary_keys=(
    ("MACRO",('prec_macro','recall_macro','fscore_macro','auc_macro_ovo','pr_auc_macro')),
    ("MICRO",('prec_micro','recall_micro','fscore_micro','auc_micro','pr_auc_micro')),
    ("WEIGHT",('prec_weighted','recall_weighted','fscore_weighted','auc_weighted_ovo','pr_auc_weighted')),
)
for _name,_keys in _summary_keys:
    table.loc[len(table.index)]=[_name,total]+[metrics_dict[_key] for _key in _keys]

_pct="{:.2%}"
table.style.format({"count":"{:,}","f1-score":_pct,"precision":_pct,"recall":_pct,"roc_auc":_pct,"pr_auc":_pct})


{0: 'billing_issue', 1: 'claim_issue', 2: 'eoi_issue', 3: 'new_plan_admin', 4: 'other-category'}


Unnamed: 0,subtype_type,count,precision,recall,f1-score,roc_auc,pr_auc
0,billing_issue,1180,76.71%,90.42%,83.00%,99.13%,91.94%
1,claim_issue,6783,96.56%,95.50%,96.03%,99.40%,98.98%
2,eoi_issue,5635,94.70%,92.94%,93.81%,99.07%,98.06%
3,new_plan_admin,1561,88.29%,95.13%,91.58%,99.44%,95.73%
4,other-category,3790,82.49%,79.16%,80.79%,95.94%,88.08%
5,MACRO,18949,87.75%,90.63%,89.04%,98.50%,94.56%
6,MICRO,18949,91.12%,91.12%,91.12%,98.83%,96.42%
7,WEIGHT,18949,91.27%,91.12%,91.14%,98.57%,95.82%


### Training Set

In [None]:
# Score the training loader and invert the training {category: id} map into
# {id: category}.
y_pred, y_target, losses_tmp=utils.eval_func(train_dataloader,model,device,num_classes=num_classes,loss_weight=loss_weight)
label_map={idx:name for name,idx in train_label_map.items()}

metrics_dict, roc_auc, pr_auc = utils.model_evaluate(y_target,y_pred)
print()
print("{:<20}{:<10.2%}".format("accuracy", metrics_dict['acc']))
print()

# One printed line per averaging scheme. Each row alternates a caption with the
# metrics_dict key whose value is printed after it.
_row_fmt="{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"
_summary_rows=(
    ("precision(macro):",'prec_macro',"recall(macro):",'recall_macro',
     "f1-score(macro):",'fscore_macro',"ROC-AUC(macro):",'auc_macro_ovo',
     "PR-AUC(macro):",'pr_auc_macro'),
    ("precision(micro):",'prec_micro',"recall(micro):",'recall_micro',
     "f1-score(micro):",'fscore_micro',"ROC-AUC(micro):",'auc_micro',
     "PR-AUC(micro):",'pr_auc_micro'),
    ("precision(weight):",'prec_weighted',"recall(weight):",'recall_weighted',
     "f1-score(weight):",'fscore_weighted',"ROC-AUC(weight):",'auc_weighted_ovo',
     "PR-AUC(weight):",'pr_auc_weighted'),
)
for _row in _summary_rows:
    # Even positions are captions, odd positions are metric keys.
    _cells=[metrics_dict[_item] if _pos % 2 else _item for _pos,_item in enumerate(_row)]
    print(_row_fmt.format(*_cells))

100%|██████████| 2666/2666 [1:01:23<00:00,  1.38s/it]



accuracy            91.56%    

precision(macro):   88.39%    recall(macro):  91.10%    f1-score(macro):  89.62%    ROC-AUC(macro):  98.59%    PR-AUC(macro):  94.99%    
precision(micro):   91.56%    recall(micro):  91.56%    f1-score(micro):  91.56%    ROC-AUC(micro):  98.90%    PR-AUC(micro):  96.65%    
precision(weight):  91.69%    recall(weight): 91.56%    f1-score(weight): 91.58%    ROC-AUC(weight): 98.65%    PR-AUC(weight): 96.10%    


In [None]:
print()
print(label_map)

n_classes=len(label_map)
_class_ids=range(n_classes)

report=metrics.classification_report(y_target.squeeze(), y_pred.argmax(axis=1), output_dict=True)

# Per-class rows: the first n_classes rows of the report, extended with the
# per-class ROC/PR AUC values and the readable category name.
table=(
    pd.DataFrame(report).transpose().iloc[:n_classes,:]
    .assign(
        count=lambda frame: frame["support"].astype(int),
        roc_auc=[roc_auc[i] for i in _class_ids],
        pr_auc=[pr_auc[i] for i in _class_ids],
        subtype_type=[label_map[i] for i in _class_ids],
    )
)
table=table[['subtype_type','count','precision','recall','f1-score','roc_auc','pr_auc']]

total=table['count'].sum()

# Append one summary row per averaging scheme below the per-class rows.
_summary_keys=(
    ("MACRO",('prec_macro','recall_macro','fscore_macro','auc_macro_ovo','pr_auc_macro')),
    ("MICRO",('prec_micro','recall_micro','fscore_micro','auc_micro','pr_auc_micro')),
    ("WEIGHT",('prec_weighted','recall_weighted','fscore_weighted','auc_weighted_ovo','pr_auc_weighted')),
)
for _name,_keys in _summary_keys:
    table.loc[len(table.index)]=[_name,total]+[metrics_dict[_key] for _key in _keys]

_pct="{:.2%}"
table.style.format({"count":"{:,}","f1-score":_pct,"precision":_pct,"recall":_pct,"roc_auc":_pct,"pr_auc":_pct})


{0: 'billing_issue', 1: 'claim_issue', 2: 'eoi_issue', 3: 'new_plan_admin', 4: 'other-category'}


Unnamed: 0,subtype_type,count,precision,recall,f1-score,roc_auc,pr_auc
0,billing_issue,10626,78.44%,91.44%,84.44%,99.22%,92.67%
1,claim_issue,61054,96.91%,95.61%,96.25%,99.42%,98.96%
2,eoi_issue,50718,94.65%,93.45%,94.05%,99.14%,98.10%
3,new_plan_admin,14051,88.55%,94.79%,91.56%,99.46%,96.18%
4,other-category,34112,83.39%,80.23%,81.78%,96.16%,89.06%
5,MACRO,170561,88.39%,91.10%,89.62%,98.59%,94.99%
6,MICRO,170561,91.56%,91.56%,91.56%,98.90%,96.65%
7,WEIGHT,170561,91.69%,91.56%,91.58%,98.65%,96.10%


In [22]:
# Unformatted per-class rows (first n_classes rows) of the latest classification report.
pd.DataFrame(report).T.iloc[:n_classes]

Unnamed: 0,precision,recall,f1-score,support
0,0.784434,0.914361,0.844429,10626.0
1,0.969055,0.956072,0.96252,61054.0
2,0.946465,0.93454,0.940464,50718.0
3,0.885454,0.947904,0.915615,14051.0
4,0.833902,0.802269,0.81778,34112.0


In [13]:
def metric_table(table_name="metrics_training.txt"):
    """Load a comma-separated metrics log into a DataFrame.

    Each non-blank line holds ten comma-separated fields in this order: model
    type, epoch, loss, accuracy, F1 micro/macro/weighted, AUC
    micro/macro/weighted. Fields after the tenth are ignored; blank lines
    (for instance a trailing newline) are skipped.

    Args:
        table_name: file name, resolved against the current working directory
            (an absolute path is used as is).

    Returns:
        DataFrame with one row per distinct ``(model_type, epoch)`` pair (the
        first occurrence wins), sorted by model type and epoch. The index keeps
        the position of the row among the parsed lines.

    Raises:
        IndexError: if a non-blank line has fewer than ten fields.
        ValueError: if the epoch or a metric field is not numeric.
    """
    columns=["model_type","epoch","loss","Accuracy","F1-Score-Micro","F1-Score-Macro",
             "F1-Score-Weighted","AUC_Micro","AUC-Macro","AUC-Weighted"]

    records=[]
    with open(os.path.join(os.getcwd(),table_name),'r') as f:
        for line in f:
            if not line.strip():
                continue
            # Split once per line instead of once per field.
            fields=line.split(",")
            records.append(
                [str(fields[0]),int(fields[1])]+[float(fields[pos]) for pos in range(2,10)]
            )

    table=pd.DataFrame(records,columns=columns)
    return (
        table
        .drop_duplicates(subset=["model_type","epoch"])
        .sort_values(by=['model_type','epoch'])
    )

def style_format(metrics_training, metrics_test, model):
    metrics_training=metrics_training[metrics_training["model_type"]==model].reset_index(drop=True)
    metrics_training=metrics_training.sort_values('F1-Score-Weighted', ascending=False).head(1)
    metrics_training.drop("epoch",inplace=True,axis=1)
    metrics_training["data"]=["training set"]
    
    metrics_test=metrics_test[metrics_test["model_type"]==model].reset_index(drop=True)
    metrics_test=metrics_test.sort_values('F1-Score-Weighted', ascending=False).head(1)
    metrics_test.drop("epoch",inplace=True,axis=1)
    metrics_test["data"]=["test set"]
    
    metrics=pd.concat([metrics_training,metrics_test])
    first_column =  metrics.pop('data')
    metrics.insert(0, 'data', first_column)
    
    return metrics.style.format({"loss":"{:.4f}","Accuracy":"{:.2%}","F1-Score-Micro":"{:.2%}","F1-Score-Macro":"{:.2%}", "F1-Score-Weighted":"{:.2%}", "AUC_Micro":"{:.2%}", \
                                "AUC-Macro":"{:.2%}", "AUC-Weighted":"{:.2%}"}) \
    .set_caption(f"Performance Summary for-- {model}") \
    .set_table_styles([{
        'selector': 'caption',
        'props': [
            ('color', 'red'),
            ('font-size', '20px')
        ]
    }])

In [14]:
# Load the training-set and test-set metric logs from the working directory.
metric_training,metric_test=(
    metric_table(table_name=_log_name)
    for _log_name in ("metrics_training.txt","metrics_test.txt")
)

In [15]:
# Training vs. test summary for the "bert_base" rows of the metric logs.
style_format(metric_training,metric_test, model="bert_base")

Unnamed: 0,data,model_type,loss,Accuracy,F1-Score-Micro,F1-Score-Macro,F1-Score-Weighted,AUC_Micro,AUC-Macro,AUC-Weighted
0,training set,bert_base,0.4078,86.89%,86.89%,83.76%,86.98%,97.95%,97.17%,97.42%
0,test set,bert_base,0.4062,87.13%,87.13%,84.14%,87.21%,97.96%,97.21%,97.44%


In [16]:
# Training vs. test summary for the "bert_large" rows of the metric logs.
style_format(metric_training,metric_test, model="bert_large")

Unnamed: 0,data,model_type,loss,Accuracy,F1-Score-Micro,F1-Score-Macro,F1-Score-Weighted,AUC_Micro,AUC-Macro,AUC-Weighted
3,training set,bert_large,0.3227,89.73%,89.73%,87.54%,89.79%,98.50%,98.08%,98.17%
0,test set,bert_large,0.3343,89.36%,89.36%,87.11%,89.44%,98.44%,98.00%,98.09%


In [17]:
# Training vs. test summary for the "roberta_base" rows of the metric logs.
style_format(metric_training,metric_test, model="roberta_base")

Unnamed: 0,data,model_type,loss,Accuracy,F1-Score-Micro,F1-Score-Macro,F1-Score-Weighted,AUC_Micro,AUC-Macro,AUC-Weighted
3,training set,roberta_base,0.3301,89.35%,89.35%,86.88%,89.43%,98.49%,98.05%,98.16%
0,test set,roberta_base,0.3359,89.21%,89.21%,86.79%,89.29%,98.45%,98.02%,98.12%


In [18]:
# Training vs. test summary for the "roberta_large" rows of the metric logs.
style_format(metric_training,metric_test, model="roberta_large")

Unnamed: 0,data,model_type,loss,Accuracy,F1-Score-Micro,F1-Score-Macro,F1-Score-Weighted,AUC_Micro,AUC-Macro,AUC-Weighted
1,training set,roberta_large,0.2695,91.56%,91.56%,89.61%,91.58%,98.90%,98.58%,98.65%
0,test set,roberta_large,0.2848,91.12%,91.12%,89.04%,91.14%,98.83%,98.50%,98.57%
