In [1]:
import os
import numpy as np
import pandas as pd
# from tqdm.auto import tqdm
from tqdm import tqdm
tqdm.pandas(position=0,leave=True)
import random
import argparse
import logging

import sklearn
from sklearn import metrics
from sklearn.metrics import roc_auc_score, f1_score,average_precision_score
from sklearn.metrics import precision_recall_fscore_support 
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc as auc_score
from sklearn.metrics import roc_curve
from sklearn.preprocessing import LabelEncoder, label_binarize

import textwrap

from datasets import load_dataset, load_metric, concatenate_datasets,DatasetDict,Dataset
from datasets import load_from_disk

import transformers
print("Transformers version is {}".format(transformers.__version__))

import torch
from torch.utils.data import DataLoader, RandomSampler

from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoModelWithLMHead,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    default_data_collator,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    get_linear_schedule_with_warmup,
    get_scheduler
)

import utils

import seaborn as sns
from pylab import rcParams
from matplotlib import pyplot as plt
from matplotlib import rc

sns.set(style="whitegrid",palette='muted',font_scale=1.2)
# rcParams['figure.figsize']=16,10

%config InlineBackend.figure_format="retina"
%matplotlib inline

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

  from .autonotebook import tqdm as notebook_tqdm


Transformers version is 4.19.0


In [2]:
def seed_everything(seed):
    """Seed the Python, PyTorch (CPU and all CUDA devices) and NumPy RNGs.

    Also switches cuDNN to deterministic mode with autotuning disabled and
    writes the seed to the ``PYTHONHASHSEED`` environment variable.

    Parameters
    ----------
    seed : int
        Value handed to every seeding call.
    """
    # Same call order as always: random -> torch (CPU) -> torch (all GPUs).
    for reseed in (random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        reseed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [3]:
# S3 locations used by this notebook. `output_dir` is not referenced in the
# cells visible here.
input_dir="s3://trident-retention-output/"
output_dir="s3://trident-retention-output/multi-class/"

# Load the pickled email table. os.path.join is used on an S3 URI, which only
# yields a valid key on POSIX-style separators.
# NOTE(review): unpickling can execute arbitrary code; only load from a trusted bucket.
askunum_text=pd.read_pickle(os.path.join(input_dir,"askunum_text_v1")) ## askunum_text_v1 group text by parentID and Subtype

# Normalise the Subtype label: missing -> "", cast to str, lower-case.
askunum_text['Subtype'] = askunum_text['Subtype'].fillna("").astype(str).str.lower()
# Re-interpret the Latin-1 bytes of each label as cp1252 (presumably to repair
# mojibake - confirm against the raw data). Raises UnicodeEncodeError if a label
# contains a character outside Latin-1.
askunum_text["Subtype"]=askunum_text["Subtype"].progress_apply(lambda x: x.encode("latin1").decode("cp1252"))
# "/" and "&" carry no special regex meaning, so these two replacements behave
# the same whether pandas treats the pattern as a regex or as a literal.
askunum_text["Subtype"]=askunum_text["Subtype"].str.replace("/"," or ")
askunum_text["Subtype"]=askunum_text["Subtype"].str.replace("&"," and ")
# Collapse the runs of whitespace introduced by the replacements above.
askunum_text["Subtype"]=askunum_text["Subtype"].str.replace(r"\s{2,}", " ", regex=True)

100%|██████████| 2044010/2044010 [00:03<00:00, 678942.68it/s]


In [16]:
# The twelve Subtype values (already in the normalised lower-case form) that are
# passed to the sampler as individual positional arguments.
args_val=['bill hide or delete', "bill not received", "late notice or collections", "missing or skipped payment",'policy level discrepancy', 'premium discrepancy', 
              'broker of record change (bor)','missing information','request to speak to dbs','less than minimum lives', 'policy termination','new plan administrator']

# utils.Sample_Creation is project-local; its behaviour is not visible here.
# NOTE(review): later cells filter on an "other-category" label, so it presumably
# buckets every unlisted subtype under that name - confirm in utils.
sample_class=utils.Sample_Creation(askunum_text, *args_val)
# The split ratios are hard-coded to 0.1 here; the --val_ratio / --test_ratio
# options parsed further down are not consulted by this call.
train_df, val_df, test_df=sample_class.data_creation(val_ratio=0.1, test_ratio=0.1)

In [5]:
def label_distribution(df,col):
    """Tabulate absolute and relative frequencies of the values in ``df[col]``.

    Missing values are counted as their own category (``dropna=False``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to summarise.
    col : str
        Name of the column whose values are counted.

    Returns
    -------
    pandas.DataFrame
        Three columns: ``col`` (the distinct values), ``count`` and
        ``percentage`` (a fraction in [0, 1]), ordered by descending count.
    """
    labels = df[col]

    # rename_axis + reset_index(name=...) names both output columns explicitly.
    # Renaming the default 'index' column breaks on pandas >= 2.0, where
    # value_counts() already names the index after the column and the values
    # 'count' / 'proportion', which produced two 'count' columns.
    counts = (labels.value_counts(dropna=False)
                    .rename_axis(col)
                    .reset_index(name="count"))
    shares = (labels.value_counts(dropna=False, normalize=True)
                    .rename_axis(col)
                    .reset_index(name="percentage"))

    return counts.merge(shares, on=col, how="inner")

def style_format(df, col, data_type="Training set"):
    return df.style.format({'count':'{:,}','percentage':'{:.2%}'})\
           .set_caption(f"{data_type} {col} distribution")\
           .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '15px')]}])

In [6]:
# Subtype frequency table for the training split.
label_train=label_distribution(train_df,col="Subtype")
# Re-order the rows so that the catch-all "other-category" bucket is listed
# last, after all the named subtypes (which keep their count ordering).
x1=label_train[label_train["Subtype"] != "other-category"]
x2=label_train[label_train["Subtype"] == "other-category"]
label_train=pd.concat([x1,x2])
# Last expression of the cell: renders the styled table.
style_format(label_train,col="Subtype",  data_type="Training set")

Unnamed: 0,Subtype,count,percentage
0,policy termination,28320,25.32%
1,missing information,23712,21.20%
2,broker of record change (bor),17451,15.60%
3,new plan administrator,16087,14.38%
5,premium discrepancy,6873,6.14%
6,bill not received,2841,2.54%
7,late notice or collections,2240,2.00%
8,missing or skipped payment,2200,1.97%
9,policy level discrepancy,1370,1.22%
10,less than minimum lives,1031,0.92%


In [7]:
# Subtype frequency table for the test split (same steps as for the training split).
label_test=label_distribution(test_df,col="Subtype")
# Move the catch-all "other-category" row to the bottom of the table.
# Note: x1 / x2 are re-bound here and overwrite the values from the training cell.
x1=label_test[label_test["Subtype"] != "other-category"]
x2=label_test[label_test["Subtype"] == "other-category"]
label_test=pd.concat([x1,x2])
# Last expression of the cell: renders the styled table.
style_format(label_test,col="Subtype",  data_type="Test set")

Unnamed: 0,Subtype,count,percentage
0,policy termination,3540,25.33%
1,missing information,2964,21.21%
2,broker of record change (bor),2181,15.60%
3,new plan administrator,2010,14.38%
5,premium discrepancy,859,6.15%
6,bill not received,355,2.54%
7,late notice or collections,280,2.00%
8,missing or skipped payment,274,1.96%
9,policy level discrepancy,171,1.22%
10,less than minimum lives,128,0.92%


In [None]:
# wrapper = textwrap.TextWrapper(width=150) 
# # Randomly choose some examples.
# for i in range(10):
#     random.seed(101+i)

#     j = random.choice(train_df.index)
#     emails=train_df.loc[j,"TextBody"]
#     subtype=train_df.loc[j,"Subtype"]

#     print('')
#     print("*"*80)
#     print(f'*  Full TextBody :   subtype={subtype} *')
#     print("*"*80)
#     print('')
#     # print(j)
#     print(wrapper.fill(emails))
#     print('')
#     print("*"*50)

In [8]:
def str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``value`` is not a recognised true/false spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Run-time configuration for the inference notebook.
parser = argparse.ArgumentParser(description='Model Inference')
parser.add_argument('--gpus', type=int, default=[0,1], nargs='+', help='used gpu')
# str2bool (not bool) so that "--shuffle_train False" really yields False.
parser.add_argument("--shuffle_train",  type=str2bool,default=True,help="shuffle data or not")
parser.add_argument('--val_ratio', type=float, default=0.1)
parser.add_argument('--test_ratio', type=float, default=0.1)
parser.add_argument("--loss_weight", action='store_true', help="weight for unbalance data")
parser.add_argument("--seed",  type=int,default=101,
        help="random seed for np.random.seed, torch.manual_seed and torch.cuda.manual_seed.")

parser.add_argument("--truncation_strategy", type=str, default="head",help="how to truncate the long length email")
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--model_path",type=str,default="/home/ec2-user/SageMaker/retention_model_NLP/multi-class/roberta_large_repo")
parser.add_argument("--feature_name", default="TextBody", type=str)
parser.add_argument("--is_train_inference", action="store_true", help="inference for training set or not")

# parse_known_args (rather than parse_args) tolerates the extra arguments that
# the Jupyter kernel launcher puts in sys.argv.
args,_= parser.parse_known_args()

# Notebook-level overrides of the parsed defaults.
args.loss_weight=True
args.batch_size=64

print(args)

Namespace(batch_size=64, feature_name='TextBody', gpus=[0, 1], is_train_inference=False, loss_weight=True, model_path='/home/ec2-user/SageMaker/retention_model_NLP/multi-class/roberta_large_repo', seed=101, shuffle_train=True, test_ratio=0.1, truncation_strategy='head', val_ratio=0.1)


In [17]:
# Fix all RNG seeds (value comes from --seed, default 101) before any data
# shuffling or model construction in this cell.
seed_everything(args.seed)

# Convert a categorical target column into an integer target column.
def cate_2_int_label(df,col,label_map=None):
    """Encode the categorical column ``col`` as integers in a column named ``label``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is left unmodified; a new frame is returned.
    col : str
        Name of the categorical column to encode.
    label_map : dict, optional
        Existing ``{category: integer}`` mapping to apply. Pass the mapping
        built from one split to encode another split with the very same ids.
        When omitted, a mapping is built from the sorted unique values of
        ``df[col]``.

    Returns
    -------
    (pandas.DataFrame, dict)
        A copy of ``df`` in which ``col`` holds the integer ids and has been
        renamed to ``label``, and the mapping that was used.

    Raises
    ------
    ValueError
        If ``label_map`` is given and ``df[col]`` contains a value it does not cover.
    """
    if label_map is None:
        label_map = {name: idx for idx, name in enumerate(sorted(df[col].unique()))}
    else:
        unseen = set(df[col].unique()) - set(label_map)
        if unseen:
            raise ValueError(
                f"values of {col!r} missing from label_map: {sorted(map(str, unseen))}"
            )

    # rename() returns a new frame, so the caller's DataFrame is never mutated
    # and re-running a cell on the same input gives the same result.
    encoded = df.rename(columns={col: 'label'})
    encoded['label'] = [label_map[name] for name in df[col]]
    return encoded, label_map

# Integer-encode the target of every split. The returned frames have the
# "Subtype" column renamed to "label", so this cell cannot be re-run on the
# re-bound train_df / val_df / test_df without re-creating the splits first.
# NOTE(review): each split derives its own label map from its own unique
# values; the integer ids only line up across splits if every split contains
# exactly the same set of Subtype values - confirm (e.g. assert the maps are equal).
train_df, train_label_map=cate_2_int_label(train_df,col="Subtype")
val_df, val_label_map=cate_2_int_label(val_df,col="Subtype")
test_df, test_label_map=cate_2_int_label(test_df,col="Subtype")

# Optional sub-sampling for quick experiments (disabled).
# train_df=train_df.sample(n=1000)
# val_df=val_df.sample(n=1000)
# test_df=test_df.sample(n=1000)

# Wrap the pandas frames as Hugging Face datasets.
hf_train=Dataset.from_pandas(train_df)
hf_val=Dataset.from_pandas(val_df)
hf_test=Dataset.from_pandas(test_df)
# hf_data=DatasetDict({"train":hf_train, "val":hf_val,  "test":hf_test})
# Train and validation rows are merged into a single "train" split; only
# "train" and "test" exist from here on.
hf_data=concatenate_datasets([hf_train,  hf_val],split="train")
hf_data=DatasetDict({"train":hf_data, "test":hf_test})

# Drop rows whose text field is None.
hf_data=hf_data.filter(lambda x: x[args.feature_name]!=None)

# Class ids of the training frame only: taken before the None-filter above and
# without the validation rows that were merged into hf_data["train"].
train_label=train_df['label'].values.squeeze()
num_classes=np.unique(train_label).shape[0]

# Tokenizer and classification model are both loaded from args.model_path.
tokenizer=AutoTokenizer.from_pretrained(args.model_path)
model=AutoModelForSequenceClassification.from_pretrained(args.model_path, num_labels = num_classes)

print()
print(f"The maximal # input tokens : {tokenizer.model_max_length:,}")
print(f"Vocabulary size : {tokenizer.vocab_size:,}")
print(f"The # of parameters : {sum([p.nelement() for p in model.parameters()]):,}")
print()

# Tokenise the full, untruncated text in batches. No truncation is requested
# here, which is what triggers the tokenizer's "sequence length is longer than
# the specified maximum" warning. The columns this adds are removed again
# further down, where only 'truncated_text' and 'label' are kept.
hf_data=hf_data.map(lambda x: tokenizer(x[args.feature_name]),batched=True)

# Token budget used by truncation_text (the tokenizer's model_max_length).
max_seq_length=tokenizer.model_max_length
def truncation_text(example):
    """Shorten one example's text so that it fits the model's token budget.

    The text in ``example[args.feature_name]`` is tokenised without special
    tokens, cut down to ``max_seq_length - 2`` tokens (two slots are left for
    the special tokens added at model time) and decoded back to a string.

    The strategy is read from ``args.truncation_strategy``:

    * ``"head"``  - keep the first tokens;
    * ``"tail"``  - keep the last tokens;
    * ``"mixed"`` - keep the first half of the budget from the start and the
      remainder from the end.

    Texts that already fit are returned unchanged (after the tokenise/decode
    round trip).

    Parameters
    ----------
    example : dict
        A single dataset row containing the key ``args.feature_name``.

    Returns
    -------
    dict
        ``{"truncated_text": <decoded string>}``.

    Raises
    ------
    NotImplementedError
        If ``args.truncation_strategy`` is not one of the three values above.
    """
    strategy = args.truncation_strategy
    if strategy not in ("head", "tail", "mixed"):
        # NotImplementedError is the exception class; `NotImplemented` is a
        # constant and calling it raises an unrelated TypeError.
        raise NotImplementedError("Unknown truncation. Supported truncation: tail, head, mixed truncation")

    # truncation=False is essential: letting the tokenizer truncate would keep
    # only the head of the text, so "tail" and "mixed" could never reach the end.
    token_ids = list(
        tokenizer(example[args.feature_name],
                  truncation=False,
                  padding=False,
                  add_special_tokens=False)['input_ids']
    )

    budget = max(max_seq_length - 2, 0)
    n_tokens = len(token_ids)

    if n_tokens > budget:
        if strategy == "head":
            token_ids = token_ids[:budget]
        elif strategy == "tail":
            token_ids = token_ids[n_tokens - budget:]
        else:  # "mixed": concatenate a head slice and a tail slice
            head_len = budget // 2
            tail_len = budget - head_len
            token_ids = token_ids[:head_len] + token_ids[n_tokens - tail_len:]

    return {"truncated_text":tokenizer.decode(token_ids)}

# Apply the truncation one example at a time (non-batched map).
hf_data=hf_data.map(truncation_text)
# Keep only the truncated text and the label; every other column (including the
# tokenizer outputs added by the earlier batched map) is dropped.
columns=hf_data['train'].column_names
columns_to_keep=['truncated_text','label']
columns_to_remove=set(columns)-set(columns_to_keep)
hf_data=hf_data.remove_columns(columns_to_remove)
# Give the truncated text the original feature name again.
hf_data=hf_data.rename_column("truncated_text", args.feature_name)

# Shuffle with a fixed seed; select(range(len(...))) then keeps every row, so
# the result is the complete shuffled split.
train_data=hf_data['train'].shuffle(seed=101).select(range(len(hf_data["train"])))
# val_data=hf_data['val'].shuffle(seed=101).select(range(len(hf_data["val"])))
test_data=hf_data['test'].shuffle(seed=101).select(range(len(hf_data["test"])))

# NOTE(review): CUDA_VISIBLE_DEVICES is set here, long after torch was imported;
# it presumably only takes effect if CUDA has not been initialised yet in this
# kernel - confirm, or move it to the top of the notebook.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in args.gpus)
# print(f"The number of GPUs is {torch.cuda.device_count()}")
# Pick the compute device and list the visible GPUs.
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print()
    print('{:<30}{:<10}'.format("The # of availabe GPU(s): ",torch.cuda.device_count()))

    for i in range(torch.cuda.device_count()):
        print('{:<30}{:<10}'.format("GPU Name: ",torch.cuda.get_device_name(i)))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
    
# Round trip through pandas: materialise each split as a DataFrame and rebuild a
# fresh Dataset from it.
# NOTE(review): presumably done to bake the shuffled order into a plain table -
# confirm that it is still needed.
train_data.set_format(type="pandas")
df_train=train_data[:]
test_data.set_format(type="pandas")
df_test=test_data[:]

train_data=Dataset.from_pandas(df_train)
test_data=Dataset.from_pandas(df_test)


# utils.Loader_Creation is project-local; it is used below both as the dataset
# object handed to DataLoader and as the provider of collate_fn.
train_module=utils.Loader_Creation(train_data, tokenizer,args.feature_name)


test_module=utils.Loader_Creation(test_data, tokenizer,args.feature_name)

# Re-extract the training DataFrame (same steps as above), then switch the
# dataset back to its default output format.
train_data.set_format(type="pandas")
df_train=train_data[:]
train_data.reset_format()

# shuffle is hard-coded to True; args.shuffle_train is not consulted here.
train_dataloader=DataLoader(train_module,
                            shuffle=True,
                            batch_size=args.batch_size,
                            collate_fn=train_module.collate_fn,
                            drop_last=False   # longformer model bug
                           )

# The test loader keeps the dataset order.
test_dataloader=DataLoader(test_module,
                            shuffle=False,
                            batch_size=args.batch_size,
                            collate_fn=test_module.collate_fn
                           )

# Report the number of mini-batches per loader.
print()
print('{:<30}{:<10,} '.format("training mini-batch",len(train_dataloader)))
#     print('{:<30}{:<10,} '.format("validation mini-batch",len(valid_dataloader)))
print('{:<30}{:<10,} '.format("test mini-batch",len(test_dataloader)))


# Optional per-class loss weights for the imbalanced labels. They are computed
# from train_label, i.e. the labels of train_df only (without the validation
# rows merged into the training split).
if args.loss_weight:
    train_classes_num, train_classes_weight = utils.get_class_count_and_weight(train_label,num_classes)
    loss_weight=torch.tensor(train_classes_weight).to(device)
else:
    loss_weight=None

100%|██████████| 126/126 [00:00<00:00, 204.21ba/s]
100%|██████████| 14/14 [00:00<00:00, 204.31ba/s]



The maximal # input tokens : 512
Vocabulary size : 50,265
The # of parameters : 355,373,069



  0%|          | 0/126 [00:00<?, ?ba/s]Token indices sequence length is longer than the specified maximum sequence length for this model (523 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 126/126 [00:17<00:00,  7.01ba/s]
100%|██████████| 14/14 [00:01<00:00,  7.26ba/s]
100%|██████████| 125847/125847 [02:42<00:00, 772.68ex/s]
100%|██████████| 13977/13977 [00:17<00:00, 800.49ex/s]



The # of availabe GPU(s):     2         
GPU Name:                     NVIDIA A10G
GPU Name:                     NVIDIA A10G


100%|██████████| 126/126 [00:20<00:00,  6.04ba/s]
100%|██████████| 14/14 [00:02<00:00,  5.77ba/s]



training mini-batch           1,967      
test mini-batch               219        


In [None]:
def model_evaluate(y_test, pred_test):
    """Compute multi-class classification metrics from raw logits.

    Column ``i`` of ``pred_test`` is scored against class id ``i``, and the set
    of classes is taken from the values present in ``y_test``; the function
    therefore expects class ids ``0 .. n_classes-1`` that all occur in ``y_test``.

    Parameters
    ----------
    y_test : array-like of int
        True class ids. Flattened to 1-D internally, so ``(N,)`` and ``(N, 1)``
        inputs are both accepted.
    pred_test : numpy.ndarray
        Logits, one row per sample and one column per class. Softmax is applied
        here; do not pass probabilities.

    Returns
    -------
    metrics : dict
        Scalar summary: precision / recall / F1 / accuracy (macro, micro,
        weighted), ROC-AUC (micro, macro and weighted in both one-vs-one and
        one-vs-rest form) and PR-AUC (micro, macro, weighted).
    acc : dict
        Per-class accuracy keyed by class id, plus "micro", "macro" and
        "weighted". The per-class value is the share of that class's samples
        that were predicted as that class.
    roc_auc : dict
        Per-class one-vs-rest ROC-AUC plus "micro", "macro", "weighted".
    pr_auc : dict
        Per-class area under the precision-recall curve plus "micro", "macro",
        "weighted".
    """
    # Work on one flat label vector everywhere instead of squeezing ad hoc.
    y_true = np.asarray(y_test).reshape(-1)

    ## convert logits into probability
    probs = torch.nn.functional.softmax(torch.from_numpy(pred_test), dim=1).numpy()
    y_hat = probs.argmax(axis=1)

    prec_macro, recall_macro, fscore_macro, _ = precision_recall_fscore_support(y_true, y_hat, average='macro')
    prec_micro, recall_micro, fscore_micro, _ = precision_recall_fscore_support(y_true, y_hat, average='micro')
    prec_weighted, recall_weighted, fscore_weighted, _ = precision_recall_fscore_support(y_true, y_hat, average='weighted')

    macro_roc_auc_ovo = roc_auc_score(y_true, probs, multi_class="ovo", average="macro")
    weighted_roc_auc_ovo = roc_auc_score(y_true, probs, multi_class="ovo", average="weighted")

    macro_roc_auc_ovr = roc_auc_score(y_true, probs, multi_class="ovr", average="macro")
    weighted_roc_auc_ovr = roc_auc_score(y_true, probs, multi_class="ovr", average="weighted")

    # Class prevalence, used as the weights of the "weighted" averages.
    classes, count = np.unique(y_true, return_counts=True)
    weight = count / count.sum()

    y_test_binary = label_binarize(y_true, classes=classes.tolist())
    n_classes = y_test_binary.shape[1]

    def _add_averages(per_class):
        # Append the unweighted and the prevalence-weighted mean of the per-class values.
        per_class["macro"] = sum(per_class[i] for i in range(n_classes)) / n_classes
        per_class["weighted"] = sum(per_class[i] * weight[i] for i in range(n_classes))

    acc = dict()
    roc_auc = dict()
    pr_auc = dict()
    for i in range(n_classes):
        mask = (y_true == i)
        acc[i] = np.sum(y_hat[mask] == i) / np.sum(mask)

        fpr, tpr, _ = roc_curve(y_test_binary[:, i], probs[:, i])
        roc_auc[i] = auc_score(fpr, tpr)

        # Score with the softmax probabilities directly. Passing them through an
        # extra sigmoid squeezes [0, 1] into [0.5, 0.73]; in float32 that maps
        # many distinct small probabilities onto the same value and creates
        # artificial ties on the precision-recall curve.
        prec, rec, _ = precision_recall_curve(y_test_binary[:, i], probs[:, i])
        pr_auc[i] = auc_score(rec, prec)

    acc["micro"] = np.sum(y_hat == y_true) / y_true.shape[0]
    _add_averages(acc)

    # Compute micro-average ROC curve and ROC area
    fpr, tpr, _ = roc_curve(y_test_binary.ravel(), probs.ravel())
    roc_auc["micro"] = auc_score(fpr, tpr)
    _add_averages(roc_auc)

    prec, rec, _ = precision_recall_curve(y_test_binary.ravel(), probs.ravel())
    pr_auc["micro"] = auc_score(rec, prec)
    _add_averages(pr_auc)

    # Named `summary` locally so the imported sklearn `metrics` module is not shadowed.
    summary = {
        'prec_macro': prec_macro,
        'recall_macro': recall_macro,
        'fscore_macro': fscore_macro,
        'acc_macro': acc["macro"],

        'prec_micro': prec_micro,
        'recall_micro': recall_micro,
        'fscore_micro': fscore_micro,
        'acc_micro': acc["micro"],

        'prec_weighted': prec_weighted,
        'recall_weighted': recall_weighted,
        'fscore_weighted': fscore_weighted,
        'acc_weighted': acc["weighted"],

        'auc_micro': roc_auc["micro"],

        'auc_macro_ovo': macro_roc_auc_ovo,
        'auc_macro_ovr': macro_roc_auc_ovr,

        'auc_weighted_ovo': weighted_roc_auc_ovo,
        'auc_weighted_ovr': weighted_roc_auc_ovr,

        'pr_auc_micro': pr_auc["micro"],
        'pr_auc_macro': pr_auc["macro"],
        'pr_auc_weighted': pr_auc["weighted"],
    }

    return summary, acc, roc_auc, pr_auc

### Training Set

In [None]:
# Run inference over the whole training loader. utils.eval_func is project-local.
# NOTE(review): y_pred is presumably an (N, num_classes) array of logits and
# y_target the matching integer labels - confirm in utils; model_evaluate
# applies softmax to y_pred itself.
y_pred, y_target, losses_tmp=utils.eval_func(train_dataloader,model,device,num_classes=num_classes,loss_weight=loss_weight)
# Invert the training label map: integer id -> subtype name.
label_map={v:k for k,v in train_label_map.items()}

print()
print(label_map)
print()

n_classes=len(label_map)

metrics_dict, acc, roc_auc, pr_auc = model_evaluate(y_target,y_pred)
report=metrics.classification_report(y_target.squeeze(), y_pred.argmax(axis=1), output_dict=True)

# Keep only the first n_classes rows of the report, i.e. the per-class rows that
# precede sklearn's summary rows.
table = pd.DataFrame(report).transpose().iloc[:n_classes,:]
table["count"]=table["support"].astype(int)
# Per-class "accuracy" from model_evaluate: share of the samples of class i
# that were predicted as class i.
table["accuracy"]=[acc[i] for i in range(n_classes)]
# table["roc_auc"]=[roc_auc[i] for i in range(n_classes)]
table["pr_auc"]=[pr_auc[i] for i in range(n_classes)]
table["subtype_type"]=[label_map[i] for i in range(n_classes)]
table=table[['subtype_type','count','accuracy','precision','recall','f1-score','pr_auc']]

total=table['count'].sum()

# Append three summary rows (macro / micro / prevalence-weighted averages);
# each is added under the next integer index label.
table.loc[len(table.index)]=["MACRO",total,metrics_dict['acc_macro'],metrics_dict['prec_macro'],metrics_dict['recall_macro'],metrics_dict['fscore_macro'],metrics_dict['pr_auc_macro']]

table.loc[len(table.index)]=["MICRO",total,metrics_dict['acc_micro'],metrics_dict['prec_micro'],metrics_dict['recall_micro'],metrics_dict['fscore_micro'],metrics_dict['pr_auc_micro']]

table.loc[len(table.index)]=["WEIGHT",total,metrics_dict['acc_weighted'],metrics_dict['prec_weighted'],metrics_dict['recall_weighted'],metrics_dict['fscore_weighted'],metrics_dict['pr_auc_weighted']]

# Last expression of the cell: render counts with separators and ratios as percentages.
table.style.format({"count":"{:,}","accuracy":"{:.2%}","f1-score":"{:.2%}","precision":"{:.2%}","recall":"{:.2%}","pr_auc":"{:.2%}"})

# print()
# print("{:<20}{:<10.2%}".format("accuracy", metrics_dict['acc']))
# print()
# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(macro):",metrics_dict['prec_macro'],"recall(macro):",metrics_dict['recall_macro'],\
#               "f1-score(macro):",metrics_dict['fscore_macro'],"ROC-AUC(macro):",metrics_dict['auc_macro_ovo'],\
#              "PR-AUC(macro):",metrics_dict['pr_auc_macro']))

# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(micro):",metrics_dict['prec_micro'],"recall(micro):",metrics_dict['recall_micro'],\
#               "f1-score(micro):",metrics_dict['fscore_micro'],"ROC-AUC(micro):",metrics_dict['auc_micro'],\
#              "PR-AUC(micro):",metrics_dict['pr_auc_micro']))

# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(weight):",metrics_dict['prec_weighted'],"recall(weight):",metrics_dict['recall_weighted'],\
#               "f1-score(weight):",metrics_dict['fscore_weighted'],"ROC-AUC(weight):",metrics_dict['auc_weighted_ovo'],\
#              "PR-AUC(weight):",metrics_dict['pr_auc_weighted']))

 19%|█▉        | 378/1967 [08:42<36:35,  1.38s/it]

### Test Set

In [113]:
# Choose the split to evaluate: the training loader when --is_train_inference
# is set, otherwise the test loader.
if args.is_train_inference:
    y_pred, y_target, losses_tmp=utils.eval_func(train_dataloader,model,device,num_classes=num_classes,loss_weight=loss_weight)
    label_map={v:k for k,v in train_label_map.items()}
else:
    y_pred, y_target, losses_tmp=utils.eval_func(test_dataloader,model,device,num_classes=num_classes,loss_weight=loss_weight)
    # NOTE(review): the id -> name mapping comes from test_label_map, which was
    # derived from the test split alone; it names the model's output columns
    # correctly only if it equals train_label_map - confirm.
    label_map={v:k for k,v in test_label_map.items()}

print()
print(label_map)
print()

n_classes=len(label_map)

metrics_dict, acc, roc_auc, pr_auc = model_evaluate(y_target,y_pred)
report=metrics.classification_report(y_target.squeeze(), y_pred.argmax(axis=1), output_dict=True)

# From here on the table is built with the same steps as in the training-set
# cell: keep the first n_classes (per-class) rows of the report, add count,
# per-class accuracy, PR-AUC and the subtype name.
table = pd.DataFrame(report).transpose().iloc[:n_classes,:]
table["count"]=table["support"].astype(int)
table["accuracy"]=[acc[i] for i in range(n_classes)]
# table["roc_auc"]=[roc_auc[i] for i in range(n_classes)]
table["pr_auc"]=[pr_auc[i] for i in range(n_classes)]
table["subtype_type"]=[label_map[i] for i in range(n_classes)]
table=table[['subtype_type','count','accuracy','precision','recall','f1-score','pr_auc']]

total=table['count'].sum()

# Append the macro / micro / prevalence-weighted summary rows.
table.loc[len(table.index)]=["MACRO",total,metrics_dict['acc_macro'],metrics_dict['prec_macro'],metrics_dict['recall_macro'],metrics_dict['fscore_macro'],metrics_dict['pr_auc_macro']]

table.loc[len(table.index)]=["MICRO",total,metrics_dict['acc_micro'],metrics_dict['prec_micro'],metrics_dict['recall_micro'],metrics_dict['fscore_micro'],metrics_dict['pr_auc_micro']]

table.loc[len(table.index)]=["WEIGHT",total,metrics_dict['acc_weighted'],metrics_dict['prec_weighted'],metrics_dict['recall_weighted'],metrics_dict['fscore_weighted'],metrics_dict['pr_auc_weighted']]

# Last expression of the cell: render the styled table.
table.style.format({"count":"{:,}","accuracy":"{:.2%}","f1-score":"{:.2%}","precision":"{:.2%}","recall":"{:.2%}","pr_auc":"{:.2%}"})

# print()
# print("{:<20}{:<10.2%}".format("accuracy", metrics_dict['acc']))
# print()
# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(macro):",metrics_dict['prec_macro'],"recall(macro):",metrics_dict['recall_macro'],\
#               "f1-score(macro):",metrics_dict['fscore_macro'],"ROC-AUC(macro):",metrics_dict['auc_macro_ovo'],\
#              "PR-AUC(macro):",metrics_dict['pr_auc_macro']))

# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(micro):",metrics_dict['prec_micro'],"recall(micro):",metrics_dict['recall_micro'],\
#               "f1-score(micro):",metrics_dict['fscore_micro'],"ROC-AUC(micro):",metrics_dict['auc_micro'],\
#              "PR-AUC(micro):",metrics_dict['pr_auc_micro']))

# print("{:<20}{:<10,.2%}{:<16}{:<10,.2%}{:<18}{:<10,.2%}{:<17}{:<10,.2%}{:<16}{:<10,.2%}"\
#       .format("precision(weight):",metrics_dict['prec_weighted'],"recall(weight):",metrics_dict['recall_weighted'],\
#               "f1-score(weight):",metrics_dict['fscore_weighted'],"ROC-AUC(weight):",metrics_dict['auc_weighted_ovo'],\
#              "PR-AUC(weight):",metrics_dict['pr_auc_weighted']))

100%|██████████| 219/219 [05:01<00:00,  1.38s/it]



{0: 'bill hide or delete', 1: 'bill not received', 2: 'broker of record change (bor)', 3: 'late notice or collections', 4: 'less than minimum lives', 5: 'missing information', 6: 'missing or skipped payment', 7: 'new plan administrator', 8: 'other-category', 9: 'policy level discrepancy', 10: 'policy termination', 11: 'premium discrepancy', 12: 'request to speak to dbs'}



Unnamed: 0,subtype_type,count,accuracy,precision,recall,f1-score,pr_auc
0,bill hide or delete,73,80.82%,60.82%,80.82%,69.41%,70.20%
1,bill not received,355,76.90%,57.72%,76.90%,65.94%,71.82%
2,broker of record change (bor),2181,94.13%,95.18%,94.13%,94.65%,98.28%
3,late notice or collections,280,68.57%,53.19%,68.57%,59.91%,61.37%
4,less than minimum lives,128,78.12%,52.36%,78.12%,62.70%,75.02%
5,missing information,2964,92.68%,98.99%,92.68%,95.73%,99.15%
6,missing or skipped payment,274,43.07%,43.38%,43.07%,43.22%,44.85%
7,new plan administrator,2010,92.44%,95.63%,92.44%,94.00%,97.70%
8,other-category,1075,66.23%,58.89%,66.23%,62.35%,63.85%
9,policy level discrepancy,171,8.19%,12.96%,8.19%,10.04%,14.14%


In [114]:
# print()
# print(label_map)

# n_classes=len(label_map)

# report=metrics.classification_report(y_target.squeeze(), y_pred.argmax(axis=1), output_dict=True)

# table = pd.DataFrame(report).transpose().iloc[:n_classes,:]
# table["count"]=table["support"].astype(int)
# table["roc_auc"]=[roc_auc[i] for i in range(n_classes)]
# table["pr_auc"]=[pr_auc[i] for i in range(n_classes)]
# table["subtype_type"]=[label_map[i] for i in range(n_classes)]
# table=table[['subtype_type','count','precision','recall','f1-score','roc_auc','pr_auc']]

# total=table['count'].sum()

# table.loc[len(table.index)]=["MACRO",total,metrics_dict['prec_macro'],metrics_dict['recall_macro'],metrics_dict['fscore_macro'],\
#                         metrics_dict['auc_macro_ovo'],metrics_dict['pr_auc_macro']]

# table.loc[len(table.index)]=["MICRO",total,metrics_dict['prec_micro'],metrics_dict['recall_micro'],metrics_dict['fscore_micro'],\
#                             metrics_dict['auc_micro'],metrics_dict['pr_auc_micro']]

# table.loc[len(table.index)]=["WEIGHT",total,metrics_dict['prec_weighted'],metrics_dict['recall_weighted'],metrics_dict['fscore_weighted'],\
#                         metrics_dict['auc_weighted_ovo'],metrics_dict['pr_auc_weighted']]

# table.style.format({"count":"{:,}","f1-score":"{:.2%}","precision":"{:.2%}","recall":"{:.2%}","roc_auc":"{:.2%}","pr_auc":"{:.2%}"})

In [107]:
# def metric_table(table_name="metrics_training.txt"):
#     Model_Type=[]
#     EPOCH=[]
#     LOSS=[]
#     Accuracy=[]
#     False_Prediction=[]
#     Accuracy=[]
#     fscore_micro=[]
#     fscore_macro=[]
#     fscore_weighted=[]
#     auc_micro=[]
#     auc_macro=[]
#     auc_weighted=[]

#     with open(os.path.join(os.getcwd(),table_name),'r') as f:
#         for line in f:
#             Model_Type.append(str(line.split(",")[0]))
#             EPOCH.append(int(line.split(",")[1]))
#             LOSS.append(float(line.split(",")[2]))
#             Accuracy.append(float(line.split(",")[3]))
#             fscore_micro.append(float(line.split(",")[4]))
#             fscore_macro.append(float(line.split(",")[5]))
#             fscore_weighted.append(float(line.split(",")[6]))
#             auc_micro.append(float(line.split(",")[7]))
#             auc_macro.append(float(line.split(",")[8]))
#             auc_weighted.append(float(line.split(",")[9]))


#     metrics=pd.DataFrame({"model_type":Model_Type,"epoch":EPOCH,"loss":LOSS,"Accuracy":Accuracy,"F1-Score-Micro":fscore_micro,"F1-Score-Macro":fscore_macro,\
#                          "F1-Score-Weighted":fscore_weighted,"AUC_Micro":auc_micro,"AUC-Macro":auc_macro,"AUC-Weighted":auc_weighted})
#     metrics.drop_duplicates(subset=["model_type","epoch"],inplace=True)
#     metrics.sort_values(by=['model_type','epoch'],inplace=True)       
    
#     return metrics

# def style_format(metrics_training, metrics_test, model):
#     metrics_training=metrics_training[metrics_training["model_type"]==model].reset_index(drop=True)
#     metrics_training=metrics_training.sort_values('F1-Score-Weighted', ascending=False).head(1)
#     metrics_training.drop("epoch",inplace=True,axis=1)
#     metrics_training["data"]=["training set"]
    
#     metrics_test=metrics_test[metrics_test["model_type"]==model].reset_index(drop=True)
#     metrics_test=metrics_test.sort_values('F1-Score-Weighted', ascending=False).head(1)
#     metrics_test.drop("epoch",inplace=True,axis=1)
#     metrics_test["data"]=["test set"]
    
#     metrics=pd.concat([metrics_training,metrics_test])
#     first_column =  metrics.pop('data')
#     metrics.insert(0, 'data', first_column)
    
#     return metrics.style.format({"loss":"{:.4f}","Accuracy":"{:.2%}","F1-Score-Micro":"{:.2%}","F1-Score-Macro":"{:.2%}", "F1-Score-Weighted":"{:.2%}", "AUC_Micro":"{:.2%}", \
#                                 "AUC-Macro":"{:.2%}", "AUC-Weighted":"{:.2%}"}) \
#     .set_caption(f"Performance Summary for-- {model}") \
#     .set_table_styles([{
#         'selector': 'caption',
#         'props': [
#             ('color', 'red'),
#             ('font-size', '20px')
#         ]
#     }])

# metric_training=metric_table(table_name="metrics_training.txt")
# metric_test=metric_table(table_name="metrics_test.txt")

# # style_format(metric_training,metric_test, model="bert_base")

# style_format(metric_training,metric_test, model="bert_large")

# # style_format(metric_training,metric_test, model="roberta_base")

# style_format(metric_training,metric_test, model="roberta_large")