In [None]:
import sys
# Make the Fine-Tuning project directory importable (presumably where the
# `utils` module imported below lives - confirm).
# Append only when missing: de-duplicating through set() would shuffle
# sys.path and therefore change which copy of a module gets imported.
_FINE_TUNING_DIR="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning"
if _FINE_TUNING_DIR not in sys.path:
    sys.path.append(_FINE_TUNING_DIR)

In [None]:
import math
import argparse
import os
import pandas as pd
pd.options.mode.chained_assignment=None
from tqdm.auto import tqdm
tqdm.pandas(position=0,leave=True)
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
import utils

from transformers import AutoTokenizer, AutoModel, AutoConfig

### Transformer Models

In [None]:
# Build one summary row per locally stored transformer checkpoint.
# Each spec is (directory name, positions subtracted from
# config.max_position_embeddings when reporting the usable token budget).
# NOTE(review): only roberta-large subtracts 2; the longformer configs may need
# the same correction - confirm against their config files.
MODEL_SPECS=[
    ("roberta-large",2),
    ("deberta-v3-large",0),
    ("deberta-v2-xlarge",0),
    ("longformer-base-4096",0),
    ("longformer-large-4096",0),
    ("bigbird-roberta-large",0),
]
MODEL_ROOT=os.path.join("/opt/omniai/work/instance1/jupyter/", "transformers-models")

summary_rows=[]
for model_name, reserved_positions in MODEL_SPECS:
    model_path=os.path.join(MODEL_ROOT,model_name)
    config=AutoConfig.from_pretrained(model_path)
    # The tokenizer is not needed for the table; it is still loaded so that
    # `tokenizer` stays bound to the last model, as it was before.
    tokenizer=AutoTokenizer.from_pretrained(model_path)
    model=AutoModel.from_pretrained(model_path)
    num_hidden_layers=config.num_hidden_layers
    num_param=sum(p.nelement() for p in model.parameters())
    summary_rows.append({
        "model_name":model_name,
        "maximally allowed token":config.max_position_embeddings-reserved_positions,
        "# of parameters":num_param,
        "num_hidden_layer":num_hidden_layers,
        "embedding_size":config.hidden_size,
    })

# Single construction instead of growing the frame with repeated pd.concat.
model_summary=pd.DataFrame(summary_rows)

In [None]:
# Show the model table with thousands separators on the large integer columns.
model_summary.style.format(
    {
        "maximally allowed token":"{:,}",
        "# of parameters":"{:,}",
        "embedding_size":"{:,}",
    }
)

In [None]:
def metrics_read(df, model_name):
    """Summarise one prediction table as a single-row metrics DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``True_label``, ``Predicted_prob`` and
        ``best_threshold``. Only the first distinct ``best_threshold`` value
        is used.
    model_name : str
        Value written to the ``model_type`` column.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``model_type``, ``total complaint #``,
        ``false_positive``, ``false_negative``, ``precision``, ``recall``,
        ``f1_score``, ``roc_auc`` and ``pr_auc``, filled from the dictionary
        returned by ``utils.model_evaluate``.
    """
    true_y=df["True_label"].values.reshape(-1)
    pred_prob=df["Predicted_prob"].values
    best_threshold=df['best_threshold'].unique()[0]

    test_output=utils.model_evaluate(true_y,pred_prob,best_threshold)

    # NOTE(review): the key spelling is mixed ("false positive" with a space,
    # "false_negative" with an underscore); it is kept as found because
    # utils.model_evaluate is not visible here - confirm against its return value.
    return pd.DataFrame({
        "model_type":[f"{model_name}"],
        "total complaint #":[test_output["total positive"]],
        "false_positive":[test_output["false positive"]],
        "false_negative":[test_output["false_negative"]],
        "precision":[test_output["precision"]],
        "recall":[test_output["recall"]],
        "f1_score":[test_output["f1_score"]],
        "roc_auc":[test_output["AUC"]],
        "pr_auc":[test_output["pr_auc"]],
    })

def style_format(metrics, type="test set"):
    """Return a pandas Styler for a metrics table.

    Count columns are shown with thousands separators and rate columns as
    percentages with two decimals. The caption reads
    "Performance Summary for <type> " and is rendered red at 15px.
    """
    column_formats={
        "total complaint #":"{:,}",
        "false_positive":"{:,}",
        "false_negative":"{:,}",
        "precision":"{:.2%}",
        "recall":"{:.2%}",
        "f1_score":"{:.2%}",
        "roc_auc":"{:.2%}",
        "pr_auc":"{:.2%}",
    }
    caption_style={
        'selector': 'caption',
        'props': [('color', 'red'), ('font-size', '15px')],
    }
    styled=metrics.style.format(column_formats)
    styled=styled.set_caption(f"Performance Summary for {type} ")
    return styled.set_table_styles([caption_style])

def dist_func(df, cols):
    """Frequency table of one column, missing values included.

    Parameters
    ----------
    df : pandas.DataFrame
    cols : str
        Name of the single column to tabulate.

    Returns
    -------
    pandas.DataFrame
        Columns ``[cols, 'count', 'percentage']``, most frequent value first;
        ``percentage`` is a fraction of all rows (0-1).
    """
    # value_counts names its result differently across pandas versions (the
    # column name before 2.0, "count"/"proportion" from 2.0 on), so both the
    # index name and the value name are set explicitly instead of being renamed.
    counts=df[cols].value_counts(dropna=False)
    table=counts.rename_axis(cols).reset_index(name='count')
    table['percentage']=table['count']/table['count'].sum()
    return table.loc[:,[cols,'count','percentage']]

def style_format_dist(df,title):
    """Return a Styler for a count/percentage table, captioned with ``title``.

    ``count`` gets thousands separators, ``percentage`` is shown as a
    percentage with two decimals, and the caption is rendered red at 12px.
    """
    caption_style={
        'selector': 'caption',
        'props': [('color', 'red'),('font-size', '12px')],
    }
    styled=df.style.format({'count':'{:,}','percentage':'{:.2%}'})
    styled=styled.set_caption(f"{title}")
    return styled.set_table_styles([caption_style])

def metrics_df_func(output_dir, model_name):
    """Collect the metrics of every prediction CSV in ``output_dir``.

    Files are processed in sorted name order. The recall level is the text
    between the first "_" and the following "." of the file name (for example
    ``predictions_95.csv`` gives 95). A single-digit level is read as tens of
    percent, so ``predictions_9.csv`` is labelled "recall>=90%".

    Parameters
    ----------
    output_dir : str
        Directory holding the prediction CSV files.
    model_name : str
        Forwarded to ``metrics_read``.

    Returns
    -------
    pandas.DataFrame
        One row per file: a leading "Recall in Val" label column followed by
        the columns produced by ``metrics_read``.

    Raises
    ------
    FileNotFoundError
        If ``output_dir`` contains no ``.csv`` file.
    """
    data_name=sorted(x for x in os.listdir(output_dir) if x.split(".")[-1]=="csv")
    if not data_name:
        raise FileNotFoundError(f"no prediction CSV files found in {output_dir!r}")

    tables=[]
    for file_name in data_name:
        table=metrics_read(pd.read_csv(os.path.join(output_dir , file_name)),model_name)
        level=file_name.split("_")[1].split(".")[0]
        # The trailing zero used to be added to whichever file sorted first; it
        # is now tied to the level itself, so a directory without a
        # single-digit file no longer produces labels such as "recall>=950%".
        if len(level)==1 and level.isdigit():
            level=level+"0"
        table.insert(0,"Recall in Val",[f"recall>={level}%"])
        tables.append(table)

    return pd.concat(tables,axis=0,ignore_index=True)

def metrics_df(output_dir, model_name):
    """Metrics of every prediction CSV in ``output_dir``, one row per distinct recall.

    Files are read in sorted name order and passed through ``metrics_read``.
    When several files give the same ``recall`` value only the first is kept;
    the surviving rows keep their original positional index.

    Raises
    ------
    FileNotFoundError
        If ``output_dir`` contains no ``.csv`` file.
    """
    data_name=sorted(x for x in os.listdir(output_dir) if x.split(".")[-1]=="csv")
    if not data_name:
        raise FileNotFoundError(f"no prediction CSV files found in {output_dir!r}")

    # Build all per-file tables first and concatenate once.
    tables=[
        metrics_read(pd.read_csv(os.path.join(output_dir , file_name)),model_name)
        for file_name in data_name
    ]
    combined=pd.concat(tables,axis=0,ignore_index=True)
    return combined.drop_duplicates(subset=["recall"], keep="first")

In [None]:
def response_rate_eval(logit,label,topk):
    """Share of positive labels among the highest-scored rows.

    Parameters
    ----------
    logit : sequence of float
        Prediction score per row.
    label : sequence of int
        Actual label per row (summed, so expected to be 0/1).
    topk : iterable of float
        Fractions of the data to inspect, e.g. ``[0.01, 0.05]``.

    Returns
    -------
    dict
        Maps "<percent>%" to ``sum(label) / row count`` over the top
        ``ceil(n_rows * fraction)`` rows by score. The value is NaN when that
        selection is empty (no rows, or a fraction of 0).
    """
    scored=pd.DataFrame({"pred_score":logit,"actual_label":label})
    scored=scored.sort_values(by="pred_score", ascending=False)
    n_rows=scored.shape[0]

    response_rate={}
    for p in topk:
        # Round up so a small data set still yields at least one row; the
        # previous int() truncated before math.ceil, which could give 0 rows
        # and a division by zero. round(..., 9) absorbs float noise such as
        # 7.000000000000001 before the ceiling is taken.
        n_top=math.ceil(round(n_rows*p, 9))
        top_rows=scored.nlargest(n_top,"pred_score",keep="first")
        # round() rather than int(): int(0.29*100) is 28.
        key=f"{round(p*100)}%"
        if top_rows.shape[0]==0:
            response_rate[key]=float("nan")
        else:
            response_rate[key]=top_rows.actual_label.sum()/top_rows.shape[0]
    return response_rate

from matplotlib.ticker import FuncFormatter
def bar_plot(data, colors=None, total_width=0.8, single_width=1, legend=True,title=None,subtitle=None,axis_truncation=0.5):
    """Draw a grouped bar chart on a new 15x8 figure and show it.

    Parameters
    ----------
    data : dict
        Maps a series name (one legend entry) to a dict of
        ``{category: value}``. Every series is expected to list the same
        categories in the same order; the x tick labels are taken from the
        first series and rendered as "top <category> score".

        Example:
        data = {
            "model a": {"1%": 0.9, "5%": 0.7},
            "model b": {"1%": 0.8, "5%": 0.6},
        }

    colors : array-like, optional
        Bar colors, cycled when there are more series than colors. If None,
        the standard matplotlib color cycle is used. (default: None)

    total_width : float, optional, default: 0.8
        Width of one bar group. 0.8 means 80% of the x-axis is covered by
        bars and 20% is left as space between the groups.

    single_width : float, optional, default: 1
        Relative width of a single bar within its group. 1 makes the bars
        touch each other, smaller values make them thinner.

    legend : bool, optional, default: True
        Whether to add a legend with one entry per series.

    title, subtitle, axis_truncation : optional
        Accepted for backward compatibility; currently not used.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    if not data:
        raise ValueError("bar_plot needs at least one data series")

    fig, ax = plt.subplots(figsize =(15, 8))

    # Fall back to the default color cycle when no colors were provided
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group and the width of a single bar
    n_bars = len(data)
    bar_width = total_width / n_bars

    # Legend handles and their labels, kept in step with each other
    bars = []
    bar_labels = []

    for i, (name, values) in enumerate(data.items()):
        heights = list(values.values())
        if not heights:
            # Nothing to draw for this series, hence no legend handle either.
            continue
        # The offset in x direction of this series within each group
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
        positions = np.arange(len(heights)) + x_offset
        drawn = ax.bar(positions, heights, width=bar_width * single_width, color=colors[i % len(colors)])
        bars.append(drawn[0])
        bar_labels.append(name)

    if legend:
        ax.legend(bars, bar_labels)

    ax.set_ylabel('Accuracy',fontsize=15)
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y,_: "{:.0%}".format(y)))

    # Tick labels follow the categories of the first series instead of a
    # fixed four-item tuple, so any number of categories stays consistent.
    categories = list(next(iter(data.values())).keys())
    ax.set_xticks(np.arange(len(categories)))
    ax.set_xticklabels([f"top {category} score" for category in categories])
    ax.set_title("Top Predicted Score  ",fontsize=15)

    plt.show()


#### TFIDF Model

In [None]:
# TF-IDF + xgboost, results/04_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/TFIDF/results/04_23/xgboost/"
model_name="xgboost"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="xgboost (04/2023)")

In [None]:
# TF-IDF + xgboost, results/02_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/TFIDF/results/02_23/xgboost/"
model_name="xgboost"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="xgboost (02/2023)")

In [None]:
# TF-IDF + xgboost, results/02_23 (same directory and caption as the preceding cell).
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/TFIDF/results/02_23/xgboost/"
model_name="xgboost"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="xgboost (02/2023)")

In [None]:
# TF-IDF + xgboost on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/xgboost/tfidf_data/"
model_name="xgboost"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="xgboost")

In [None]:
# TF-IDF + xgboost on the v3_new_email results (same directory and caption as the preceding cell).
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/xgboost/tfidf_data/"
model_name="xgboost"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="xgboost")

In [None]:
# TF-IDF + lightgbm on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/lightgbm/tfidf_data/"
model_name="lightgbm"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="lightgbm")

In [None]:
# TF-IDF + lightgbm on the v3_new_email results (same directory and caption as the preceding cell).
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/lightgbm/tfidf_data/"
model_name="lightgbm"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="lightgbm")

In [None]:
# TF-IDF + random forest on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/randomforest/tfidf_data/"
model_name="random-forest"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="random-forest")

#### deberta-v3-large model

In [None]:
# deberta-v3-large, results/04_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/04_23/deberta_v3_large"
model_name="deberta-v3-large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v3-large model(04/2023)")

In [None]:
# deberta-v3-large, results/03_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/03_23/deberta_v3_large"
model_name="deberta-v3-large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v3-large model(03/2023)")

In [None]:
# deberta-v3-large, results/02_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/02_23/deberta_v3_large"
model_name="deberta-v3-large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v3-large model(02/2023)")

In [None]:
# deberta-v3-large on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v3_large"
model_name="deberta-v3-large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v3-large model")

In [None]:
# deberta-v3-large predictions from predictions_97.csv, split into the two error types.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v3_large"
df=pd.read_csv(os.path.join(output_dir, "predictions_97.csv"))
# Explicit copies: later cells assign columns on these frames, which should
# neither act on a view of `df` nor rely on the chained-assignment warning being off.
false_positive=df[(df.True_label==0) & (df.Predicted_label==1)].copy()
false_negative=df[(df.True_label==1) & (df.Predicted_label==0)].copy()

In [None]:

# Bucket every test-set row by whether text_length is at most 512, then tabulate the buckets.
def _length_bucket(length):
    if length<=512:
        return "len<=512"
    return "len>512"

df["text_length_v2"]=df["text_length"].map(_length_bucket)
style_format_dist(
    dist_func(df, "text_length_v2"),
    title="text length in test set",
)

In [None]:
pd.options.mode.chained_assignment=None
# Bucket into a derived frame instead of overwriting the numeric text_length
# column: the overwrite made this cell fail on a second run (str <= int).
false_positive_binned=false_positive.assign(
    text_length=false_positive["text_length"].apply(lambda x: "len<=512" if x<=512 else "len>512")
)
style_format_dist(dist_func(false_positive_binned, "text_length"),title="text length in false positive")

In [None]:
# Bucket into a derived frame instead of overwriting the numeric text_length
# column: the overwrite made this cell fail on a second run (str <= int).
false_negative_binned=false_negative.assign(
    text_length=false_negative["text_length"].apply(lambda x: "len<=512" if x<=512 else "len>512")
)
style_format_dist(dist_func(false_negative_binned, "text_length"),title="text length in false negative")

In [None]:
# Distribution of the is_feedback flag over all loaded predictions.
style_format_dist(
    dist_func(df, "is_feedback"),
    title="feedback_email in test set",
)

In [None]:
# Distribution of the is_feedback flag among the false positives.
style_format_dist(
    dist_func(false_positive, "is_feedback"),
    title="feedback_email in false positive",
)

In [None]:
# Distribution of the is_feedback flag among the false negatives.
style_format_dist(
    dist_func(false_negative, "is_feedback"),
    title="feedback_email in false negative",
)

In [None]:
# Confusion matrix of the deberta-v3-large predictions stored in predictions_97.csv.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v3_large"
df=pd.read_csv(os.path.join(output_dir, "predictions_97.csv"))
deberta_v3_true, deberta_v3_pred=df["True_label"].tolist(), df["Predicted_label"].tolist()

# Re-imported here because earlier cells bind a DataFrame to the name `metrics`.
from sklearn import metrics

# Named `cm_counts` so the `confusion_matrix` function imported from
# sklearn.metrics at the top of the notebook is not replaced by an array.
cm_counts = metrics.confusion_matrix(deberta_v3_true, deberta_v3_pred)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = cm_counts, display_labels = [False, True])

cm_display.plot(values_format=',')
plt.title("deberta-v3-large Model")
plt.show()

In [None]:
# Peek at the first two rows of the v2_new_email deberta-v3-large predictions.
output_dir = "/opt/omniai/work/instance1/jupyter/v2_new_email/Fine-Tuning/results/deberta_v3_large"
df = pd.read_csv(
    os.path.join(output_dir, "predictions_97.csv")
)
df.head(2)

#### deberta-v2-xlarge Model

In [None]:
# deberta-v2-xlarge, results/04_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/04_23/deberta_v2_xlarge"
model_name="deberta-v2-xlarge"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v2-xlarge model(04/2023)")

In [None]:
# deberta-v2-xlarge, results/03_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/03_23/deberta_v2_xlarge"
model_name="deberta-v2-xlarge"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v2-xlarge model(03/2023)")

In [None]:
# deberta-v2-xlarge on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v2_xlarge"
model_name="deberta-v2-xlarge"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="deberta-v2-xlarge model")

In [None]:
# Load the deberta-v2-xlarge predictions stored in predictions_95.csv.
output_dir = "/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v2_xlarge"
df = pd.read_csv(
    os.path.join(output_dir, "predictions_95.csv")
)

In [None]:
# Bucket every loaded row by whether text_length is at most 512, then tabulate the buckets.
def _length_bucket(length):
    if length<=512:
        return "len<=512"
    return "len>512"

df["text_length_v2"]=df["text_length"].map(_length_bucket)
style_format_dist(
    dist_func(df, "text_length_v2"),
    title="text length in test set",
)

In [None]:
# Split the loaded predictions into the two error types. Explicit copies keep
# later column assignments off views of `df`.
false_positive=df[(df.True_label==0) & (df.Predicted_label==1)].copy()
false_negative=df[(df.True_label==1) & (df.Predicted_label==0)].copy()
# Bucket into a derived frame instead of overwriting the numeric text_length
# column: the overwrite made this cell's binning fail on a second pass (str <= int).
false_positive_binned=false_positive.assign(
    text_length=false_positive["text_length"].apply(lambda x: "len<=512" if x<=512 else "len>512")
)
style_format_dist(dist_func(false_positive_binned, "text_length"),title="text length in false positive")

In [None]:
# Bucket into a derived frame instead of overwriting the numeric text_length
# column: the overwrite made this cell fail on a second run (str <= int).
false_negative_binned=false_negative.assign(
    text_length=false_negative["text_length"].apply(lambda x: "len<=512" if x<=512 else "len>512")
)
style_format_dist(dist_func(false_negative_binned, "text_length"),title="text length in false negative")

In [None]:
# Distribution of the is_feedback flag over all loaded predictions.
style_format_dist(
    dist_func(df, "is_feedback"),
    title="feedback_email in test set",
)

In [None]:
# Distribution of the is_feedback flag among the false positives.
style_format_dist(
    dist_func(false_positive, "is_feedback"),
    title="feedback_email in false positive",
)

In [None]:
# Distribution of the is_feedback flag among the false negatives.
style_format_dist(
    dist_func(false_negative, "is_feedback"),
    title="feedback_email in false negative",
)

In [None]:
# Confusion matrix of the deberta-v2-xlarge predictions stored in predictions_95.csv.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/deberta_v2_xlarge"
df=pd.read_csv(os.path.join(output_dir, "predictions_95.csv"))
deberta_v2_true, deberta_v2_pred=df["True_label"].tolist(), df["Predicted_label"].tolist()

# Re-imported here because earlier cells bind a DataFrame to the name `metrics`.
from sklearn import metrics

# Named `cm_counts` so the `confusion_matrix` function imported from
# sklearn.metrics at the top of the notebook is not replaced by an array.
cm_counts = metrics.confusion_matrix(deberta_v2_true, deberta_v2_pred)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = cm_counts, display_labels = [False, True])

cm_display.plot(values_format=',')
plt.title("deberta_v2-xlarge Model")
plt.show()

#### longformer-base model

In [None]:
# longformer_base_4096 on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/longformer_base_4096"
model_name="longformer_base"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="longformer_base model")

In [None]:
# longformer_base_4096_customized on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/longformer_base_4096_customized"
model_name="longformer_base"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized longformer_base model")

#### longformer-large model

In [None]:
# longformer_large_4096_customized, results/04_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/04_23/longformer_large_4096_customized"
model_name="longformer_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized longformer_large(04/2023)")

In [None]:
# longformer_large_4096_customized, results/03_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/03_23/longformer_large_4096_customized"
model_name="longformer_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized longformer_large(03/2023)")

In [None]:
# longformer_large_4096_customized, results/02_23: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/02_23/longformer_large_4096_customized"
model_name="longformer_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized longformer_large(02/2023)")

In [None]:
# longformer_large_4096 on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/longformer_large_4096"
model_name="longformer_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="longformer_large model")

In [None]:
# longformer_large_4096_customized on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/longformer_large_4096_customized"
model_name="longformer_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized longformer_large model")

#### Bigbird-Roberta-Large

In [None]:
# bigbird_roberta_large_customized on the v3_new_email results: one row per distinct test recall.
# Stored in `summary_table`, not `metrics`, so the sklearn `metrics` module imported above is not overwritten.
output_dir="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/bigbird_roberta_large_customized"
model_name="bigbird_roberta_large"
summary_table=metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")
style_format(summary_table,  type="customized bigbird_roberta_large model")

In [None]:
# output_dir="/opt/omniai/work/instance1/jupyter/v2_new_email/Fine-Tuning/misc/results/bigbird_roberta"
# model_name="bigbird_roberta"
# metrics=metrics_df_func(output_dir, model_name)
# style_format(metrics,  type="bigbird_roberta model")

In [None]:
# output_dir="/opt/omniai/work/instance1/jupyter/v2_new_email/Fine-Tuning/misc/results/bigbird_roberta_customized"
# model_name="bigbird_roberta"
# metrics=metrics_df_func(output_dir, model_name)
# style_format(metrics,  type="customized bigbird_roberta model")

In [None]:
# Collect, per model, the precision and recall lists for one test window.
# `precision[i]` and `recall[i]` belong to the i-th entry of `pr_sources`;
# the plotting cell relies on this order.
test_date="04_23"

TFIDF_RESULTS=f"/opt/omniai/work/instance1/jupyter/v4_new_email/TFIDF/results/{test_date}"
FINE_TUNING_RESULTS=f"/opt/omniai/work/instance1/jupyter/v4_new_email/Fine-Tuning/results/{test_date}"

# (results directory, model label passed to metrics_df).
# deberta-v2-xlarge is intentionally not part of this comparison (it was
# commented out in the original cell).
pr_sources=[
    (f"{TFIDF_RESULTS}/xgboost/", "xgboost"),
    (f"{TFIDF_RESULTS}/lightgbm/", "lightgbm"),
    (f"{TFIDF_RESULTS}/randomforest/", "random-forest"),
    (f"{FINE_TUNING_RESULTS}/roberta_large_customized", "roberta-large"),
    (f"{FINE_TUNING_RESULTS}/deberta_v3_large", "deberta-v3-large"),
    (f"{FINE_TUNING_RESULTS}/longformer_base_4096_customized", "longformer_base"),
    (f"{FINE_TUNING_RESULTS}/longformer_large_4096_customized", "longformer_large"),
    (f"{FINE_TUNING_RESULTS}/bigbird_roberta_large_customized", "bigbird_large"),
]

precision=[]
recall=[]
for output_dir, model_label in pr_sources:
    df=metrics_df(output_dir, model_label)
    precision.append(df["precision"].tolist())
    recall.append(df["recall"].tolist())


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Legend labels, in the same order in which `precision` / `recall` were filled.
models = ['TFIDF+xgboost','TFIDF+lightgbm','TFIDF+random-forest','roberta-large','deberta-v3-large','longformer-base', 'longformer-large','bigbird_large']

markers = ['o', 's', 'D', 'x', '*', '<', 'p', '^']
colors = ['blue', 'green', 'orange', 'red', 'brown','lawngreen', 'purple','black']

# A length mismatch means the loading cell and the label list drifted apart,
# which would attach curves to the wrong model names.
assert len(recall) == len(precision) == len(models), "precision/recall lists do not match the model labels"

plt.figure(figsize=(10, 8))

# One dotted line per model
for model_label, model_recall, model_precision, marker, color in zip(models, recall, precision, markers, colors):
    plt.plot(model_recall, model_precision, marker=marker,  color=color, label=model_label, linewidth=3, linestyle=":", markersize=8)

# The title follows `test_date` ("MM_YY") instead of repeating a fixed date.
test_month, test_year = test_date.split("_")
plt.xlabel('Recall', fontsize=14)
plt.ylabel('Precision', fontsize=14)
plt.title(f'Precision-Recall Curve \n(test_set={test_month}/20{test_year})', fontsize=16)
plt.grid(True)

# Format axis values as percentages
ax = plt.gca()
ax.xaxis.set_major_locator(mtick.MultipleLocator(base=0.01))
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_locator(mtick.MultipleLocator(base=0.001))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

plt.ylim(0.004,0.018)

# Horizontal line for the benchmark model; it carries its own label so a
# single legend call covers every artist.
benchmark_precision=0.0053
plt.axhline(y=benchmark_precision, color=(0.8,0.7,0.5),linestyle='--', linewidth=3, label="Lexican Search")

plt.legend(bbox_to_anchor=(1,0.5), fontsize=14)

plt.show()

In [None]:
# Collect, per model, the precision and recall of every operating point whose
# test recall exceeds `threshold`. `precision[i]` and `recall[i]` belong to the
# i-th entry of `pr_sources`; the plotting cell relies on this order.
threshold=0.94

TFIDF_RESULTS="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF"
FINE_TUNING_RESULTS="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results"

# (results directory, model label passed to metrics_df)
pr_sources=[
    (f"{TFIDF_RESULTS}/xgboost/tfidf_data/", "xgboost"),
    (f"{TFIDF_RESULTS}/lightgbm/tfidf_data/", "lightgbm"),
    (f"{TFIDF_RESULTS}/randomforest/tfidf_data/", "random-forest"),
    (f"{FINE_TUNING_RESULTS}/deberta_v3_large", "deberta-v3-large"),
    (f"{FINE_TUNING_RESULTS}/deberta_v2_xlarge", "deberta-v2-xlarge"),
    (f"{FINE_TUNING_RESULTS}/longformer_base_4096_customized", "longformer_base"),
    (f"{FINE_TUNING_RESULTS}/longformer_large_4096_customized", "longformer_large"),
    (f"{FINE_TUNING_RESULTS}/bigbird_roberta_large_customized", "bigbird_large"),
]

precision=[]
recall=[]
for output_dir, model_label in pr_sources:
    df=metrics_df(output_dir, model_label)
    df=df[df.recall>threshold]
    precision.append(df["precision"].tolist())
    recall.append(df["recall"].tolist())

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Legend labels, in the same order in which `precision` / `recall` were filled.
models = [
    'TFIDF+xgboost',
    'TFIDF+lightgbm',
    'TFIDF+random-forest',
    'deberta-v3-large',
    'deberta-v2-xlarge',
    'longformer-base',
    'longformer-large',
    'bigbird-large',
]
markers = ['o', 's', 'D', 'x', '*', '<', 'p', '^']
colors = ['blue', 'green', 'orange', 'red', 'brown','lawngreen', 'purple','black']

plt.figure(figsize=(8, 6))

# One marker style and color per model; every operating point is one dot.
for i, model_label in enumerate(models):
    plt.scatter(recall[i], precision[i], color=colors[i], marker=markers[i],label=model_label, s=100)

plt.title('Precision-Recall Curve', fontsize=14)
plt.xlabel('Recall', fontsize=12)
plt.ylabel('Precision', fontsize=12)
plt.grid(True)

# Both axes hold fractions in [0, 1]; display them as percentages.
ax = plt.gca()
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

plt.legend(bbox_to_anchor=(1,0.5))

# # Add precision and recall values as annotations
# for i in range(len(models)):
#     for j in range(len(precision[i])):
#         x = recall[i][j]
#         y = precision[i][j]
#         text = f'({precision[i][j]*100:.2f}%, {recall[i][j]*100:.2f}%)'
#         plt.annotate(text, (x, y), textcoords="offset points", xytext=(0, 10), ha='center')

plt.show()

### Top prediction score

In [None]:
# Load true labels and predicted probabilities from each model's
# predictions_9.csv; these feed the top-score response-rate comparison.
TFIDF_DIR_TEMPLATE="/opt/omniai/work/instance1/jupyter/v3_new_email/TFIDF/{model_name}/tfidf_data/"
FINE_TUNING_DIR_TEMPLATE="/opt/omniai/work/instance1/jupyter/v3_new_email/Fine-Tuning/results/{model_name}"

# (results sub-directory name, directory template)
top_score_sources=[
    ("randomforest", TFIDF_DIR_TEMPLATE),
    ("xgboost", TFIDF_DIR_TEMPLATE),
    ("lightgbm", TFIDF_DIR_TEMPLATE),
    ("deberta_v3_large", FINE_TUNING_DIR_TEMPLATE),
    ("deberta_v2_xlarge", FINE_TUNING_DIR_TEMPLATE),
    ("longformer_base_4096_customized", FINE_TUNING_DIR_TEMPLATE),
    ("longformer_large_4096_customized", FINE_TUNING_DIR_TEMPLATE),
    ("bigbird_roberta_large_customized", FINE_TUNING_DIR_TEMPLATE),
]

top_score_inputs={}
for model_name, dir_template in top_score_sources:
    output_dir=dir_template.format(model_name=model_name)
    df=pd.read_csv(os.path.join(output_dir , "predictions_9.csv"))
    top_score_inputs[model_name]=(df["True_label"].tolist(), df["Predicted_prob"].tolist())

# Keep the individual names that the response-rate cell reads.
randomforest_true, randomforest_prob=top_score_inputs["randomforest"]
xgboost_true, xgboost_prob=top_score_inputs["xgboost"]
lightgbm_true, lightgbm_prob=top_score_inputs["lightgbm"]
deberta_v3_true, deberta_v3_prob=top_score_inputs["deberta_v3_large"]
deberta_v2_true, deberta_v2_prob=top_score_inputs["deberta_v2_xlarge"]
longformer_base_true, longformer_base_prob=top_score_inputs["longformer_base_4096_customized"]
longformer_large_true, longformer_large_prob=top_score_inputs["longformer_large_4096_customized"]
bigbird_true, bigbird_prob=top_score_inputs["bigbird_roberta_large_customized"]

In [None]:
# Fractions of the score-ranked data at which the response rate is measured.
topk=[0.01,0.02,0.05,0.1]

# TF-IDF baselines
response_lightgbm = response_rate_eval(lightgbm_prob,lightgbm_true, topk)
response_xgboost = response_rate_eval(xgboost_prob,xgboost_true, topk)
response_randomforest = response_rate_eval(randomforest_prob, randomforest_true, topk)

# Fine-tuned transformer models
response_deberta_v3 = response_rate_eval(deberta_v3_prob, deberta_v3_true, topk)
response_deberta_v2 = response_rate_eval(deberta_v2_prob, deberta_v2_true, topk)
response_longformer_base = response_rate_eval(longformer_base_prob, longformer_base_true, topk)
response_longformer_large = response_rate_eval(longformer_large_prob, longformer_large_true, topk)
response_bigbird = response_rate_eval(bigbird_prob, bigbird_true, topk)

if __name__ == "__main__":
    # bar_plot builds its legend from this dict's keys, so the insertion
    # order below is the legend order.
    data = dict([
        ("tfidf+lightgbm", response_lightgbm),
        ("tfidf+xgboost", response_xgboost),
        ("tfidf+random-forest", response_randomforest),
        ("deberta_v3_large", response_deberta_v3),
        ("deberta_v2_xlarge", response_deberta_v2),
        ("longformer-base", response_longformer_base),
        ("longformer-large", response_longformer_large),
        ("bigbird-large", response_bigbird),
    ])

    CL=['r', 'g', 'b', 'c', 'y', 'darkorange', 'lime', 'grey','gold','bisque', 'lightseagreen', 'purple']
    bar_plot(
        data,
        colors=CL,
        total_width=.7,
        single_width=1,
        title="(response rate)",
        subtitle="Test Set ",
        axis_truncation=0.50,
    )


In [None]:
def val_mask_creation(dataset,target_variable, validation_split):
    """Split ``dataset`` into train / validation frames, per class and by recency.

    Rows are ordered by the ``time`` column, newest first. For every distinct value
    of ``target_variable`` the newest ``floor(validation_split * class_size)`` rows
    go to validation and the remaining (older) rows go to training, so each class
    keeps roughly the same share in both frames.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``time`` column and the ``target_variable`` column. The
        caller's frame is left untouched (a sorted copy is used internally).
    target_variable : str
        Name of the label column used for stratification.
    validation_split : float
        Fraction of each class assigned to validation.

    Returns
    -------
    (df_train, df_val) : tuple of pandas.DataFrame
        Both are indexed by position in the time-sorted copy. ``df_val`` has its
        ``data_type`` column set to ``"val"``; ``df_train`` is returned as-is.
    """
    # Sort a copy instead of sorting in place, so the caller's frame is not
    # silently reordered as a side effect of calling this function.
    dataset=dataset.sort_values(by='time', ascending=False, axis=0).reset_index(drop=True)

    # reshape(-1) flattens an (n, 1) selection like squeeze() did, but keeps a
    # one-row frame 1-D (squeeze() would collapse it to a 0-d array).
    LABEL=dataset[target_variable].values.reshape(-1)
    IDX=np.arange(LABEL.shape[0])

    train_idx=[]
    val_idx=[]
    for label in np.unique(LABEL).tolist():
        _idx=IDX[LABEL==label]
        ## split train and validation by time instead of randomly:
        ## rows are newest-first, so the head of each class is the validation part
        split=int(np.floor(validation_split*_idx.shape[0]))

        val_idx.extend(_idx[ : split])
        print(len(_idx[ : split]))
        train_idx.extend(_idx[split:])

    # Explicit integer dtype: an empty list would otherwise become a float array.
    val_idx=np.array(val_idx,dtype=int)
    train_idx=np.array(train_idx,dtype=int)

    df_train=dataset.loc[train_idx,:]
    df_val=dataset.loc[val_idx,:].copy()
    df_val["data_type"]="val"

    return df_train, df_val

# Directory holding the pickled email dumps.
data_path=os.path.join("/opt/omniai/work/instance1/jupyter/", "v3_new_email","datasets","split_data")
# Keep files whose second-to-last "_"-separated token is "pickle". Slicing ([-2:-1])
# rather than indexing ([-2]) skips names without an underscore instead of raising
# IndexError; sorting makes the load order independent of os.listdir().
data_name=sorted(x for x in os.listdir(data_path) if x.split("_")[-2:-1]==["pickle"])
if not data_name:
    raise FileNotFoundError(f"no pickle files found in {data_path}")

# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
frames=[]
for data in data_name:
    x=pd.read_pickle(os.path.join(data_path,data))
    frames.append(x)
    # print("{:<20}{:<20,}".format(data.split("_")[-1],x.shape[0]))
# Concatenate once instead of re-copying the growing frame on every iteration.
df=pd.concat(frames,axis=0,ignore_index=True)

### only keep emails with status=closed
# .copy() so the column assignments below write to an independent frame, not a slice.
df=df[df.state=="closed"].copy()

df['time'] = pd.to_datetime(df['time'])
df=df.sort_values(by='time')
## Rows whose year is 2022 or 2023 and whose month is 9-12 or 1-3 are tagged "train";
## every other row is tagged "test". The validation set is then carved out of the
## "train" rows below (the most recent 20% of each class).
## NOTE(review): the month check is not tied to the year, so e.g. 01/2022 or 09/2023
## would also be tagged "train" -- confirm the data only covers the intended window.
set_categories=lambda row: "train" if (row["year"] in [2022,2023] and row["month"] in [9,10,11,12,1,2,3]) else "test"
df["data_type"]=df.progress_apply(set_categories,axis=1)
# Binary encodings of the "Y"/"N" flags (anything other than "Y" becomes 0).
df.loc[:,'target']=df.loc[:,'is_complaint'].progress_apply(lambda x: 1 if x=="Y" else 0)
df.loc[:,'is_feedback']=df.loc[:,'is_feedback'].progress_apply(lambda x: 1 if x=="Y" else 0)

# Stratified, time-ordered train / validation split of the "train" rows.
df1=df[df.data_type=="train"]
df1=df1.reset_index(drop=True)
df_train,df_val=val_mask_creation(df1,'is_complaint', validation_split=0.2)

# Re-assemble one frame whose data_type column is "train", "val" or "test".
df_test=df[df.data_type=="test"]
df=pd.concat([df_train,df_val,df_test],axis=0,ignore_index=True)

In [None]:
def label_distribution(df,data_type):
    """Count and share of each ``is_complaint`` value within one data split.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``data_type`` and ``is_complaint``.
    data_type : str
        Value of ``data_type`` selecting the rows to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``is_complaint`` value (missing values included),
        most frequent first, with columns
        ``['data_type', 'is_complaint', 'count', 'percentage']``.
        ``percentage`` is a fraction between 0 and 1.
    """
    subset=df[df["data_type"]==data_type]
    counts=subset["is_complaint"].value_counts(dropna=False)

    # Build the table from the Series' index and values directly. Going through
    # reset_index()/rename() depends on the column names value_counts produces,
    # which changed in pandas 2.0 and broke the rename + merge used previously.
    tempt3=pd.DataFrame({
        "is_complaint":counts.index,
        "count":counts.values,
        # same arithmetic as value_counts(normalize=True): count / total count
        "percentage":counts.values/counts.values.sum(),
    })
    tempt3['data_type']=data_type
    tempt3=tempt3.loc[:,['data_type','is_complaint','count','percentage']]
    return tempt3

def style_format(df):
    return df.style.format({'count':'{:,}','percentage':'{:.2%}'})\
           .set_caption(f"label distribution")\
           .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '15px')]}])

In [None]:
# Stack the label tables for train, val and test (in that order) with a single
# concat, rather than growing an initially empty frame one split at a time.
dist_df=pd.concat([label_distribution(df,split_name) for split_name in ("train","val","test")])
# Last expression of the cell: the styled table is rendered as the cell output.
style_format(dist_df)