In [None]:
import os
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy
from sklearn import metrics

In [None]:
# Load <cwd>/<model_name>/predictions.csv for the customized Longformer run
# and pull out the true / predicted label columns as arrays.
model_name = "longformer_large_customized"
output_dir = os.path.join(os.getcwd(), model_name)
df = pd.read_csv(os.path.join(output_dir, "predictions.csv"))
true_y, pred_y = (df[column].values for column in ("True label", "Predicted label"))

In [None]:
# Banner, surrounded by the same blank lines the individual print() calls gave.
banner = "\n===========Test Set Performance===============\n"
print("", banner, "", sep="\n")

# Per-class precision/recall/F1 report, followed by two blank lines.
print(classification_report(true_y, pred_y), "", "", sep="\n")

# Raw confusion-matrix counts.
print(confusion_matrix(true_y, pred_y))

In [None]:
def metric_table(table_name="metrics_test.txt"):
    """Parse a comma-separated metrics log into a DataFrame.

    Every non-blank line of ``table_name`` must hold at least nine
    comma-separated fields, in this order: model type, total complaint count,
    false positives, false negatives, precision, recall, F1 score, ROC AUC and
    PR AUC. Blank lines (for example a trailing empty line) are skipped.

    Parameters
    ----------
    table_name : str, optional
        Path of the metrics file to read (default ``"metrics_test.txt"``).

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with the columns ``model_type``,
        ``total complaint #``, ``false_positive``, ``false_negative``,
        ``precision``, ``recall``, ``f1_score``, ``roc_auc`` and ``pr_auc``.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than nine fields.
    ValueError
        If a numeric field cannot be converted to ``int`` / ``float``.
    """
    # (output column, converter) in the order the fields appear on each line.
    column_spec = (
        ("model_type", str),
        ("total complaint #", int),
        ("false_positive", int),
        ("false_negative", int),
        ("precision", float),
        ("recall", float),
        ("f1_score", float),
        ("roc_auc", float),
        ("pr_auc", float),
    )
    parsed = {column: [] for column, _ in column_spec}

    with open(table_name, 'r') as f:
        for line in f:
            # A blank line has no fields to parse; skipping it avoids an
            # IndexError on files that contain empty lines.
            if not line.strip():
                continue
            fields = line.split(",")  # split once instead of once per column
            for position, (column, convert) in enumerate(column_spec):
                parsed[column].append(convert(fields[position]))

    return pd.DataFrame(parsed)

def style_format(metrics, type="test set"):
    # metrics=metrics[metrics["model_type"].apply(lambda x : x.split("-")[0]==model.split("-")[0])].reset_index(drop=True)
    return metrics.style.format({"total complaint #":"{:,}","false_positive":"{:,}","false_negative":"{:,}", "precision":"{:.2%}", "recall":"{:.2%}", \
                                "f1_score":"{:.2%}", "roc_auc":"{:.2%}","pr_auc":"{:.2%}"}) \
    .set_caption(f"Performance Summary For {type} ") \
    .set_table_styles([{
        'selector': 'caption',
        'props': [
            ('color', 'red'),
            ('font-size', '20px')
        ]
    }])

In [None]:
# Keep only the last logged test-metrics row of the pretrained Longformer run
# and relabel its model type for the comparison table.
# The relabel goes through .assign(), which returns a new frame, instead of
# assigning a column on the slice returned by .tail(1) (that pattern triggers
# pandas' SettingWithCopyWarning).
model_name = "longformer_large"
output_dir = os.path.join(os.getcwd(), model_name)
pretrained_longformer = (
    metric_table(table_name=os.path.join(output_dir, "metrics_test.txt"))
    .tail(1)
    .assign(
        model_type=lambda frame: frame['model_type'].replace(
            ['longformer-large-4096'], 'pretrained-longformer'
        )
    )
)

In [None]:
# Keep only the last logged test-metrics row of the customized Longformer run
# and relabel its model type for the comparison table.
# The relabel goes through .assign(), which returns a new frame, instead of
# assigning a column on the slice returned by .tail(1) (that pattern triggers
# pandas' SettingWithCopyWarning).
model_name = "longformer_large_customized"
output_dir = os.path.join(os.getcwd(), model_name)
customized_longformer = (
    metric_table(table_name=os.path.join(output_dir, "metrics_test.txt"))
    .tail(1)
    .assign(
        model_type=lambda frame: frame['model_type'].replace(
            ['longformer-large-4096'], 'customized-longformer'
        )
    )
)

In [None]:
# Stack the two single-row summaries into one comparison table.
# Each input row keeps the index label it had in its own metrics table, so the
# two rows can carry the same label; ignore_index=True rebuilds a unique
# 0..n-1 index before the frame is handed to the Styler.
metric_test = (
    pd.concat([pretrained_longformer, customized_longformer], axis=0, ignore_index=True)
    .drop_duplicates(subset=['model_type'])
)
style_format(metric_test, type="test set")

In [None]:
import os
import time
import datetime
import math
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from collections import defaultdict
import argparse
import logging
import math

from sklearn.metrics import roc_auc_score, f1_score,average_precision_score
from sklearn.metrics import precision_recall_fscore_support 
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc 
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import roc_curve,precision_recall_curve

In [None]:
def confusion_matrix_func(output_dir,title_name):
    """Plot the confusion matrix of the predictions stored in ``output_dir``.

    Reads ``predictions.csv`` from ``output_dir``, compares its "True label"
    and "Predicted label" columns, and shows the resulting matrix with the
    display labels ``False`` / ``True`` and comma-grouped cell values.

    Parameters
    ----------
    output_dir : str
        Directory that contains ``predictions.csv``.
    title_name : str
        Title drawn above the plot.
    """
    predictions = pd.read_csv(os.path.join(output_dir, "predictions.csv"))
    actual = predictions["True label"].values
    predicted = predictions["Predicted label"].values

    matrix = metrics.confusion_matrix(actual, predicted)
    display = metrics.ConfusionMatrixDisplay(
        confusion_matrix=matrix,
        display_labels=[False, True],
    )
    display.plot(values_format=',')

    plt.title(title_name)
    plt.show()

In [None]:
def table_read(table_name):
    """Read true labels and predicted probabilities from a 3-column text file.

    Every non-blank line must consist of exactly three comma-separated fields.
    The first field is parsed as an ``int`` label and the third as a ``float``
    probability; the second field is ignored. Blank lines are skipped.

    Note: a later cell in this notebook defines another ``table_read`` that
    reads named CSV columns with pandas; once that cell has run, it replaces
    this function.

    Parameters
    ----------
    table_name : str
        Path of the file to read.

    Returns
    -------
    tuple[list[int], list[float]]
        ``(true_y, prob_y)`` in file order.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly three fields, or a field
        cannot be converted.
    """
    true_y = []
    prob_y = []
    with open(table_name, "r") as file:
        for line in file:
            record = line.strip()
            # Skip empty lines instead of failing on the 3-way unpack below.
            if not record:
                continue
            label, _, prob = record.split(',')
            true_y.append(int(label))
            prob_y.append(float(prob))
    return true_y, prob_y

In [None]:
# Confusion matrix for the predictions saved under <cwd>/longformer_large.
model_name = "longformer_large"
out_dir = os.path.join(os.getcwd(), model_name)
confusion_matrix_func(output_dir=out_dir, title_name="pretrained-longformer")

In [None]:
# Confusion matrix for the predictions saved under <cwd>/longformer_large_customized.
model_name = "longformer_large_customized"
out_dir = os.path.join(os.getcwd(), model_name)
confusion_matrix_func(output_dir=out_dir, title_name="customized-longformer")

In [None]:
def response_rate_eval(logit,label,topk):
    """Compute the positive rate among the highest-scored examples.

    For each fraction ``p`` in ``topk`` the ``ceil(n * p)`` examples with the
    largest scores are selected (ties keep their input order) and the mean of
    their labels is reported.

    Parameters
    ----------
    logit : sequence of float
        Predicted score per example; higher means ranked earlier.
    label : sequence of int
        Actual label per example, aligned with ``logit``.
    topk : iterable of float
        Fractions of the data to evaluate, e.g. ``[0.01, 0.05]``.

    Returns
    -------
    dict
        Maps ``"<percent>%"`` (``p * 100`` rounded to the nearest integer) to
        the label mean of the selected rows. The value is NaN when no rows are
        selected (empty input or ``p == 0``).
    """
    scored = pd.DataFrame({"pred_score": logit, "actual_label": label})
    # A single stable descending sort replaces the previous sort + nlargest
    # pair and makes tie-breaking follow input order.
    ranked = scored.sort_values(by="pred_score", ascending=False, kind="mergesort")
    n_rows = ranked.shape[0]

    response_rate = {}
    for p in topk:
        # Take the ceiling of the *untruncated* product so a small data set
        # still selects at least one row. Rounding first absorbs float noise
        # such as 100 * 0.07 == 7.000000000000001.
        n_top = math.ceil(round(n_rows * p, 9))
        # round() keeps e.g. 0.29 labelled "29%" (0.29 * 100 is 28.99...).
        key = str(int(round(p * 100))) + "%"
        if n_top <= 0:
            response_rate[key] = float("nan")
            continue
        top_rows = ranked.head(n_top)
        response_rate[key] = top_rows["actual_label"].sum() / top_rows.shape[0]
    return response_rate

from matplotlib.ticker import FuncFormatter
def bar_plot(data, colors=None, total_width=0.8, single_width=1, legend=True,title=None,subtitle=None,axis_truncation=0.5):
    """Draw a grouped bar chart on a new 15x8 figure and show it.

    Parameters
    ----------
    data : dict
        Maps a series name (shown in the legend) to a dict of
        ``{category: value}``. Each series contributes one bar per category;
        bars at the same position across series form one group.

        Example:
        data = {
            "model-a": {"1%": 0.9, "2%": 0.8},
            "model-b": {"1%": 0.7, "2%": 0.6},
        }

    colors : array-like, optional
        Colors used for the series, cycled if there are more series than
        colors. If None, the standard matplotlib color cycle is used.

    total_width : float, optional, default: 0.8
        The width of a bar group. 0.8 means that 80% of the x-axis is covered
        by bars and 20% will be spaces between the groups.

    single_width : float, optional, default: 1
        The relative width of a single bar within a group. 1 means the bars
        touch each other within a group, values less than 1 make them thinner.

    legend : bool, optional, default: True
        If true, a legend with the series names is added to the axis.

    title, subtitle, axis_truncation : optional
        Accepted for backward compatibility; currently not used by the plot.

    Raises
    ------
    ValueError
        If ``data`` is empty.

    Notes
    -----
    The x tick labels are ``"top <category> score"``, built from the category
    keys of the first series, so the number of labels always matches the
    number of bar groups.
    """
    if not data:
        raise ValueError("bar_plot requires at least one data series")

    fig, ax = plt.subplots(figsize=(15, 8))

    # Fall back to the default matplotlib color cycle when none is given.
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group and the width of a single bar.
    n_bars = len(data)
    bar_width = total_width / n_bars

    # One legend handle (and its name) per series that actually drew bars.
    handles = []
    names = []

    for i, (name, values) in enumerate(data.items()):
        # Horizontal shift of this series inside every group.
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2

        heights = list(values.values())
        if not heights:
            # Nothing to draw for this series, hence no legend handle either.
            continue

        drawn = ax.bar(
            np.arange(len(heights)) + x_offset,
            heights,
            width=bar_width * single_width,
            color=colors[i % len(colors)],
        )
        handles.append(drawn[0])
        names.append(name)

    if legend:
        ax.legend(handles, names)

    ax.set_ylabel('Accuracy', fontsize=15)
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: "{:.0%}".format(y)))

    # One tick per category of the first series, labelled from its keys.
    categories = list(next(iter(data.values())).keys())
    ax.set_xticks(np.arange(len(categories)))
    ax.set_xticklabels([f"top {category} score" for category in categories])
    ax.set_title("Top Predicted Score  ", fontsize=15)

    plt.show()


In [None]:
def table_read(table_name):
    """Load true labels and predicted probabilities from a predictions CSV.

    Reads ``table_name`` with pandas and uses only the "True label" and
    "Predicted_prob" columns; any other column in the file is ignored and
    need not be present.

    Note: this definition replaces the text-parsing ``table_read`` defined in
    an earlier cell of this notebook.

    Parameters
    ----------
    table_name : str
        Path of the CSV file.

    Returns
    -------
    tuple[list, list]
        ``(true_y, prob_y)`` as plain Python lists in file order.

    Raises
    ------
    KeyError
        If either required column is missing.
    """
    df = pd.read_csv(table_name)
    true_y = df["True label"].values
    prob_y = df["Predicted_prob"].values
    return true_y.tolist(), prob_y.tolist()

In [None]:
# True labels and predicted probabilities from <cwd>/longformer_large/predictions.csv.
model_name = "longformer_large"
table_name = os.path.join(os.getcwd(), model_name, "predictions.csv")
pretrained_lf_true, pretrained_lf_prob = table_read(table_name)

In [None]:
# True labels and predicted probabilities from <cwd>/longformer_large_customized/predictions.csv.
model_name = "longformer_large_customized"
table_name = os.path.join(os.getcwd(), model_name, "predictions.csv")
customized_lf_true, customized_lf_prob = table_read(table_name)

In [None]:
# Fractions of the score-ranked predictions to evaluate.
topk = [0.01, 0.02, 0.05, 0.1]

# Label rate among the top-scored rows, per model.
response_v0 = response_rate_eval(pretrained_lf_prob, pretrained_lf_true, topk)
response_v1 = response_rate_eval(customized_lf_prob, customized_lf_true, topk)

if __name__ == "__main__":
    # Legend name -> per-fraction rates, in plotting order.
    data = dict([
        ("pretrained-longformer", response_v0),
        ("customized-longformer", response_v1),
    ])

    # Palette handed to bar_plot; series pick colors from it in order.
    CL = [
        'r', 'g', 'b', 'c', 'y', 'darkorange',
        'lime', 'grey', 'gold', 'bisque', 'lightseagreen', 'purple',
    ]
    bar_plot(
        data,
        colors=CL,
        total_width=.7,
        single_width=1,
        title="(response rate)",
        subtitle="Test Set ",
        axis_truncation=0.50,
    )


In [None]:
### ROC curve: pretrained vs. customized Longformer

In [None]:
# Pretrained run: labels and probabilities from its predictions.csv.
model_name = "longformer_large"
table_name = os.path.join(os.getcwd(), model_name, "predictions.csv")
pretrained_lf_true, pretrained_lf_prob = table_read(table_name)

# Customized run: labels and probabilities from its predictions.csv.
model_name = "longformer_large_customized"
table_name = os.path.join(os.getcwd(), model_name, "predictions.csv")
customized_lf_true, customized_lf_prob = table_read(table_name)

In [None]:
# False/true positive rates over all score thresholds, per model.
pretrained_lf_fpr, pretrained_lf_tpr, _ = roc_curve(pretrained_lf_true, pretrained_lf_prob)
customized_lf_fpr, customized_lf_tpr, _ = roc_curve(customized_lf_true, customized_lf_prob)

# plt.subplots returns (Figure, Axes); unpack both and draw on the Axes
# directly instead of binding the tuple to `fig` and re-selecting the axes
# through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))

ax.plot(pretrained_lf_fpr, pretrained_lf_tpr, linestyle='solid', label='pretrained-longformer', color='purple', linewidth=2)
ax.plot(customized_lf_fpr, customized_lf_tpr, linestyle='solid', label='customized-longformer', color='red', linewidth=2)
# Diagonal reference line.
ax.plot([0, 1], [0, 1], linestyle='solid', label='random model', color='darkorange', linewidth=2)

ax.set_xlabel('False Positive Rate', fontweight='bold', fontsize=15)
ax.set_ylabel('True Positive Rate', fontweight='bold', fontsize=15)
ax.set_title('ROC AUC CURVE', fontweight='bold', fontsize=18)
ax.set_xlim([0, 1.01])
ax.set_ylim([0, 1.01])
ax.legend(fontsize="large")
plt.show()