In [None]:
import sys

# Extra directory added to the import search path so that modules stored there
# can be imported by later cells (presumably the local `utils` module lives
# here -- TODO confirm).
FINE_TUNING_DIR = "/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning"

# Only append once, so re-running this cell does not keep growing sys.path.
if FINE_TUNING_DIR not in sys.path:
    sys.path.append(FINE_TUNING_DIR)

# Remove duplicate entries while KEEPING the original order. The previous
# list(set(sys.path)) shuffled the entries, and sys.path order decides which
# module wins when the same name exists in several directories.
sys.path[:] = list(dict.fromkeys(sys.path))

In [None]:
import math
import argparse
import os
import pandas as pd
pd.options.mode.chained_assignment=None
from tqdm.auto import tqdm
tqdm.pandas(position=0,leave=True)
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
import utils

from transformers import AutoTokenizer, AutoModel, AutoConfig

In [None]:
def metrics_read(df, model_name):
    """Build a one-row metrics table for a single prediction file.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "True_label", "Predicted_label",
        "Predicted_prob" and "best_threshold".
    model_name : str
        Value stored in the "model_type" column of the result.

    Returns
    -------
    pandas.DataFrame
        One row holding the model name plus the values that
        ``utils.model_evaluate`` returns under the keys listed below.
    """
    labels = df["True_label"].values
    # Read but not used afterwards; the lookup still fails early with a
    # KeyError when the column is missing.
    pred_y = df["Predicted_label"].values
    scores = df["Predicted_prob"].values
    # The first distinct value of the threshold column is used for the whole file.
    threshold = df['best_threshold'].unique()[0]

    test_output = utils.model_evaluate(labels.reshape(-1), scores, threshold)

    # Output column -> key in the dict returned by utils.model_evaluate.
    # NOTE(review): the keys mix spaces and underscores ("false positive" vs
    # "false_negative") -- confirm against utils.model_evaluate.
    column_to_key = {
        "total complaint #": "total positive",
        "false_positive": "false positive",
        "false_negative": "false_negative",
        "precision": "precision",
        "recall": "recall",
        "f1_score": "f1_score",
        "roc_auc": "AUC",
        "pr_auc": "pr_auc",
    }

    metric = pd.DataFrame()
    metric["model_type"] = [f"{model_name}"]
    for column, key in column_to_key.items():
        metric[column] = [test_output[key]]
    return metric

def style_format(metrics, type="test set"):
    # metrics=metrics[metrics["model_type"].apply(lambda x : x.split("-")[0]==model.split("-")[0])].reset_index(drop=True)
    return metrics.style.format({"total complaint #":"{:,}","false_positive":"{:,}","false_negative":"{:,}", "precision":"{:.2%}", "recall":"{:.2%}", \
                                "f1_score":"{:.2%}", "roc_auc":"{:.2%}", "pr_auc":"{:.2%}"}) \
    .set_caption(f"Performance Summary for {type} ") \
    .set_table_styles([{
        'selector': 'caption',
        'props': [
            ('color', 'red'),
            ('font-size', '15px')
        ]
    }])

def dist_func(df, cols):
    """Return the value distribution of one column as a tidy table.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    cols : str
        Name of the single column to summarise (missing values are counted
        as their own category).

    Returns
    -------
    pandas.DataFrame
        Columns ``[cols, 'count', 'percentage']``, one row per distinct value,
        ordered by descending count. ``percentage`` is a fraction in [0, 1].
    """
    counts = df[cols].value_counts(dropna=False)
    # Name the index and the values explicitly instead of relying on the
    # default names produced by value_counts()/reset_index(): those defaults
    # changed in pandas 2.0, which broke the previous rename-based version.
    dist = counts.rename_axis(cols).reset_index(name="count")
    dist["percentage"] = dist["count"] / dist["count"].sum()
    return dist.loc[:, [cols, "count", "percentage"]]

def style_format_dist(df,title):
    return df.style.format({'count':'{:,}','percentage':'{:.2%}'})\
           .set_caption(f"{title}")\
           .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '12px')]}])

def metrics_df_func(output_dir, model_name):
    """Collect the metrics of every CSV in `output_dir` into one table.

    Each CSV is read, summarised with ``metrics_read`` and tagged with a
    "Recall in Val" label derived from the file name: the text between the
    first "_" and the following "." (e.g. ``pred_97.csv`` -> ``recall>=97%``).

    Parameters
    ----------
    output_dir : str
        Directory that holds the prediction CSV files.
    model_name : str
        Passed through to ``metrics_read`` as the model label.

    Returns
    -------
    pandas.DataFrame
        One row per CSV file (files processed in sorted name order), with
        "Recall in Val" as the first column and a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If `output_dir` contains no CSV file.
    """
    csv_names = sorted(x for x in os.listdir(output_dir) if x.split(".")[-1] == "csv")
    if not csv_names:
        raise FileNotFoundError(f"no CSV files found in {output_dir!r}")

    frames = []
    for position, file_name in enumerate(csv_names):
        frame = metrics_read(pd.read_csv(os.path.join(output_dir, file_name)), model_name)
        N = file_name.split("_")[1].split(".")[0]
        # NOTE(review): the first file (in sorted order) gets an extra "0"
        # before the "%" sign, unlike every other file. Kept as-is because
        # callers may depend on the exact labels -- confirm whether intended.
        label = f"recall>={N}0%" if position == 0 else f"recall>={N}%"
        frame.insert(0, "Recall in Val", [label])
        frames.append(frame)

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(frames, axis=0, ignore_index=True)

def metrics_df(output_dir, model_name):
    """Collect the metrics of every CSV in `output_dir`, one row per distinct recall.

    Parameters
    ----------
    output_dir : str
        Directory that holds the prediction CSV files.
    model_name : str
        Passed through to ``metrics_read`` as the model label.

    Returns
    -------
    pandas.DataFrame
        The per-file rows from ``metrics_read`` (files in sorted name order);
        when several files share the same "recall" value only the first is
        kept. The index is not reset after dropping duplicates.

    Raises
    ------
    FileNotFoundError
        If `output_dir` contains no CSV file.
    """
    csv_names = sorted(x for x in os.listdir(output_dir) if x.split(".")[-1] == "csv")
    if not csv_names:
        raise FileNotFoundError(f"no CSV files found in {output_dir!r}")

    frames = [
        metrics_read(pd.read_csv(os.path.join(output_dir, file_name)), model_name)
        for file_name in csv_names
    ]
    # Concatenate once instead of growing the frame inside a loop.
    combined = pd.concat(frames, axis=0, ignore_index=True)
    return combined.drop_duplicates(subset=["recall"], keep="first")


def response_rate_eval(logit,label,topk):
    """Share of positive labels among the highest-scored rows.

    Parameters
    ----------
    logit : sequence of numbers
        Predicted score per row.
    label : sequence of numbers
        Actual label per row; the labels of the selected rows are summed.
    topk : iterable of float
        Fractions of the data to inspect, e.g. ``[0.01, 0.05]``.

    Returns
    -------
    dict
        Maps a percentage label such as ``"5%"`` to
        ``sum(labels of the top rows) / number of top rows``. The number of
        top rows is ``ceil(n_rows * p)``. The value is NaN when no row is
        selected (empty input or ``p <= 0``).
    """
    scored = pd.DataFrame(columns=["pred_score","actual_label"])
    scored["pred_score"] = logit
    scored["actual_label"] = label
    scored = scored.sort_values(by="pred_score", ascending=False)

    n_rows = scored.shape[0]
    response_rate = {}
    for p in topk:
        # Round up. The previous math.ceil(int(...)) truncated first, which
        # made the ceil a no-op and could select zero rows on small inputs.
        # round() removes float noise such as 100 * 0.07 == 7.000000000000001.
        n_top = min(n_rows, math.ceil(round(n_rows * p, 9)))
        top_rows = scored.nlargest(n_top, "pred_score", keep="first") if n_top > 0 else scored.iloc[0:0]
        # ":g" avoids int() truncation errors like int(0.29 * 100) == 28.
        key = f"{p * 100:g}%"
        selected = top_rows.shape[0]
        response_rate[key] = top_rows["actual_label"].sum() / selected if selected else float("nan")
    return response_rate

from matplotlib.ticker import FuncFormatter
def bar_plot(data, colors=None, total_width=0.8, single_width=1, legend=True,title=None,subtitle=None,axis_truncation=0.5):
    """Draw a grouped bar chart (one bar per series at every x position) and show it.

    A new 15x8 figure is created on every call and displayed with
    ``plt.show()``; nothing is returned.

    Parameters
    ----------
    data : dict
        Maps a series name to a dict of ``{x label: bar height}``. The heights
        are read in dict order via ``.values()``.

        Example:
        data = {
            "model a": {"1%": 0.9, "2%": 0.8},
            "model b": {"1%": 0.7, "2%": 0.6},
        }

    colors : array-like, optional
        Colors used for the series, cycled when there are more series than
        colors. If None, the standard matplotlib color cycle is used.

    total_width : float, optional, default: 0.8
        The width of a bar group. 0.8 means that 80% of the x-axis is covered
        by bars and 20% will be spaces between the bars.

    single_width : float, optional, default: 1
        The relative width of a single bar within a group. 1 means the bars
        touch each other within a group, values less than 1 make them thinner.

    legend : bool, optional, default: True
        If true, a legend with the series names is added.

    title : str, optional
        Axes title. Defaults to "Top Predicted Score  ".

    subtitle, axis_truncation : optional
        Accepted for backward compatibility; currently not used.

    Raises
    ------
    ValueError
        If `data` is empty.
    """
    if not data:
        raise ValueError("data must contain at least one series")

    fig, ax = plt.subplots(figsize =(15, 8))

    # Fall back to the default matplotlib color cycle.
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group and the width of a single bar.
    n_bars = len(data)
    bar_width = total_width / n_bars

    # One handle (and name) per drawn series, used for the legend.
    bars = []
    legend_names = []

    for i, (name, values) in enumerate(data.items()):
        # Horizontal shift of this series inside every group.
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2

        bar = None
        for x, y in enumerate(values.values()):
            bar = ax.bar(x + x_offset, y, width=bar_width * single_width, color=colors[i % len(colors)])

        # A series without values draws nothing, so it gets no legend entry
        # (previously this reused a stale handle or failed on an unbound name).
        if bar is not None:
            bars.append(bar[0])
            legend_names.append(name)

    if legend:
        ax.legend(bars, legend_names)

    ax.set_ylabel('Accuracy',fontsize=15)
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y,_: "{:.0%}".format(y)))

    # One tick per x position of the first series.
    first_series = next(iter(data.values()))
    ind=np.arange(len(first_series))
    ax.set_xticks(ind)

    # Keep the historical labels when there are exactly four positions;
    # otherwise derive the labels from the keys of the first series so the
    # number of labels always matches the number of ticks.
    default_labels = ('top 1% score', 'top 2% score', 'top 5% score','top 10% score')
    if len(first_series) == len(default_labels):
        tick_labels = default_labels
    else:
        tick_labels = [f"top {key} score" for key in first_series]
    ax.set_xticklabels(tick_labels)

    ax.set_title("Top Predicted Score  " if title is None else title,fontsize=15)

    plt.show()


In [None]:
# Accumulators filled by the cells below, one value appended per evaluated
# model / month. Re-run this cell before re-running those cells, otherwise
# the lists keep growing.
RECALL, PRECISION = [], []

In [None]:
# Baseline for 05/2023: share of the less frequent target value in the test set.
test_date = "05_23"
number_feature = 990
data_name = f"test_data_{number_feature}"
input_data = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_data/{test_date}/{data_name}"
tst = pd.read_pickle(input_data)

# value_counts() is ordered by frequency, so `a` is the count of the most
# common target value and `b` the count of the other one. The unpacking
# requires exactly two distinct values.
a, b = tst["target_variable"].value_counts().tolist()
human_review_precision = b / (a + b)
print("Precision of human review : {:.2%}".format(human_review_precision))

PRECISION.append(human_review_precision)

In [None]:
# lightgbm results on the 05/2023 test set.
model_name = "lightgbm"
test_date = "05_23"
number_feature = 990

output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_model/{test_date}/{number_feature}/{model_name}/"

# Order the rows by recall; the row with the lowest recall is relabelled "default".
metrics = (
    metrics_df_func(output_dir, model_name)
    .sort_values("recall")
    .reset_index(drop=True)
)
metrics.loc[0, "Recall in Val"] = "default"

# Record precision and recall of the "recall>=97%" row.
target_row = metrics[metrics['Recall in Val'] == "recall>=97%"]
PRECISION.append(target_row.precision.values[0])
RECALL.append(target_row.recall.values[0])

In [None]:
# Baseline for 06/2023: share of the less frequent target value in the test set.
test_date = "06_23"
number_feature = 990
data_name = f"test_data_{number_feature}"
input_data = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_data/{test_date}/{data_name}"
tst = pd.read_pickle(input_data)

# value_counts() is ordered by frequency, so `a` is the count of the most
# common target value and `b` the count of the other one. The unpacking
# requires exactly two distinct values.
a, b = tst["target_variable"].value_counts().tolist()
human_review_precision = b / (a + b)
print("Precision of human review : {:.2%}".format(human_review_precision))

PRECISION.append(human_review_precision)

In [None]:
# Metrics computed from the stored predictions file "predictions_06.csv".
data_dir = "/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/inference_test_data/results/"
predictions_path = os.path.join(data_dir, "predictions_06.csv")
df = pd.read_csv(predictions_path)
metrics = metrics_read(df, model_name="lightgbm-frozen-06")

# metrics_read returns a single row; take its recall and precision.
RECALL.append(metrics["recall"].values[0])
PRECISION.append(metrics["precision"].values[0])

In [None]:
# lightgbm results on the 06/2023 test set.
model_name = "lightgbm"
test_date = "06_23"
number_feature = 990

output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_model/{test_date}/{number_feature}/{model_name}/"

# Order the rows by recall; the row with the lowest recall is relabelled "default".
metrics = (
    metrics_df_func(output_dir, model_name)
    .sort_values("recall")
    .reset_index(drop=True)
)
metrics.loc[0, "Recall in Val"] = "default"

# Record precision and recall of the "recall>=97%" row.
target_row = metrics[metrics['Recall in Val'] == "recall>=97%"]
PRECISION.append(target_row.precision.values[0])
RECALL.append(target_row.recall.values[0])

In [None]:
# Baseline for 07/2023: share of the less frequent target value in the test set.
test_date = "07_23"
number_feature = 990
data_name = f"test_data_{number_feature}"
input_data = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_data/{test_date}/{data_name}"
tst = pd.read_pickle(input_data)

# value_counts() is ordered by frequency, so `a` is the count of the most
# common target value and `b` the count of the other one. The unpacking
# requires exactly two distinct values.
a, b = tst["target_variable"].value_counts().tolist()
human_review_precision = b / (a + b)
print("Precision of human review : {:.2%}".format(human_review_precision))

PRECISION.append(human_review_precision)

In [None]:
# Metrics computed from the stored predictions file "predictions_07.csv".
data_dir = "/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/inference_test_data/results/"
predictions_path = os.path.join(data_dir, "predictions_07.csv")
df = pd.read_csv(predictions_path)
metrics = metrics_read(df, model_name="lightgbm-frozen-07")

# metrics_read returns a single row; take its recall and precision.
RECALL.append(metrics["recall"].values[0])
PRECISION.append(metrics["precision"].values[0])

In [None]:
# lightgbm results on the 07/2023 test set.
model_name = "lightgbm"
test_date = "07_23"
number_feature = 990

output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_model/{test_date}/{number_feature}/{model_name}/"

# Order the rows by recall; the row with the lowest recall is relabelled "default".
metrics = (
    metrics_df_func(output_dir, model_name)
    .sort_values("recall")
    .reset_index(drop=True)
)
metrics.loc[0, "Recall in Val"] = "default"

# Record precision and recall of the "recall>=97%" row.
target_row = metrics[metrics['Recall in Val'] == "recall>=97%"]
PRECISION.append(target_row.precision.values[0])
RECALL.append(target_row.recall.values[0])

In [None]:
from matplotlib.lines import Line2D

# One entry per value appended to RECALL by the cells above, in the same order.
recall_models = ["TFIDF-lightgbm", "TFIDF-lightgbm-frozen", "TFIDF-lightgbm-dynamic",
                 "TFIDF-lightgbm-frozen", "TFIDF-lightgbm-dynamic"]
recall_dates = ["05/2023", "06/2023", "06/2023", "07/2023", "07/2023"]
if len(RECALL) != len(recall_models):
    # RECALL is appended to by several cells; re-running any of them without
    # resetting the list first leaves it with the wrong number of values.
    raise ValueError(
        f"RECALL has {len(RECALL)} values but {len(recall_models)} are expected; "
        "reset RECALL/PRECISION and re-run the metric cells in order"
    )

df_recall = pd.DataFrame({"model": recall_models,
                          "date": recall_dates,
                          "Recall": RECALL
                         })

# Model labels fixed from the misspelled "ligtgbm".
colors = {"TFIDF-lightgbm": "red", "TFIDF-lightgbm-frozen": "green", "TFIDF-lightgbm-dynamic": "purple"}

# Create a bar plot: one group of bars per date, one bar per model.
plt.figure(figsize=(12, 8))

dates = df_recall['date'].unique()
width = 0.2
tick_positions = []
for idx, date in enumerate(dates):
    subset = df_recall[df_recall['date'] == date]
    # The loop variable is deliberately not called `precision`: that name is
    # used as a module-level list by the precision-recall curve cells.
    for j, (model, recall_value) in enumerate(zip(subset['model'], subset['Recall'])):
        plt.bar(idx + j*width, recall_value, width, color=colors[model])
        plt.annotate(f"{recall_value*100:.2f}%",
                     (idx + j*width, recall_value),
                     ha='center', va='bottom',
                     fontsize=10, color='black')
    # Centre the tick under this date's bars (groups differ in size).
    tick_positions.append(idx + (len(subset) - 1) * width / 2)

plt.xticks(tick_positions, dates)

# Explicit legend: one coloured line per model.
legend_handles = [Line2D([0], [0], color=color, lw=4) for model, color in colors.items()]
plt.legend(legend_handles, colors.keys(), title='Model', loc='upper right')

plt.title('Recall of Different Models Over Time')
plt.ylabel('Recall')
plt.xlabel('Date')
plt.ylim([0.6,1.05])
plt.tight_layout()
plt.show()


In [None]:
from matplotlib.lines import Line2D

# One entry per value appended to PRECISION by the cells above, in the same order.
precision_models = ["human-review", "TFIDF-lightgbm",
                    "human-review", "TFIDF-lightgbm-frozen", "TFIDF-lightgbm-dynamic",
                    "human-review", "TFIDF-lightgbm-frozen", "TFIDF-lightgbm-dynamic"]
precision_dates = ["05/2023", "05/2023",
                   "06/2023", "06/2023", "06/2023",
                   "07/2023", "07/2023", "07/2023"]
if len(PRECISION) != len(precision_models):
    # PRECISION is appended to by several cells; re-running any of them without
    # resetting the list first leaves it with the wrong number of values.
    raise ValueError(
        f"PRECISION has {len(PRECISION)} values but {len(precision_models)} are expected; "
        "reset RECALL/PRECISION and re-run the metric cells in order"
    )

df_precision = pd.DataFrame({"model": precision_models,
                             "date": precision_dates,
                             "precision": PRECISION
                            })

# Model labels fixed from the misspelled "ligtgbm".
colors = {"human-review": "blue", "TFIDF-lightgbm": "red", "TFIDF-lightgbm-frozen": "green", "TFIDF-lightgbm-dynamic": "purple"}

# Create a bar plot: one group of bars per date, one bar per model.
plt.figure(figsize=(12, 8))

dates = df_precision['date'].unique()
width = 0.2
tick_positions = []
for idx, date in enumerate(dates):
    subset = df_precision[df_precision['date'] == date]
    # The loop variable is deliberately not called `precision`: that name is
    # used as a module-level list by the precision-recall curve cells.
    for j, (model, precision_value) in enumerate(zip(subset['model'], subset['precision'])):
        plt.bar(idx + j*width, precision_value, width, color=colors[model])
        plt.annotate(f"{precision_value*100:.2f}%",
                     (idx + j*width, precision_value),
                     ha='center', va='bottom',
                     fontsize=10, color='black')
    # Centre the tick under this date's bars (groups differ in size).
    tick_positions.append(idx + (len(subset) - 1) * width / 2)

plt.xticks(tick_positions, dates)

# Explicit legend: one coloured line per model.
legend_handles = [Line2D([0], [0], color=color, lw=4) for model, color in colors.items()]
plt.legend(legend_handles, colors.keys(), title='Model', loc='upper right')

plt.title('Precision of Different Models Over Time')
plt.ylabel('Precision')
plt.xlabel('Date')
plt.tight_layout()
plt.show()


In [None]:
# Precision-recall curves for three TFIDF models (labelled by feature count:
# 500, 1000, 5000) on the 05/2023 test set.
#
# NOTE(review): this cell reads `recall[i]` and `precision[i]` for i = 0..2,
# but no cell above it in the visible notebook builds per-model `recall` /
# `precision` lists for these three models. It therefore depends on kernel
# state from another cell (hidden state) and is expected to fail on a fresh
# "Restart & Run All" -- TODO add the cell that computes these lists.
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Define precision and recall values for each model
# models = ['TFIDF_number_500','TFIDF_number_750','TFIDF_number_1000','TFIDF_number_5000']
# markers = ['o', 's', 'D','x']
# colors = ['blue', 'orange','green','red']

# Legend label, marker and colour per curve (index-aligned with recall/precision).
models = ['TFIDF_number_500','TFIDF_number_1000','TFIDF_number_5000']
markers = ['o', 's','x']
colors = ['blue', 'orange','red']


# Plot precision and recall
plt.figure(figsize=(10, 8))

# Iterate over models: one dotted line per model, recall on x, precision on y.
for i in range(len(models)):
    plt.plot(recall[i], precision[i], marker=markers[i],  color=colors[i], label=models[i], linewidth=3, linestyle=":", markersize=8)

plt.xlabel('Recall', fontsize=14)
plt.ylabel('Precision', fontsize=14)
plt.title('Precision-Recall Curve \n(test_set=05/2023)', fontsize=16)
plt.grid(True)

# Format axis values as percentages
# (x ticks every 0.01 with no decimals, y ticks every 0.001 with two decimals).
ax = plt.gca()
ax.xaxis.set_major_locator(mtick.MultipleLocator(base=0.01))
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_locator(mtick.MultipleLocator(base=0.001))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

# Show only the window recall 0.90-1.001 and precision 0.008-0.015.
plt.ylim(0.008,0.015)
plt.xlim(0.90,1.001)

# Add horizontal line for benchmark model
# benchmark_precision=0.0053
# plt.axhline(y=benchmark_precision, color=(0.8,0.7,0.5),linestyle='--', linewidth=3)

# Set the legend
plt.legend()
# plt.legend(models+["Lexican Search"],bbox_to_anchor=(1,0.5), fontsize=14)

# Show the plot
plt.show()

#### longformer base

In [None]:
# Metrics table for the longformer base model on the 05/2023 test set.
test_date = "05_23"

# The results directory uses the long model name ...
model_name = "longformer_base_4096_customized"
output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning/results/{test_date}/{model_name}/"

# ... while the table is labelled with the short one.
model_name = "longformer_base"
metrics = metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")

# Last expression of the cell: rendered as the cell output.
style_format(metrics, type="longformer_base model")

#### longformer large

In [None]:
# Metrics table for the longformer large model on the 05/2023 test set.
test_date = "05_23"

# The results directory uses the long model name ...
model_name = "longformer_large_4096_customized"
output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning/results/{test_date}/{model_name}/"

# ... while the table is labelled with the short one.
model_name = "longformer_large"
metrics = metrics_df_func(output_dir, model_name).drop_duplicates(subset=["recall"], keep="first")

# Caption fixed: it previously read "longformer_base model" (copied from the
# longformer base cell) although this table shows the large model.
style_format(metrics, type="longformer_large model")

In [None]:
# Per-model precision / recall lists (index-aligned) for the 05/2023 test set.
precision = []
recall = []

test_date = "05_23"
number_feature = 995

# (results sub-directory, model label passed to metrics_df), in plotting order.
model_runs = [
    ("lightgbm", "lightgbm"),
    ("roberta_large_customized", "roberta_large"),
    ("deberta_v3_large", "deberta_v3"),
    ("longformer_base_4096_customized", "longformer_base"),
    ("longformer_large_4096_customized", "longformer_large"),
    ("bigbird_roberta_large_customized", "bigbird"),
]

for model_name, model_type in model_runs:
    # The TFIDF/lightgbm results live in a different tree than the fine-tuned models.
    if model_name == "lightgbm":
        output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_model/{test_date}/{number_feature}/{model_name}/"
    else:
        output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning/results/{test_date}/{model_name}/"

    df = metrics_df(output_dir, model_type)
    # Keep only the operating points with recall above 90%.
    df = df[df.recall > 0.9]
    precision.append(df["precision"].tolist())
    recall.append(df["recall"].tolist())

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Legend label, marker and colour per curve; index-aligned with the
# `recall` / `precision` lists built in the previous cell.
models = ['TFIDF_lightgbm','Roberta-Large','Deberta-v3','longformer-base','longformer-large','bigbird']
markers = ['o', 's', 'x', '*', '<','D']
colors = ['blue', 'green','purple','red','black','orange']

# Line settings shared by all curves.
curve_style = dict(linewidth=3, linestyle=":", markersize=8)

plt.figure(figsize=(10, 8))

# One dotted curve per model: recall on x, precision on y.
for i, model_label in enumerate(models):
    plt.plot(recall[i], precision[i], marker=markers[i], color=colors[i], label=model_label, **curve_style)

plt.xlabel('Recall', fontsize=14)
plt.ylabel('Precision', fontsize=14)
plt.title('Precision-Recall Curve \n(test_set=05/2023)', fontsize=16)
plt.grid(True)

# Percent-formatted axes: x ticks every 0.01, y ticks every 0.001.
ax = plt.gca()
ax.xaxis.set_major_locator(mtick.MultipleLocator(base=0.01))
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_locator(mtick.MultipleLocator(base=0.001))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

plt.ylim(0.006,0.03)
plt.xlim(0.96,1.001)

# Horizontal dashed line for the benchmark precision.
benchmark_precision = 0.0074
plt.axhline(y=benchmark_precision, color=(0.8,0.7,0.5),linestyle='--', linewidth=3)

# Single legend placed to the right of the axes; the last label belongs to
# the benchmark line drawn above.
plt.legend(models+["Lexican Search"],bbox_to_anchor=(1.35,0.5), fontsize=14)

plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# --- Collect per-model precision / recall lists for the 05/2023 test set ---
precision = []
recall = []

test_date = "05_23"

# (results sub-directory, model label passed to metrics_df), in plotting order.
model_runs = [
    ("roberta_large", "roberta_large"),
    ("roberta_large_customized", "roberta_large_customized"),
    ("longformer_base_4096", "longformer_base"),
    ("longformer_base_4096_customized", "longformer_base_customized"),
    ("longformer_large_4096", "longformer_large"),
    ("longformer_large_4096_customized", "longformer_large_customized"),
    ("bigbird_roberta_large", "bigbird"),
    ("bigbird_roberta_large_customized", "bigbird_customized"),
]

for model_name, model_type in model_runs:
    output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning/results/{test_date}/{model_name}/"
    df = metrics_df(output_dir, model_type)
    # Keep only the operating points with recall above 90%.
    df = df[df.recall > 0.9]
    precision.append(df["precision"].tolist())
    recall.append(df["recall"].tolist())

# --- Plot ---
# Legend label, marker and colour per curve; index-aligned with the lists above.
models = [
    'Roberta-Large', 'Roberta-Large-customized',
    'longformer-base', 'longformer-base-customized',
    'longformer-large', 'longformer-large-customized',
    'bigbird', 'bigbird-customized',
]
markers = ['o', 's', 'x', '*', '<', 'p','>','D']
colors = ['blue', 'green','purple','red','black','brown','lawngreen','orange']

# Line settings shared by all curves.
curve_style = dict(linewidth=3, linestyle=":", markersize=8)

plt.figure(figsize=(10, 8))

# One dotted curve per model: recall on x, precision on y.
for i, model_label in enumerate(models):
    plt.plot(recall[i], precision[i], marker=markers[i], color=colors[i], label=model_label, **curve_style)

plt.xlabel('Recall', fontsize=14)
plt.ylabel('Precision', fontsize=14)
plt.title('Precision-Recall Curve \n(test_set=05/2023)', fontsize=16)
plt.grid(True)

# Percent-formatted axes: x ticks every 0.01, y ticks every 0.001.
ax = plt.gca()
ax.xaxis.set_major_locator(mtick.MultipleLocator(base=0.01))
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_locator(mtick.MultipleLocator(base=0.001))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

plt.ylim(0.006,0.03)
plt.xlim(0.96,1.001)

# Horizontal dashed line for the benchmark precision.
benchmark_precision = 0.0074
plt.axhline(y=benchmark_precision, color=(0.8,0.7,0.5),linestyle='--', linewidth=3)

# Single legend; the last label belongs to the benchmark line drawn above.
plt.legend(models+["Lexican Search"],bbox_to_anchor=(1,0.5), fontsize=14)

plt.show()

In [None]:
# idx=metrics.groupby("model_type")["recall"].idxmax()
# metrics_2=metrics.loc[idx]
# desired_order=["randomforest","xgboost","lightgbm","roberta_large","deberta_v3_large","longformer_base","longformer_large","bigbird"]
# metrics_2["model_type"]=pd.Categorical(metrics_2["model_type"],categories=desired_order,ordered=True)
# metrics_2=metrics_2.sort_values(by="model_type")
# style_format(metrics_2,  type="Different Models")

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# --- Collect per-model precision / recall lists for the 05/2023 test set ---
precision = []
recall = []

test_date = "05_23"
number_feature = 995

# (results sub-directory, model label passed to metrics_df), in plotting order.
model_runs = [
    ("lightgbm", "lightgbm"),
    ("longformer_base_4096_customized", "longformer_base"),
]

for model_name, model_type in model_runs:
    # The TFIDF/lightgbm results live in a different tree than the fine-tuned model.
    if model_name == "lightgbm":
        output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/TFIDF/production/tfidf_model/{test_date}/{number_feature}/{model_name}/"
    else:
        output_dir = f"/opt/omniai/work/instance1/jupyter/v5_new_email/Fine-Tuning/results/{test_date}/{model_name}/"

    df = metrics_df(output_dir, model_type)
    # Keep only the operating points with recall above 90%.
    df = df[df.recall > 0.9]
    precision.append(df["precision"].tolist())
    recall.append(df["recall"].tolist())

# --- Plot ---
# Legend label, marker and colour per curve; index-aligned with the lists above.
models = ['TFIDF_model','longformer-base']
markers = ['o', 'D']
colors = ['blue', 'red']

# Line settings shared by all curves.
curve_style = dict(linewidth=3, linestyle=":", markersize=8)

plt.figure(figsize=(10, 8))

# One dotted curve per model: recall on x, precision on y.
for i, model_label in enumerate(models):
    plt.plot(recall[i], precision[i], marker=markers[i], color=colors[i], label=model_label, **curve_style)

plt.xlabel('Recall', fontsize=14)
plt.ylabel('Precision', fontsize=14)
plt.title('Precision-Recall Curve \n(test_set=05/2023)', fontsize=16)
plt.grid(True)

# Percent-formatted axes: x ticks every 0.01, y ticks every 0.001.
ax = plt.gca()
ax.xaxis.set_major_locator(mtick.MultipleLocator(base=0.01))
ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
ax.yaxis.set_major_locator(mtick.MultipleLocator(base=0.001))
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=2))

plt.ylim(0.006,0.03)
plt.xlim(0.96,1.001)

# Horizontal dashed line for the benchmark precision.
benchmark_precision = 0.0074
plt.axhline(y=benchmark_precision, color=(0.8,0.7,0.5),linestyle='--', linewidth=3)

# Single legend placed to the right of the axes; the last label belongs to
# the benchmark line drawn above.
plt.legend(models+["Lexican Search"],bbox_to_anchor=(1.35,0.5), fontsize=14)

plt.show()