In [2]:
import os
import sys
# Project root, given relative to the notebook's working directory.
home_dir = "../../"
# Register the absolute project root on sys.path exactly once so that
# packages living under it (e.g. `models`) can be imported below.
module_path = os.path.abspath(home_dir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import numpy as np
import pandas as pd
from models.aa_common.performance_metrics import *

In [3]:
# Name of the analysis task; used to build the input and output file paths.
task = "popu_freq"

# (model_root, model_name) pairs, one per evaluated predictor.
# The order here is the row order of the generated metrics table.
model_root_and_name_tuple_list = [
    # dbnsfp
    ("dbnsfp", "sift"),
    ("dbnsfp", "polyphen2_HVAR"),
    ("dbnsfp", "cadd"),
    ("dbnsfp", "mvp"),
    ("dbnsfp", "metarnn"),
    ("dbnsfp", "revel"),
    # tape_rao
    ("tape_rao", "unirep"),
    ("tape_rao", "protbert"),
    # sequnet_dunham
    ("sequnet_dunham", "sequnet"),
    # esm_rives
    ("esm_rives", "esm1b_t33_650M_UR50S"),
    ("esm_rives", "esm1v_t33_650M_UR90S"),
    ("esm_rives", "esm2_t33_650M_UR50D"),
    # bioembeddings_dallago
    ("bioembeddings_dallago", "plus_rnn"),
    ("bioembeddings_dallago", "prottrans_bert_bfd"),
    ("bioembeddings_dallago", "prottrans_albert_bfd"),
    ("bioembeddings_dallago", "prottrans_xlnet_uniref100"),
    ("bioembeddings_dallago", "prottrans_t5_bfd"),
    ("bioembeddings_dallago", "prottrans_t5_uniref50"),
    ("bioembeddings_dallago", "prottrans_t5_xl_u50"),
]

# Column headers of the metrics table, in the order the scores are written.
metrics = [
    "AUC-ROC",
    "AUC-PR",
    "F1-max",
    "Th-max",
    "Precision",
    "Recall",
    "Accuracy",
    "Balanced-accuracy",
    "MCC",
]

In [4]:
# Load the merged per-variant predictions for this task (tab-separated despite the .csv suffix).
result_df = pd.read_csv(home_dir+f"models/aa_common/merged_predictions/{task}_analysis.csv", sep="\t")
# Keep only common and rare variants (mutant frequency >= 0.001).
# .copy() makes the filtered frame independent, so the .loc assignments below
# write into a real DataFrame rather than a slice (avoids SettingWithCopyWarning).
result_df = result_df[result_df["mt_freq"]>=.001].copy()
print(result_df.shape)
print(result_df.columns)

# Text label by frequency band: >= 0.01 is "Common", [0.001, 0.01) is "Rare".
result_df.loc[result_df["mt_freq"]>=.01, "class"] = "Common"
result_df.loc[(result_df["mt_freq"]<.01) & (result_df["mt_freq"]>=.001), "class"] = "Rare"

# Numeric encoding of the label: Common -> 0, Rare -> 1.
result_df.loc[result_df["class"]=="Common", "class_numeric"] = 0
result_df.loc[result_df["class"]=="Rare", "class_numeric"] = 1

(47662, 33)
Index(['snp_id', 'chrom_acc_version', 'chrom_pos', 'ref_allele', 'alt_allele',
       'prot_acc_version', 'prot_pos', 'wt', 'mut', 'wt_population',
       'mut_poulation', 'wt_freq', 'mt_freq', 'class', 'sift_pred',
       'polyphen2_HVAR_pred', 'cadd_pred', 'mvp_pred', 'metarnn_pred',
       'revel_pred', 'unirep_pred', 'protbert_pred', 'sequnet_pred',
       'esm1b_t33_650M_UR50S_pred', 'esm1v_t33_650M_UR90S_pred',
       'esm2_t33_650M_UR50D_pred', 'plus_rnn_pred', 'prottrans_bert_bfd_pred',
       'prottrans_albert_bfd_pred', 'prottrans_xlnet_uniref100_pred',
       'prottrans_t5_bfd_pred', 'prottrans_t5_uniref50_pred',
       'prottrans_t5_xl_u50_pred'],
      dtype='object')


In [5]:
# Destination of the tab-separated metrics table: one row per model, one column per metric.
output_file = home_dir+f"models/aa_common/performance_analysis/{task}.csv"

# The context manager guarantees the file is flushed and closed even if one of the
# metric computations raises part-way through the loop.
with open(output_file, 'w') as out:
    # Header row: corner label followed by the metric names.
    out.write("\t".join(["Models\\Metrics", *metrics]) + "\n")

    for model_root, model_name in model_root_and_name_tuple_list:
        print(model_name)
        model_pred_col = model_name + "_pred"

        # Min-max scale this model's raw scores into [0, 1]. pandas min()/max() skip NaN
        # by default, so rows without a prediction stay NaN in "pred".
        raw_scores = result_df[model_pred_col]
        score_min = raw_scores.min()
        score_max = raw_scores.max()
        result_df["pred"] = (raw_scores - score_min) / (score_max - score_min)

        # Evaluate only on rows where this model actually produced a prediction.
        non_nan_result_df = result_df[~pd.isna(result_df[model_pred_col])]

        # NOTE(review): the helpers below come from models.aa_common.performance_metrics;
        # presumably they read the "pred" and "class_numeric" columns — confirm there.
        auc_roc_score = get_auc_roc_score(non_nan_result_df)
        auc_pr_score, precisions, recalls, thresholds = get_auc_pr_score(non_nan_result_df)
        f1_max, th_max = get_f1max_and_th(precisions, recalls, thresholds)
        # The remaining metrics are all computed at the threshold returned with f1_max.
        precision = get_precision_score(non_nan_result_df, th_max)
        recall = get_recall_score(non_nan_result_df, th_max)
        accuracy = get_accuracy_score(non_nan_result_df, th_max)
        balanced_accuracy = get_balanced_accuracy_score(non_nan_result_df, th_max)
        mcc = get_matthews_corrcoef(non_nan_result_df, th_max)

        # Order must match the `metrics` header list.
        performance_scores = [auc_roc_score, auc_pr_score, f1_max, th_max, precision, recall, accuracy, balanced_accuracy, mcc]

        # One row per model: name followed by each score rounded to 3 decimals.
        out.write("\t".join([model_name] + [f"{score:.3f}" for score in performance_scores]) + "\n")

sift
	AUC-ROC: 0.609
	AUC-PR: 0.570
	Best F1-Score: 0.775 at threshold: 0.000
	Precision score: 0.632 at threshold: 0.000
	Recall score: 1.000 at threshold: 0.000
	Accuracy score: 0.632 at threshold: 0.000
	Balanced accuracy score: 0.500 at threshold: 0.000
	MCC score: 0.000 at threshold: 0.000
polyphen2_HVAR
	AUC-ROC: 0.609
	AUC-PR: 0.705
	Best F1-Score: 0.774 at threshold: 0.000
	Precision score: 0.632 at threshold: 0.000
	Recall score: 1.000 at threshold: 0.000
	Accuracy score: 0.632 at threshold: 0.000
	Balanced accuracy score: 0.500 at threshold: 0.000
	MCC score: 0.000 at threshold: 0.000
cadd
	AUC-ROC: 0.608
	AUC-PR: 0.710
	Best F1-Score: 0.771 at threshold: 0.120
	Precision score: 0.627 at threshold: 0.120
	Recall score: 1.000 at threshold: 0.120
	Accuracy score: 0.627 at threshold: 0.120
	Balanced accuracy score: 0.500 at threshold: 0.120
	MCC score: 0.016 at threshold: 0.120
mvp
	AUC-ROC: 0.592
	AUC-PR: 0.909
	Best F1-Score: 0.939 at threshold: 0.000
	Precision score: 0.886 a