# mRNALoc was performed on local Mac to predict mRNA localization
    perl mRNALoc.pl mRNA_sublocation_TestSet.fa 0.1
    
    Then, we got mRNALoc_Result.

In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import re
import os

In [2]:
# Load the tab-separated mRNALoc output; read_table uses '\t' as its default separator.
mRNALoc_Result = pd.read_table("mRNALoc_Result")

In [3]:
# Split each SeqID on "_": the first field is kept as the transcript id and
# the second field as the tag.
seq_ids = mRNALoc_Result["SeqID"]
ensemble_transcript_id = seq_ids.map(lambda seq_id: re.split('_', seq_id)[0])
tag = seq_ids.map(lambda seq_id: re.split('_', seq_id)[1])
# Binary label: 1 when the reported Location is "Nucleus", 0 for anything else.
predict_label = mRNALoc_Result["Location"].map(lambda location: int(location == "Nucleus"))

In [4]:
# Collect the three derived columns (all share the index of mRNALoc_Result)
# into one table, naming the columns at construction time.
outcome_df = pd.DataFrame({
    "ensemble_transcript_id": ensemble_transcript_id,
    "tag": tag,
    "predict_label": predict_label,
})

In [5]:
# Directory that receives the prediction table and the evaluation report.
path = "./Evaluation_Result"
# makedirs(exist_ok=True) avoids the check-then-create race of
# `if not exists: mkdir` and also creates missing parent directories.
os.makedirs(path, exist_ok=True)

In [6]:
# Save the per-transcript predictions as a tab-separated file without the index column.
predict_tsv = os.path.join(path, "mRNA_sublocation_TestSet_mRNALoc_predict.tsv")
outcome_df.to_csv(predict_tsv, sep='\t', index=False)

In [7]:
#Evaluate performance of model
def evaluate_performance_mRNALoc(y_test, y_pred):
    """Compute binary-classification metrics for the predicted labels.

    Args:
        y_test: True labels (0 = "control", 1 = "case").
        y_pred: Predicted labels, using the same 0/1 encoding.

    Returns:
        dict with the keys "accuracy", "mcc", "recall", "precision", "f1"
        (scalar scores from sklearn.metrics) and "class_report" (the text
        produced by sklearn.metrics.classification_report).
    """
    #Accuracy
    accuracy = metrics.accuracy_score(y_test, y_pred)
    #MCC
    mcc = metrics.matthews_corrcoef(y_test, y_pred)

    recall = metrics.recall_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)
    # Pass labels explicitly: without them classification_report infers the
    # classes from the data and raises ValueError when only one class is
    # present, because the two target_names no longer match.
    class_report = metrics.classification_report(
        y_test, y_pred, labels=[0, 1], target_names=["control", "case"])

    model_perf = {"accuracy": accuracy, "mcc": mcc,
                  "recall": recall, "precision": precision, "f1": f1,
                  "class_report": class_report}

    return model_perf

In [8]:
# Output result of evaluation
def eval_output_mRNALoc(model_perf,path):
    """Write the evaluation summary to ``Evaluate_Result_TestSet.txt`` under *path*.

    The file holds one tab-separated line of scalar metrics, a note on how to
    read the classification report, and finally the report text itself.

    Args:
        model_perf: Mapping with the keys "accuracy", "mcc", "recall",
            "precision", "f1" and "class_report" (a string).
        path: Directory in which the result file is written; the file is
            opened in 'w' mode, so existing content is replaced.
    """
    scalar_keys = ("accuracy", "mcc", "recall", "precision", "f1")
    report_file = os.path.join(path, "Evaluate_Result_TestSet.txt")
    with open(report_file, 'w') as out:
        # One header line carrying every scalar metric, in a fixed order.
        scalars = tuple(model_perf[key] for key in scalar_keys)
        out.write("Accuracy=%s\tMCC=%s\tRecall=%s\tPrecision=%s\tf1_score=%s\n" % scalars)
        # Reading aid for the per-class recall values in the report below.
        out.write(
            "\n######NOTE#######\n"
            "#According to help_documentation of sklearn.metrics.classification_report:in binary classification, recall of the positive class is also known as sensitivity; recall of the negative class is specificity#\n\n"
        )
        out.write(model_perf["class_report"])

In [9]:
# Evaluate model performance
# Convert both label columns to integer arrays before scoring: "tag" supplies
# the reference labels and "predict_label" the mRNALoc predictions.
true_labels = np.array(outcome_df["tag"], dtype=int)
predicted_labels = np.array(outcome_df["predict_label"], dtype=int)
model_perf = evaluate_performance_mRNALoc(true_labels, predicted_labels)
eval_output_mRNALoc(model_perf, path)