# Submmit fasta file to the TACOS (Cell Type: HELA)
    (https://balalab-skku.org/TACOS/) 
    You can get the prediction (Halo-seq_lncRNA_TACOS_predict.txt).

***
###  

# Python
# To evaluate performance of TACOS

In [1]:
import copy
import os
import collections
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

In [2]:
# Evaluate performance of model
def evaluate_performance(y_test, y_pred, y_prob):
    # AUROC
    auroc = metrics.roc_auc_score(y_test,y_prob)
    auroc_curve = metrics.roc_curve(y_test, y_prob)
    # AUPRC
    auprc=metrics.average_precision_score(y_test, y_prob) 
    auprc_curve=metrics.precision_recall_curve(y_test, y_prob)
    #Accuracy
    accuracy=metrics.accuracy_score(y_test,y_pred) 
    #MCC
    mcc=metrics.matthews_corrcoef(y_test,y_pred)
    
    recall=metrics.recall_score(y_test, y_pred)
    precision=metrics.precision_score(y_test, y_pred)
    f1=metrics.f1_score(y_test, y_pred)
    class_report=metrics.classification_report(y_test, y_pred,target_names = ["control","case"])

    model_perf = {"auroc":auroc,"auroc_curve":auroc_curve,
                  "auprc":auprc,"auprc_curve":auprc_curve,
                  "accuracy":accuracy, "mcc": mcc,
                  "recall":recall,"precision":precision,"f1":f1,
                  "class_report":class_report}
        
    return model_perf

In [3]:
# Output result of evaluation
def eval_output(model_perf,path):
    with open(os.path.join(path,"Evaluate_Result_Halo_seq.txt"),'w') as f:
        f.write("AUROC=%s\tAUPRC=%s\tAccuracy=%s\tMCC=%s\tRecall=%s\tPrecision=%s\tf1_score=%s\n" %
               (model_perf["auroc"],model_perf["auprc"],model_perf["accuracy"],model_perf["mcc"],model_perf["recall"],model_perf["precision"],model_perf["f1"]))
        f.write("\n######NOTE#######\n")
        f.write("#According to help_documentation of sklearn.metrics.classification_report:in binary classification, recall of the positive class is also known as sensitivity; recall of the negative class is specificity#\n\n")
        f.write(model_perf["class_report"])
    
    roc_auc = model_perf["auroc"]
    # AUROC info
    fpr,tpr,threshold = model_perf["auroc_curve"]
    #return AUROC info
    temp_df = pd.DataFrame({"FPR":fpr,"TPR":tpr})
    temp_df.to_csv(os.path.join(path,"AUROC_info.txt"),header = True,index = False, sep = '\t')
    
    prc_auc = model_perf["auprc"]
    precision,recall,prc_threshod = model_perf["auprc_curve"]
    #return AUPRC info
    temp_df_2 = pd.DataFrame({"Recall":recall,"Precision":precision})
    temp_df_2.to_csv(os.path.join(path,"AUPRC_info.txt"),header = True,index = False, sep = '\t')

In [4]:
input_file = pd.read_csv("./Halo-seq_lncRNA_TACOS_predict.txt")

input_file["ensemble_transcript_id"],input_file["True_Label"] = input_file['Name'].str.split('_',1).str
class_mapping = {'Nucleus': 1,'Cytoplasm': 0}
input_file["Pre_Label"] = input_file["Location"].map(class_mapping)
input_file["Cyto_Score"] = input_file["Probability"]
input_file["Nuc_Score"] = 1 - input_file["Cyto_Score"]

In [5]:
outcome = input_file.iloc[:,3:8]
outcome[["True_Label"]] = outcome[["True_Label"]].astype(int)
outcome[["Pre_Label"]] = outcome[["Pre_Label"]].astype(int)

In [6]:
# Output performance of lncLocator
path = "./Evaluation_Result"
if not (os.path.exists(path)):
    os.mkdir(path)
model_perf = evaluate_performance(outcome["True_Label"],outcome["Pre_Label"],outcome["Nuc_Score"])
eval_output(model_perf,path)

In [7]:
outcome_df = outcome.iloc[:,[0,1,2]]
outcome_df.rename(columns={"True_Label":"tag","Pre_Label":"predict_label"},inplace = True)
outcome_df.to_csv(os.path.join(path,"lncRNA_sublocation_Halo_seq_TACOS_predict.tsv"),sep = '\t',index = False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
