In [1]:
from time import perf_counter, time

import pandas as pd

import config
import libcirctis

# Inputs (test split): the FASTA file is handed to circTIS for prediction and
# the samples table is loaded later as the ground truth for evaluation.
test_dataset_fasta_file = 'datasets/splits/test/seqs.fa'
test_dataset_samples_file = 'datasets/splits/test/samples.tsv'

# Outputs: directory passed to the circTIS wrapper (the predictions table is
# read back from it), plus the per-sample evaluation tables written by this
# notebook for the ATG subset and for the remaining (non-ATG) TIS types.
output_path_circTIS = 'outputs/circTIS_test_dataset/'
output_samples_eval_file_atg = 'outputs/circTIS_samples_eval_ATG.tsv'
output_samples_eval_file_nc = 'outputs/circTIS_samples_eval_NC.tsv'

In [2]:
# TIS-type selector forwarded unchanged to the circTIS wrapper.
# NOTE(review): the meaning of 'NC1' is defined by circTIS itself — confirm
# it covers both ATG and near-cognate codons, since both are evaluated below.
tis_types = 'NC1'

# Measure the elapsed prediction time with a monotonic, high-resolution clock.
# `time()` follows the system wall clock, which can jump (NTP sync, manual
# changes) during a multi-minute run; `perf_counter()` cannot go backwards.
# start_time / end_time are therefore only meaningful as a difference.
start_time = perf_counter()

libcirctis.circtis_extern_execution(config.CIRCTIS_PATH, tis_types, test_dataset_fasta_file, output_path_circTIS)

end_time = perf_counter()
pred_time = end_time - start_time  # elapsed seconds
pred_time

382.23035311698914

In [3]:
# Build the predictions path from the directory that was actually passed to
# circTIS, so changing `output_path_circTIS` cannot leave this cell reading a
# stale file from a previous run. `output_path_circTIS` ends with '/', so this
# evaluates to 'outputs/circTIS_test_dataset/all_possible_TIS.tsv'.
predictions_file = f'{output_path_circTIS}all_possible_TIS.tsv'

# Tab-separated table with a header row; one row per candidate TIS.
df_predictions = pd.read_csv(predictions_file, sep='\t', header=0)
df_predictions.shape

(72183, 5)

In [4]:
# Turn the SVM decision score into a hard label: strictly positive scores
# become +1, everything else (zero, negative, NaN) becomes -1.
has_positive_score = df_predictions['svm_score'] > 0
df_predictions['circTIS_label'] = has_positive_score.map({True: 1, False: -1})

# Shape of the subset of candidates that circTIS labels as positive.
df_predictions.loc[df_predictions['circTIS_label'].eq(1)].shape

(882, 6)

In [5]:
# Ground-truth samples of the test split (tab-separated, first row is the header).
df_samples = pd.read_csv(
    test_dataset_samples_file,
    sep='\t',
    header=0,
)
df_samples.shape

(72183, 7)

In [6]:
# Align every ground-truth sample with its circTIS prediction. The two tables
# name the key columns differently (id and position), so the keys are listed
# per side; 'TIS_type' is shared.
df_samples_merge = pd.merge(
    df_samples,
    df_predictions,
    how='inner',
    left_on=['circrna_id', 'TIS_position', 'TIS_type'],
    right_on=['circRNA_id', 'position', 'TIS_type'],
)

# An inner join silently drops samples that have no matching prediction and
# duplicates rows when a key repeats; either would bias the metrics computed
# below. Fail loudly instead of evaluating on a different sample set.
assert len(df_samples_merge) == len(df_samples), (
    f'merge changed the number of samples: {len(df_samples)} -> {len(df_samples_merge)}'
)
df_samples_merge.shape

(72183, 12)

In [7]:
# Keep only the columns needed for evaluation and expose the SVM score under
# the tool-specific name 'circTIS_score'. The chain yields a fresh frame that
# is independent of df_samples_merge.
eval_columns = ['circRNA_id', 'TIS_type', 'TIS_position', 'sample_label', 'circTIS_label', 'svm_score']
df_samples_eval = (
    df_samples_merge
    .loc[:, eval_columns]
    .rename(columns={'svm_score': 'circTIS_score'})
)
df_samples_eval.shape

(72183, 6)

In [8]:
# Subset of samples whose TIS type is exactly 'ATG', saved as a TSV without
# the index column.
is_atg = df_samples_eval['TIS_type'].eq('ATG')
df_samples_eval_atg = df_samples_eval.loc[is_atg]
df_samples_eval_atg.to_csv(output_samples_eval_file_atg, sep='\t', index=False)
df_samples_eval_atg.shape

(19303, 6)

In [9]:
# Ground truth and circTIS outputs for the ATG subset.
atg_true_labels = df_samples_eval_atg['sample_label']
atg_pred_labels = df_samples_eval_atg['circTIS_label']
atg_pred_scores = df_samples_eval_atg['circTIS_score']

# Both helpers fill the dict passed as their first argument in place:
# first from the hard labels, then from the continuous scores.
metrics = dict()
libcirctis.calc_metrics_labels(metrics, atg_true_labels, atg_pred_labels)
libcirctis.calc_metrics_scores(metrics, atg_true_labels, atg_pred_scores)
metrics

{'TP': 758,
 'FP': 56,
 'FN': 123,
 'TN': 18366,
 'F1_score': 0.8943952802359882,
 'Precision': 0.9312039312039312,
 'Recall': 0.8603859250851306,
 'Specificity': 0.996960156334817,
 'Accuracy': 0.9907268300264208,
 'AUPR': 0.9241757591638514,
 'AUROC': 0.9864470145070341}

In [10]:
# Every sample whose TIS type is anything other than 'ATG', saved as a TSV
# without the index column.
is_not_atg = df_samples_eval['TIS_type'].ne('ATG')
df_samples_eval_nc = df_samples_eval.loc[is_not_atg]
df_samples_eval_nc.to_csv(output_samples_eval_file_nc, sep='\t', index=False)
df_samples_eval_nc.shape

(52880, 6)

In [11]:
# Ground truth and circTIS outputs for the non-ATG subset.
nc_true_labels = df_samples_eval_nc['sample_label']
nc_pred_labels = df_samples_eval_nc['circTIS_label']
nc_pred_scores = df_samples_eval_nc['circTIS_score']

# Start from an empty dict (this replaces the ATG metrics held under the same
# name); both helpers then fill it in place, labels first, scores second.
metrics = dict()
libcirctis.calc_metrics_labels(metrics, nc_true_labels, nc_pred_labels)
libcirctis.calc_metrics_scores(metrics, nc_true_labels, nc_pred_scores)
metrics

{'TP': 65,
 'FP': 3,
 'FN': 33,
 'TN': 52779,
 'F1_score': 0.783132530120482,
 'Precision': 0.9558823529411765,
 'Recall': 0.6632653061224489,
 'Specificity': 0.9999431624417415,
 'Accuracy': 0.9993192133131619,
 'AUPR': 0.7422279105454682,
 'AUROC': 0.9683745386298205}