In [1]:
import os
from time import time

import pandas as pd

import config
import libcirctis

# Inputs: the test split. The FASTA file is handed to circTIS and the samples
# table is read back later as a tab-separated file.
test_dataset_fasta_file = 'datasets/splits/test/seqs.fa'
test_dataset_samples_file = 'datasets/splits/test/samples.tsv'

# Outputs: the directory passed to circTIS as its output path, plus the two
# per-TIS-type evaluation tables written by later cells (ATG and NC).
output_path_circTIS = 'outputs/circTIS_test_dataset/'
output_samples_eval_file_atg = 'outputs/samples_eval_ATG.tsv'
output_samples_eval_file_nc = 'outputs/samples_eval_NC.tsv'

In [2]:
# Run circTIS as an external tool on the test FASTA and record how long the
# call takes in wall-clock seconds.
tis_types = 'NC1'

start_time = time()
libcirctis.circtis_extern_execution(
    config.CIRCTIS_PATH,
    tis_types,
    test_dataset_fasta_file,
    output_path_circTIS,
)
end_time = time()

# Elapsed seconds between the two timestamps; shown as the cell output.
pred_time = end_time - start_time
pred_time

385.60058641433716

In [3]:
# Load the candidate-TIS table from the circTIS output directory.
# The path is built from `output_path_circTIS` (the directory handed to circTIS)
# rather than repeating that directory as a literal, so the two cannot drift
# apart if the output location is changed.
predictions_file = os.path.join(output_path_circTIS, 'all_possible_TIS.tsv')
df_predictions = pd.read_csv(predictions_file, sep='\t', header=0)
df_predictions.shape

(72183, 5)

In [4]:
# Turn the SVM score into a hard label: strictly positive scores map to 1,
# everything else (including exactly 0) maps to -1.
df_predictions['circTIS_label'] = (
    df_predictions['svm_score'].gt(0).map({True: 1, False: -1})
)
# Shape of the rows labelled positive.
df_predictions.loc[df_predictions['circTIS_label'].eq(1)].shape

(895, 6)

In [5]:
# Read the test-split sample table (tab-separated, column names on the first row).
df_samples = pd.read_table(test_dataset_samples_file, header=0)
df_samples.shape

(72183, 7)

In [6]:
# Attach the circTIS prediction to each sample. The key is
# (circRNA id, TIS position, TIS type); the id and position columns are named
# differently in the two tables, hence the explicit left_on/right_on lists.
# `validate='one_to_one'` makes pandas raise if either side has duplicate keys,
# which would otherwise multiply rows without any warning.
df_samples_merge = pd.merge(
    df_samples,
    df_predictions,
    left_on=['circrna_id', 'TIS_position', 'TIS_type'],
    right_on=['circRNA_id', 'position', 'TIS_type'],
    validate='one_to_one',
)
# This is an inner join, so a sample with no matching prediction would simply
# vanish and skew the metrics computed later. Fail loudly instead.
assert len(df_samples_merge) == len(df_samples), (
    f'{len(df_samples) - len(df_samples_merge)} samples have no matching circTIS prediction'
)
df_samples_merge.shape

(72183, 12)

In [7]:
# Keep only the columns needed for evaluation and expose the SVM score under
# the name `circTIS_score`. Built as one chained expression that yields a new
# frame, so `df_samples_merge` is left untouched.
df_samples_eval = (
    df_samples_merge
    .loc[:, ['circRNA_id', 'TIS_type', 'TIS_position', 'sample_label', 'circTIS_label', 'svm_score']]
    .rename(columns={'svm_score': 'circTIS_score'})
)
df_samples_eval.shape

(72183, 6)

In [8]:
# Evaluation rows whose TIS type is ATG.
df_samples_eval_atg = df_samples_eval[df_samples_eval['TIS_type'] == 'ATG']
# Write the ATG subset to the ATG file. The unfiltered `df_samples_eval` used
# to be written here, so the "ATG" file also contained every non-ATG row.
df_samples_eval_atg.to_csv(output_samples_eval_file_atg, sep='\t', index=False)
df_samples_eval_atg.shape

(19303, 6)

In [9]:
# Evaluate circTIS on the ATG subset. Both helpers are given the same
# `metrics` dict and their return values are not used; the dict itself is
# what the cell displays.
metrics = {}
atg_truth = df_samples_eval_atg['sample_label']
libcirctis.calc_metrics_labels(metrics, atg_truth, df_samples_eval_atg['circTIS_label'])
libcirctis.calc_metrics_scores(metrics, atg_truth, df_samples_eval_atg['circTIS_score'])
metrics

{'TP': 758,
 'FP': 68,
 'FN': 123,
 'TN': 18354,
 'F1_score': 0.8881077914469829,
 'Precision': 0.9176755447941889,
 'Recall': 0.8603859250851306,
 'Specificity': 0.9963087612637065,
 'Accuracy': 0.990105165000259,
 'AUPR': 0.9291146172788103,
 'AUROC': 0.9869673542133837}

In [10]:
# Evaluation rows whose TIS type is anything other than ATG (near-cognate).
df_samples_eval_nc = df_samples_eval[df_samples_eval['TIS_type'] != 'ATG']
# Write the non-ATG subset to the NC file. The unfiltered `df_samples_eval`
# used to be written here, so the "NC" file also contained every ATG row.
df_samples_eval_nc.to_csv(output_samples_eval_file_nc, sep='\t', index=False)
df_samples_eval_nc.shape

(52880, 6)

In [11]:
# Evaluate circTIS on the non-ATG subset. Both helpers are given the same
# `metrics` dict and their return values are not used; the dict itself is
# what the cell displays.
metrics = {}
nc_truth = df_samples_eval_nc['sample_label']
libcirctis.calc_metrics_labels(metrics, nc_truth, df_samples_eval_nc['circTIS_label'])
libcirctis.calc_metrics_scores(metrics, nc_truth, df_samples_eval_nc['circTIS_score'])
metrics

{'TP': 67,
 'FP': 2,
 'FN': 31,
 'TN': 52780,
 'F1_score': 0.8023952095808383,
 'Precision': 0.9710144927536232,
 'Recall': 0.6836734693877551,
 'Specificity': 0.9999621082944944,
 'Accuracy': 0.9993759455370651,
 'AUPR': 0.7384653296076221,
 'AUROC': 0.9699377648069571}