In [1]:
import os, sys
import numpy as np 
import pandas as pd 
from tqdm import tqdm
import json 
import datetime
# Add parent dir to sys.path
sys.path.append(os.path.abspath(".."))
from utils.Predictor import Predictor
from utils.Evaluations import Evaluations
from utils.util import get_organism_info

## Predict Boundaries

In [None]:
# Folder containing the genomes of interest, one of:
#   "../dataset/genomes/benbow_test", "../dataset/genomes/literature"
genome_path = "../dataset/genomes/benbow_test"

# Regular files only; sorted so the processing order is the same on every run
# (os.listdir returns entries in arbitrary order).
files = sorted(
    f for f in os.listdir(genome_path)
    if os.path.isfile(os.path.join(genome_path, f))
)

# File name of the saved model handed to Predictor
# (presumably resolved relative to ../utils/models/ -- confirm in Predictor).
model_file = "fine_tuned_model.pkl"

# Predictions are saved under ../outputs/<genome set>/<model name>/<genome>/.
# normpath + basename gives the last path component even with a trailing slash.
output_path = os.path.join(
    "../outputs",
    os.path.basename(os.path.normpath(genome_path)),
    model_file.split('.')[0],
)

# Parameters for the data representation; must match the trained model.
data_rep_params = {
    'representation': 'RCKmer',
    'representation_params': {'kmer': 7},
}

os.makedirs(output_path, exist_ok=True)

# Predict only the genomes that do not have predictions yet for this model.
for file in tqdm(files):
    # Accept fasta files only.
    if not file.endswith('.fasta'):
        print("The code only accepts fasta file!")
        continue

    filename = os.path.join(genome_path, file)

    # One output folder per genome, named after the part of the file name
    # before the first dot.
    output_dest = os.path.join(output_path, file.split('.')[0])

    # A folder that exists but is empty is what a failed or interrupted run
    # leaves behind; treat it as "not predicted yet" instead of skipping the
    # genome forever.
    if os.path.isdir(output_dest) and os.listdir(output_dest):
        print("Predictions for {} already exist".format(file))
        continue
    os.makedirs(output_dest, exist_ok=True)

    # Initialize the predictor.
    seq = Predictor(filename, output_file_path=output_dest, model_file=model_file)

    # Update the representation according to the trained model.
    seq.change_representation_parameters(data_rep_params)
    # Optional: seq.change_upper_threshold(0.9)

    # Run the predictor and save the predictions to an Excel file.
    pred = seq.predict()
    seq.predictions_to_excel(pred)

  0%|          | 0/24 [00:00<?, ?it/s]

--- start predicting ---


Processing NC_008253.1|1|NC_008253.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 494/494 [00:00<00:00, 498073.60it/s]



 Get DNA segment probability  

positive ratio:  0.2935222672064777
out of distribution:  False


100%|██████████| 494/494 [00:00<00:00, 2561169.56it/s]


 Fine tune GEI borders  




100%|██████████| 32/32 [00:05<00:00,  6.15it/s]
Processing NC_008253.1|1|NC_008253.1: 100%|██████████| 1/1 [00:21<00:00, 21.40s/it]
  4%|▍         | 1/24 [00:21<08:16, 21.57s/it]

--- finished predicting ---
--- 21.440386056900024 seconds ---
--- start predicting ---


Processing NC_010473.1|1|NC_010473.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 469/469 [00:00<00:00, 896594.61it/s]



 Get DNA segment probability  

positive ratio:  0.24520255863539445
out of distribution:  False


100%|██████████| 469/469 [00:00<00:00, 2846785.20it/s]


 Fine tune GEI borders  




100%|██████████| 36/36 [00:05<00:00,  6.07it/s]
Processing NC_010473.1|1|NC_010473.1: 100%|██████████| 1/1 [00:19<00:00, 19.27s/it]
  8%|▊         | 2/24 [00:40<07:25, 20.25s/it]

--- finished predicting ---
--- 19.281096935272217 seconds ---
--- start predicting ---


Processing NC_007958.1|1|NC_007958.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 490/490 [00:00<00:00, 690362.43it/s]



 Get DNA segment probability  

positive ratio:  0.6918367346938775
out of distribution:  False


100%|██████████| 490/490 [00:00<00:00, 1906501.82it/s]


 Fine tune GEI borders  




100%|██████████| 94/94 [00:23<00:00,  3.96it/s]
Processing NC_007958.1|1|NC_007958.1: 100%|██████████| 1/1 [00:43<00:00, 43.30s/it]
 12%|█▎        | 3/24 [01:24<10:46, 30.80s/it]

--- finished predicting ---
--- 43.31108498573303 seconds ---
--- start predicting ---


Processing NC_009665.1|1|NC_009665.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 523/523 [00:00<00:00, 699273.51it/s]



 Get DNA segment probability  

positive ratio:  0.7284894837476099
out of distribution:  False


100%|██████████| 523/523 [00:00<00:00, 1828017.49it/s]


 Fine tune GEI borders  




100%|██████████| 105/105 [00:26<00:00,  4.01it/s]
Processing NC_009665.1|1|NC_009665.1: 100%|██████████| 1/1 [00:49<00:00, 49.32s/it]
 17%|█▋        | 4/24 [02:13<12:42, 38.13s/it]

--- finished predicting ---
--- 49.33301901817322 seconds ---
--- start predicting ---


Processing NC_010334.1|1|NC_010334.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 523/523 [00:00<00:00, 770232.09it/s]



 Get DNA segment probability  

positive ratio:  0.6787762906309751
out of distribution:  False


100%|██████████| 523/523 [00:00<00:00, 1944699.46it/s]


 Fine tune GEI borders  




100%|██████████| 107/107 [00:29<00:00,  3.60it/s]
Processing NC_010334.1|1|NC_010334.1: 100%|██████████| 1/1 [00:51<00:00, 51.40s/it]
 21%|██        | 5/24 [03:05<13:35, 42.93s/it]

--- finished predicting ---
--- 51.409305810928345 seconds ---
--- start predicting ---


Processing NC_007432.1|1|NC_007432.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 213/213 [00:00<00:00, 762926.35it/s]



 Get DNA segment probability  

positive ratio:  0.4507042253521127
out of distribution:  False


100%|██████████| 213/213 [00:00<00:00, 1900822.88it/s]


 Fine tune GEI borders  




100%|██████████| 29/29 [00:05<00:00,  5.35it/s]
Processing NC_007432.1|1|NC_007432.1: 100%|██████████| 1/1 [00:11<00:00, 11.97s/it]
 25%|██▌       | 6/24 [03:17<09:43, 32.42s/it]

--- finished predicting ---
--- 11.980782985687256 seconds ---
--- start predicting ---


Processing NC_009512.1|1|NC_009512.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 596/596 [00:00<00:00, 739587.33it/s]



 Get DNA segment probability  

positive ratio:  0.2902684563758389
out of distribution:  False


100%|██████████| 596/596 [00:00<00:00, 3105348.05it/s]


 Fine tune GEI borders  




100%|██████████| 50/50 [00:08<00:00,  6.03it/s]
Processing NC_009512.1|1|NC_009512.1: 100%|██████████| 1/1 [00:24<00:00, 24.72s/it]
 29%|██▉       | 7/24 [03:41<08:28, 29.92s/it]

--- finished predicting ---
--- 24.726610898971558 seconds ---
--- start predicting ---


Processing NC_009783.1|1|NC_009783.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 377/377 [00:00<00:00, 992002.89it/s]



 Get DNA segment probability  

positive ratio:  0.35809018567639256
out of distribution:  False


100%|██████████| 377/377 [00:00<00:00, 2108336.81it/s]


 Fine tune GEI borders  




100%|██████████| 40/40 [00:07<00:00,  5.15it/s]
Processing NC_009783.1|1|NC_009783.1: 100%|██████████| 1/1 [00:19<00:00, 19.90s/it]
 33%|███▎      | 8/24 [04:01<07:07, 26.74s/it]

--- finished predicting ---
--- 19.909531831741333 seconds ---
--- start predicting ---


Processing NC_010501.1|1|NC_010501.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 578/578 [00:00<00:00, 1070807.29it/s]



 Get DNA segment probability  

positive ratio:  0.35986159169550175
out of distribution:  False


100%|██████████| 578/578 [00:00<00:00, 2579050.76it/s]


 Fine tune GEI borders  




100%|██████████| 66/66 [00:13<00:00,  4.79it/s]
Processing NC_010501.1|1|NC_010501.1: 100%|██████████| 1/1 [00:30<00:00, 30.63s/it]
 38%|███▊      | 9/24 [04:32<06:59, 27.97s/it]

--- finished predicting ---
--- 30.63654589653015 seconds ---
--- start predicting ---


Processing NC_008321.1|1|NC_008321.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 471/471 [00:00<00:00, 967442.30it/s]



 Get DNA segment probability  

positive ratio:  0.7133757961783439
out of distribution:  False


100%|██████████| 471/471 [00:00<00:00, 1890447.07it/s]


 Fine tune GEI borders  




100%|██████████| 92/92 [00:26<00:00,  3.46it/s]
Processing NC_008321.1|1|NC_008321.1: 100%|██████████| 1/1 [00:46<00:00, 46.84s/it]
 42%|████▏     | 10/24 [05:19<07:53, 33.81s/it]

--- finished predicting ---
--- 46.8461058139801 seconds ---
--- start predicting ---


Processing NC_005071.1|1|NC_005071.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 242/242 [00:00<00:00, 375239.03it/s]



 Get DNA segment probability  

positive ratio:  0.4090909090909091
out of distribution:  False


100%|██████████| 242/242 [00:00<00:00, 2084233.20it/s]


 Fine tune GEI borders  




100%|██████████| 33/33 [00:07<00:00,  4.54it/s]
Processing NC_005071.1|1|NC_005071.1: 100%|██████████| 1/1 [00:14<00:00, 14.43s/it]
 46%|████▌     | 11/24 [05:33<06:02, 27.89s/it]

--- finished predicting ---
--- 14.440725803375244 seconds ---
--- start predicting ---


Processing NC_004116.1|1|NC_004116.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 217/217 [00:00<00:00, 1074573.75it/s]



 Get DNA segment probability  

positive ratio:  0.48847926267281105
out of distribution:  False


100%|██████████| 217/217 [00:00<00:00, 1904108.72it/s]


 Fine tune GEI borders  




100%|██████████| 33/33 [00:06<00:00,  5.32it/s]
Processing NC_004116.1|1|NC_004116.1: 100%|██████████| 1/1 [00:13<00:00, 13.65s/it]
 50%|█████     | 12/24 [05:47<04:42, 23.57s/it]

--- finished predicting ---
--- 13.654690027236938 seconds ---
--- start predicting ---


Processing NC_008024.1|1|NC_008024.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 194/194 [00:00<00:00, 881576.36it/s]



 Get DNA segment probability  

positive ratio:  0.4536082474226804
out of distribution:  False


100%|██████████| 194/194 [00:00<00:00, 1684668.69it/s]


 Fine tune GEI borders  




100%|██████████| 24/24 [00:05<00:00,  4.64it/s]
Processing NC_008024.1|1|NC_008024.1: 100%|██████████| 1/1 [00:11<00:00, 11.51s/it]
 54%|█████▍    | 13/24 [05:59<03:39, 19.93s/it]

--- finished predicting ---
--- 11.520473957061768 seconds ---
--- start predicting ---


Processing NC_009504.1|1|NC_009504.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 117/117 [00:00<00:00, 807127.58it/s]



 Get DNA segment probability  

positive ratio:  0.6410256410256411
out of distribution:  False


100%|██████████| 117/117 [00:00<00:00, 1185346.78it/s]


 Fine tune GEI borders  




100%|██████████| 23/23 [00:04<00:00,  4.61it/s]
Processing NC_009504.1|1|NC_009504.1: 100%|██████████| 1/1 [00:09<00:00,  9.86s/it]
 58%|█████▊    | 14/24 [06:08<02:49, 16.90s/it]

--- finished predicting ---
--- 9.860074043273926 seconds ---
--- start predicting ---


Processing NC_010515.1|1|NC_010515.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 322/322 [00:00<00:00, 781125.44it/s]



 Get DNA segment probability  

positive ratio:  0.7608695652173914
out of distribution:  False


100%|██████████| 322/322 [00:00<00:00, 1667365.29it/s]


 Fine tune GEI borders  




100%|██████████| 61/61 [00:15<00:00,  3.94it/s]
Processing NC_010515.1|1|NC_010515.1: 100%|██████████| 1/1 [00:30<00:00, 30.43s/it]
 62%|██████▎   | 15/24 [06:39<03:08, 20.99s/it]

--- finished predicting ---
--- 30.43395495414734 seconds ---
--- start predicting ---


Processing NC_010322.1|1|NC_010322.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 608/608 [00:00<00:00, 922291.80it/s]



 Get DNA segment probability  

positive ratio:  0.2845394736842105
out of distribution:  False


100%|██████████| 608/608 [00:00<00:00, 2701416.14it/s]


 Fine tune GEI borders  




100%|██████████| 45/45 [00:09<00:00,  4.96it/s]
Processing NC_010322.1|1|NC_010322.1: 100%|██████████| 1/1 [00:25<00:00, 25.14s/it]
 67%|██████▋   | 16/24 [07:04<02:58, 22.25s/it]

--- finished predicting ---
--- 25.150710105895996 seconds ---
--- start predicting ---


Processing NC_009438.1|1|NC_009438.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 466/466 [00:00<00:00, 954834.23it/s]



 Get DNA segment probability  

positive ratio:  0.7317596566523605
out of distribution:  False


100%|██████████| 466/466 [00:00<00:00, 1886627.09it/s]


 Fine tune GEI borders  




100%|██████████| 92/92 [00:28<00:00,  3.18it/s]
Processing NC_009438.1|1|NC_009438.1: 100%|██████████| 1/1 [00:49<00:00, 49.15s/it]
 71%|███████   | 17/24 [07:53<03:32, 30.36s/it]

--- finished predicting ---
--- 49.16073417663574 seconds ---
--- start predicting ---


Processing NC_004603.1|1|NC_004603.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 329/329 [00:00<00:00, 716844.68it/s]



 Get DNA segment probability  

positive ratio:  0.3252279635258359
out of distribution:  False


100%|██████████| 329/329 [00:00<00:00, 2508956.39it/s]


 Fine tune GEI borders  




100%|██████████| 28/28 [00:06<00:00,  4.11it/s]
Processing NC_004603.1|1|NC_004603.1: 100%|██████████| 1/1 [00:16<00:00, 16.41s/it]
 75%|███████▌  | 18/24 [08:10<02:37, 26.18s/it]

--- finished predicting ---
--- 16.419966220855713 seconds ---
--- start predicting ---


Processing NC_009342.1|1|NC_009342.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 332/332 [00:00<00:00, 849608.86it/s]



 Get DNA segment probability  

positive ratio:  0.11746987951807229
out of distribution:  False


100%|██████████| 332/332 [00:00<00:00, 3014088.59it/s]


 Fine tune GEI borders  




100%|██████████| 13/13 [00:02<00:00,  6.20it/s]
Processing NC_009342.1|1|NC_009342.1: 100%|██████████| 1/1 [00:10<00:00, 10.10s/it]
 79%|███████▉  | 19/24 [08:20<01:46, 21.36s/it]

--- finished predicting ---
--- 10.108454942703247 seconds ---
--- start predicting ---


Processing NC_007606.1|1|NC_007606.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 437/437 [00:00<00:00, 992909.45it/s]



 Get DNA segment probability  

positive ratio:  0.36384439359267734
out of distribution:  False


100%|██████████| 437/437 [00:00<00:00, 2386602.67it/s]


 Fine tune GEI borders  




100%|██████████| 66/66 [00:11<00:00,  5.95it/s]
Processing NC_007606.1|1|NC_007606.1: 100%|██████████| 1/1 [00:24<00:00, 24.60s/it]
 83%|████████▎ | 20/24 [08:45<01:29, 22.35s/it]

--- finished predicting ---
--- 24.606449365615845 seconds ---
--- start predicting ---


Processing NC_004070.1|1|NC_004070.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 191/191 [00:00<00:00, 872671.09it/s]



 Get DNA segment probability  

positive ratio:  0.42408376963350786
out of distribution:  False


100%|██████████| 191/191 [00:00<00:00, 1968334.31it/s]


 Fine tune GEI borders  




100%|██████████| 19/19 [00:03<00:00,  5.03it/s]
Processing NC_004070.1|1|NC_004070.1: 100%|██████████| 1/1 [00:09<00:00,  9.59s/it]
 88%|████████▊ | 21/24 [08:54<00:55, 18.53s/it]

--- finished predicting ---
--- 9.599333047866821 seconds ---
--- start predicting ---


Processing NC_008563.1|1|NC_008563.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 509/509 [00:00<00:00, 1133776.28it/s]



 Get DNA segment probability  

positive ratio:  0.3005893909626719
out of distribution:  False


100%|██████████| 509/509 [00:00<00:00, 2675314.21it/s]


 Fine tune GEI borders  




100%|██████████| 41/41 [00:04<00:00,  8.56it/s]
Processing NC_008563.1|1|NC_008563.1: 100%|██████████| 1/1 [00:21<00:00, 21.11s/it]
 92%|█████████▏| 22/24 [09:15<00:38, 19.32s/it]

--- finished predicting ---
--- 21.119904041290283 seconds ---
--- start predicting ---


Processing NC_009708.1|1|NC_009708.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 473/473 [00:00<00:00, 1060912.19it/s]



 Get DNA segment probability  

positive ratio:  0.492600422832981
out of distribution:  False


100%|██████████| 473/473 [00:00<00:00, 2239171.32it/s]


 Fine tune GEI borders  




100%|██████████| 73/73 [00:14<00:00,  5.15it/s]
Processing NC_009708.1|1|NC_009708.1: 100%|██████████| 1/1 [00:31<00:00, 31.30s/it]
 96%|█████████▌| 23/24 [09:47<00:22, 22.93s/it]

--- finished predicting ---
--- 31.309992790222168 seconds ---
--- start predicting ---


Processing NC_010380.1|1|NC_010380.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 225/225 [00:00<00:00, 945609.62it/s]



 Get DNA segment probability  

positive ratio:  0.7955555555555556
out of distribution:  False


100%|██████████| 225/225 [00:00<00:00, 1629910.88it/s]


 Fine tune GEI borders  




100%|██████████| 39/39 [00:11<00:00,  3.50it/s]
Processing NC_010380.1|1|NC_010380.1: 100%|██████████| 1/1 [00:21<00:00, 21.13s/it]
100%|██████████| 24/24 [10:08<00:00, 25.35s/it]

--- finished predicting ---
--- 21.13872790336609 seconds ---





In [3]:
# Read the predictions of every genome and combine them into a single workbook.
results_dest = '{}/{}.xlsx'.format(output_path, model_file.split('.')[0])

frames = []
for genome_dir in sorted(os.listdir(output_path)):
    child_dir = os.path.join(output_path, genome_dir)
    # results_dest itself lives inside output_path, so after the first run the
    # listing contains a plain file; skip anything that is not a genome folder
    # (otherwise re-running this cell raises NotADirectoryError).
    if not os.path.isdir(child_dir):
        continue
    for pred_file in sorted(os.listdir(child_dir)):
        res = pd.read_excel(os.path.join(child_dir, pred_file))
        # The first column is dropped (presumably the index written by the
        # predictor -- confirm against Predictor.predictions_to_excel).
        res = res.drop(res.columns[0], axis=1)
        frames.append(res.assign(Genome=genome_dir))

if not frames:
    raise FileNotFoundError(
        "No prediction files found under {}".format(output_path)
    )

# Concatenate once instead of growing a DataFrame inside the loop.
results = pd.concat(frames)
results = results.rename(columns={'accession': 'Accession', 'start': 'Start', 'end': 'End'})

# Keep only rows whose probability exceeds 0.5; copy so the column assignment
# below works on an independent frame rather than a slice.
results = results[results['probability'] > 0.5].copy()

# Keep only the part of the accession before the first '|'.
results['Accession'] = results['Accession'].str.split('|').str[0]
results.to_excel(results_dest, index=False)

Results:
- benbow_test: ../outputs/benbow_test/fine_tuned_model.xlsx
- literature: ../outputs/literature/fine_tuned_model.xlsx

Copy the above results to the folder `../outputs/literature_predictions` for evaluation and rename them as:
- benbow_test: ../outputs/literature_predictions/fine_tuned_model_test.xlsx
- literature: ../outputs/literature_predictions/fine_tuned_model_literature.xlsx

## Evaluate Boundaries Prediction

In [None]:
predictors_folder = "../outputs/literature_predictions"
reference_folder = "../outputs/literature_reference"
predictors = ["alien_hunter", "islander", "islandpath_dimob", "islandviewer", "sigi_hmm", "islandpick"]
model_dict = {}
result_type = 'literature'  # 'test' or 'literature' evaluation

# Resolve every result_type-dependent file name in one place and fail early on
# an unknown value, instead of hitting a NameError further down the cell.
if result_type == 'test':
    benbow_file = "fine_tuned_model_test"
    treasure_island = "treasure_island_test"
    pos_table_file = "positive_test_table_gc.xlsx"
    neg_table_file = "negative_test_table_gc.xlsx"
elif result_type == 'literature':
    benbow_file = "fine_tuned_model_literature"
    treasure_island = "treasure_island_literature"
    pos_table_file = "GI_literature_set_table.xlsx"
    neg_table_file = "GI_negative_set_table.xlsx"
else:
    raise ValueError(
        "result_type must be 'test' or 'literature', got {!r}".format(result_type)
    )

predictors = predictors + [treasure_island, benbow_file]

# Load each predictor's workbook (<predictors_folder>/<predictor>.xlsx) and
# store what get_organism_info returns for it, keyed by predictor name.
for predictor in predictors:
    predictor_file = os.path.join(predictors_folder, predictor + '.xlsx')
    predictor_df = pd.read_excel(predictor_file)
    model_dict[predictor] = get_organism_info(predictor_df)

# Read the ground-truth (positive / negative) tables for the chosen evaluation.
pos_table = pd.read_excel(os.path.join(reference_folder, pos_table_file))
neg_table = pd.read_excel(os.path.join(reference_folder, neg_table_file))

organism_pos_test_dict = get_organism_info(pos_table)
organism_neg_test_dict = get_organism_info(neg_table)

# Organisms to evaluate: the keys of the positive ground-truth dictionary.
total_orgs = organism_pos_test_dict.keys()

# NOTE(review): `eval` shadows the Python builtin; the name is kept because
# later cells may refer to it. Consider renaming it notebook-wide.
eval = Evaluations()

print("evaluation of {} data".format(result_type))
eval_results = eval.evaluations_main_104(total_orgs,
                                        model_dict,
                                        organism_pos_test_dict,
                                        organism_neg_test_dict,
                                        result_type,
                                        False)


def myconverter(obj):
    """Convert objects the json module cannot serialize natively.

    Intended to be passed as the ``default`` argument of ``json.dumps`` /
    ``json.dump``.

    Parameters
    ----------
    obj : object
        The value json could not serialize on its own.

    Returns
    -------
    bool, int, float, list or str
        Plain-Python equivalent of a NumPy boolean, integer, floating-point
        scalar or array, or the string form of a ``datetime.datetime``.

    Raises
    ------
    TypeError
        If ``obj`` is of an unsupported type. Raising (instead of implicitly
        returning ``None``) prevents unknown values from being written
        silently as ``null``.
    """
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, datetime.datetime):
        return str(obj)
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(obj).__name__)
    )
            
# Serialize the evaluation results; myconverter handles the NumPy / datetime
# values that json cannot encode natively.
json_obj = json.dumps(eval_results, indent=1, default=myconverter)
json_file = "../outputs/evaluation/evaluation_result_{}_fine_tuned_model.json".format(result_type)

# Make sure the destination folder exists before writing.
os.makedirs(os.path.dirname(json_file), exist_ok=True)

# json_obj is already JSON text, so write it as-is. Passing it through
# json.dump again would encode it a second time and store a single escaped
# string literal instead of a JSON object.
# NOTE(review): readers that relied on the double encoding
# (json.loads(json.load(f))) need a single json.load(f) now.
with open(json_file, 'w') as file:
    file.write(json_obj)

evaluation of literature data
---------------
alien_hunter
{'NC_004431.1': [{'TP': 552951}, {'FP': 25986}, {'TN': 497212}, {'FN': 243868}, {'Precision': 0.9551142870467771}, {'Recall': 0.6939480609774616}, {'F-Score': 0.8038503920753389}, {'Accuracy': 0.7955677843542924}, {'MCC': 0.6350963622480542}], 'NC_002695.1': [{'TP': 534686}, {'FP': 27825}, {'TN': 486507}, {'FN': 347229}, {'Precision': 0.9505343006625648}, {'Recall': 0.6062783828373483}, {'F-Score': 0.7403439151607628}, {'Accuracy': 0.7313842035112699}, {'MCC': 0.5430443547116833}], 'NC_003198.1': [{'TP': 450504}, {'FP': 32873}, {'TN': 515842}, {'FN': 183190}, {'Precision': 0.9319930406287432}, {'Recall': 0.7109172565938765}, {'F-Score': 0.8065807813469331}, {'Accuracy': 0.8172688130756786}, {'MCC': 0.6604016515537388}], 'NC_003923.1': [{'TP': 5000}, {'FP': 31830}, {'TN': 240519}, {'FN': 212639}, {'Precision': 0.13575889220743959}, {'Recall': 0.022973823625361264}, {'F-Score': 0.03929751757581473}, {'Accuracy': 0.501071454811138