In [3]:
import os
import numpy as np 
import pandas as pd 
from tqdm import tqdm
import json 
import datetime
from utils.Predictor import Predictor
from utils.Evaluations import Evaluations
from utils.util import get_organism_info

## Predict Boundaries

In [None]:
# Folder containing the genomes of interest
# ("dataset/genomes/benbow_test", "dataset/genomes/literature").
genome_path = "dataset/genomes/benbow_test"
files = [f for f in os.listdir(genome_path) if os.path.isfile(os.path.join(genome_path, f))]

# Saved model to use; the original note gives hpc_job/utils/models/ as the
# location the models are loaded from. Known model files:
# "benbow_SVM_RCKmer_7_fine_tuned_literature.pkl", "benbow_SVM_RCKmer_7_fine_tuned_benbow.pkl"
model_file = "benbow_SVM_RCKmer_7_fine_tuned_benbow.pkl"

# Predictions are saved under outputs/<genome folder name>/<model name>/
output_path = "outputs"
output_path = os.path.join(output_path, genome_path.split('/')[-1], model_file.split('.')[0])

# Parameters for the data representation; they are handed to the predictor so
# that it encodes sequences the way the trained model expects.
data_rep_params = {
    'encoding': 'RCKmer',
    'encoding_params': {'kmer': 7},
}

# exist_ok avoids the separate "exists?" check before creating the folder
os.makedirs(output_path, exist_ok=True)

# Genomes whose predictions already exist for this model are skipped.
for file in tqdm(files):
    # accept fasta files only
    if not file.endswith('.fasta'):
        print("The code only accepts fasta file!")
        continue

    filename = os.path.join(genome_path, file)

    # one output folder per genome
    output_dest = os.path.join(output_path, file.split('.')[0])

    # Only a non-empty folder counts as "already predicted". The folder is
    # created before the predictor runs, so a failed or interrupted run leaves
    # an empty folder behind; such a genome must be predicted again instead of
    # being skipped forever.
    if os.path.isdir(output_dest) and os.listdir(output_dest):
        print("Predictions for {} already exist".format(file))
        continue
    os.makedirs(output_dest, exist_ok=True)

    # initialize the predictor
    seq = Predictor(filename, output_file_path=output_dest, model_file=model_file)

    # update the representation according to the trained model
    seq.change_encoding_parameters(data_rep_params)
    #seq.change_upper_threshold(0.9)

    # run the predictor
    pred = seq.predict()

    # save predictions to excel file
    seq.predictions_to_excel(pred)

  0%|          | 0/24 [00:00<?, ?it/s]

--- start predicting ---


Processing NC_008253.1|1|NC_008253.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 494/494 [00:00<00:00, 557585.09it/s]



 Get DNA segment probability  

positive ratio:  0.25708502024291496
out of distribution:  False


100%|██████████| 494/494 [00:00<00:00, 2906011.47it/s]


 Fine tune GEI borders  




100%|██████████| 28/28 [00:04<00:00,  6.60it/s]
Processing NC_008253.1|1|NC_008253.1: 100%|██████████| 1/1 [00:19<00:00, 19.09s/it]
  4%|▍         | 1/24 [00:19<07:20, 19.15s/it]

--- finished predicting ---
--- 19.104822874069214 seconds ---
--- start predicting ---


Processing NC_010473.1|1|NC_010473.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 469/469 [00:00<00:00, 616268.35it/s]



 Get DNA segment probability  

positive ratio:  0.18336886993603413
out of distribution:  False


100%|██████████| 469/469 [00:00<00:00, 2953646.51it/s]


 Fine tune GEI borders  




100%|██████████| 37/37 [00:03<00:00,  9.31it/s]
Processing NC_010473.1|1|NC_010473.1: 100%|██████████| 1/1 [00:16<00:00, 16.41s/it]
  8%|▊         | 2/24 [00:35<06:26, 17.57s/it]

--- finished predicting ---
--- 16.42015790939331 seconds ---
--- start predicting ---


Processing NC_007958.1|1|NC_007958.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 490/490 [00:00<00:00, 397833.71it/s]



 Get DNA segment probability  

positive ratio:  0.7122448979591837
out of distribution:  False


100%|██████████| 490/490 [00:00<00:00, 1761104.51it/s]


 Fine tune GEI borders  




100%|██████████| 102/102 [00:24<00:00,  4.16it/s]
Processing NC_007958.1|1|NC_007958.1: 100%|██████████| 1/1 [00:45<00:00, 45.51s/it]
 12%|█▎        | 3/24 [01:21<10:37, 30.36s/it]

--- finished predicting ---
--- 45.52085494995117 seconds ---
--- start predicting ---


Processing NC_009665.1|1|NC_009665.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 523/523 [00:00<00:00, 377040.39it/s]



 Get DNA segment probability  

positive ratio:  0.7284894837476099
out of distribution:  False


100%|██████████| 523/523 [00:00<00:00, 1771907.10it/s]


 Fine tune GEI borders  




100%|██████████| 104/104 [00:24<00:00,  4.19it/s]
Processing NC_009665.1|1|NC_009665.1: 100%|██████████| 1/1 [00:47<00:00, 47.66s/it]
 17%|█▋        | 4/24 [02:08<12:24, 37.23s/it]

--- finished predicting ---
--- 47.676778078079224 seconds ---
--- start predicting ---


Processing NC_010334.1|1|NC_010334.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 523/523 [00:00<00:00, 509315.30it/s]



 Get DNA segment probability  

positive ratio:  0.6711281070745698
out of distribution:  False


100%|██████████| 523/523 [00:00<00:00, 1873288.64it/s]


 Fine tune GEI borders  




100%|██████████| 105/105 [00:25<00:00,  4.14it/s]
Processing NC_010334.1|1|NC_010334.1: 100%|██████████| 1/1 [00:47<00:00, 47.12s/it]
 21%|██        | 5/24 [02:56<12:55, 40.81s/it]

--- finished predicting ---
--- 47.12811899185181 seconds ---
--- start predicting ---


Processing NC_007432.1|1|NC_007432.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 213/213 [00:00<00:00, 955493.85it/s]



 Get DNA segment probability  

positive ratio:  0.27699530516431925
out of distribution:  False


100%|██████████| 213/213 [00:00<00:00, 2173690.39it/s]


 Fine tune GEI borders  




100%|██████████| 21/21 [00:01<00:00, 10.67it/s]
Processing NC_007432.1|1|NC_007432.1: 100%|██████████| 1/1 [00:08<00:00,  8.03s/it]
 25%|██▌       | 6/24 [03:04<08:54, 29.68s/it]

--- finished predicting ---
--- 8.0380117893219 seconds ---
--- start predicting ---


Processing NC_009512.1|1|NC_009512.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 596/596 [00:00<00:00, 572693.05it/s]



 Get DNA segment probability  

positive ratio:  0.2063758389261745
out of distribution:  False


100%|██████████| 596/596 [00:00<00:00, 2937491.40it/s]


 Fine tune GEI borders  




100%|██████████| 44/44 [00:05<00:00,  7.89it/s]
Processing NC_009512.1|1|NC_009512.1: 100%|██████████| 1/1 [00:21<00:00, 21.48s/it]
 29%|██▉       | 7/24 [03:25<07:39, 27.01s/it]

--- finished predicting ---
--- 21.486854076385498 seconds ---
--- start predicting ---


Processing NC_009783.1|1|NC_009783.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 377/377 [00:00<00:00, 683046.48it/s]



 Get DNA segment probability  

positive ratio:  0.3183023872679045
out of distribution:  False


100%|██████████| 377/377 [00:00<00:00, 2399472.85it/s]


 Fine tune GEI borders  




100%|██████████| 40/40 [00:06<00:00,  6.00it/s]
Processing NC_009783.1|1|NC_009783.1: 100%|██████████| 1/1 [00:18<00:00, 18.15s/it]
 33%|███▎      | 8/24 [03:43<06:27, 24.21s/it]

--- finished predicting ---
--- 18.160367250442505 seconds ---
--- start predicting ---


Processing NC_010501.1|1|NC_010501.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 578/578 [00:00<00:00, 945886.74it/s]



 Get DNA segment probability  

positive ratio:  0.2975778546712803
out of distribution:  False


100%|██████████| 578/578 [00:00<00:00, 2809163.05it/s]


 Fine tune GEI borders  




100%|██████████| 60/60 [00:10<00:00,  5.95it/s]
Processing NC_010501.1|1|NC_010501.1: 100%|██████████| 1/1 [00:26<00:00, 26.45s/it]
 38%|███▊      | 9/24 [04:10<06:13, 24.93s/it]

--- finished predicting ---
--- 26.46033024787903 seconds ---
--- start predicting ---


Processing NC_008321.1|1|NC_008321.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 471/471 [00:00<00:00, 853355.16it/s]



 Get DNA segment probability  

positive ratio:  0.7133757961783439
out of distribution:  False


100%|██████████| 471/471 [00:00<00:00, 1861938.91it/s]


 Fine tune GEI borders  




100%|██████████| 96/96 [00:28<00:00,  3.37it/s]
Processing NC_008321.1|1|NC_008321.1: 100%|██████████| 1/1 [00:47<00:00, 47.74s/it]
 42%|████▏     | 10/24 [04:58<07:27, 31.99s/it]

--- finished predicting ---
--- 47.751322984695435 seconds ---
Predictions for NC_005071.1.fasta already exist
--- start predicting ---


Processing NC_004116.1|1|NC_004116.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 217/217 [00:00<00:00, 214257.05it/s]



 Get DNA segment probability  

positive ratio:  0.34101382488479265
out of distribution:  False


100%|██████████| 217/217 [00:00<00:00, 2151687.87it/s]


 Fine tune GEI borders  




100%|██████████| 23/23 [00:03<00:00,  6.66it/s]
Processing NC_004116.1|1|NC_004116.1: 100%|██████████| 1/1 [00:09<00:00,  9.60s/it]
 50%|█████     | 12/24 [05:07<03:51, 19.26s/it]

--- finished predicting ---
--- 9.610486030578613 seconds ---
--- start predicting ---


Processing NC_008024.1|1|NC_008024.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 194/194 [00:00<00:00, 757630.33it/s]



 Get DNA segment probability  

positive ratio:  0.38144329896907214
out of distribution:  False


100%|██████████| 194/194 [00:00<00:00, 2044459.74it/s]


 Fine tune GEI borders  




100%|██████████| 17/17 [00:03<00:00,  5.48it/s]
Processing NC_008024.1|1|NC_008024.1: 100%|██████████| 1/1 [00:08<00:00,  8.76s/it]
 54%|█████▍    | 13/24 [05:16<03:02, 16.63s/it]

--- finished predicting ---
--- 8.770516872406006 seconds ---
--- start predicting ---


Processing NC_009504.1|1|NC_009504.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 117/117 [00:00<00:00, 812472.79it/s]



 Get DNA segment probability  

positive ratio:  0.6068376068376068
out of distribution:  False


100%|██████████| 117/117 [00:00<00:00, 1426551.07it/s]


 Fine tune GEI borders  




100%|██████████| 22/22 [00:04<00:00,  5.02it/s]
Processing NC_009504.1|1|NC_009504.1: 100%|██████████| 1/1 [00:09<00:00,  9.06s/it]
 58%|█████▊    | 14/24 [05:25<02:26, 14.65s/it]

--- finished predicting ---
--- 9.068098068237305 seconds ---
--- start predicting ---


Processing NC_010515.1|1|NC_010515.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 322/322 [00:00<00:00, 1065115.05it/s]



 Get DNA segment probability  

positive ratio:  0.7608695652173914
out of distribution:  False


100%|██████████| 322/322 [00:00<00:00, 1619383.56it/s]


 Fine tune GEI borders  




100%|██████████| 62/62 [00:14<00:00,  4.36it/s]
Processing NC_010515.1|1|NC_010515.1: 100%|██████████| 1/1 [00:29<00:00, 29.17s/it]
 62%|██████▎   | 15/24 [05:54<02:47, 18.63s/it]

--- finished predicting ---
--- 29.17317295074463 seconds ---
--- start predicting ---


Processing NC_010322.1|1|NC_010322.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 608/608 [00:00<00:00, 1034118.75it/s]



 Get DNA segment probability  

positive ratio:  0.18914473684210525
out of distribution:  False


100%|██████████| 608/608 [00:00<00:00, 3183691.43it/s]


 Fine tune GEI borders  




100%|██████████| 37/37 [00:04<00:00,  9.01it/s]
Processing NC_010322.1|1|NC_010322.1: 100%|██████████| 1/1 [00:19<00:00, 19.47s/it]
 67%|██████▋   | 16/24 [06:14<02:31, 18.88s/it]

--- finished predicting ---
--- 19.483875036239624 seconds ---
--- start predicting ---


Processing NC_009438.1|1|NC_009438.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 466/466 [00:00<00:00, 529543.66it/s]



 Get DNA segment probability  

positive ratio:  0.7296137339055794
out of distribution:  False


100%|██████████| 466/466 [00:00<00:00, 1881179.66it/s]


 Fine tune GEI borders  




100%|██████████| 94/94 [00:26<00:00,  3.59it/s]
Processing NC_009438.1|1|NC_009438.1: 100%|██████████| 1/1 [00:46<00:00, 46.60s/it]
 71%|███████   | 17/24 [07:01<03:07, 26.84s/it]

--- finished predicting ---
--- 46.613094091415405 seconds ---
--- start predicting ---


Processing NC_004603.1|1|NC_004603.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 329/329 [00:00<00:00, 587702.73it/s]



 Get DNA segment probability  

positive ratio:  0.2674772036474164
out of distribution:  False


100%|██████████| 329/329 [00:00<00:00, 2346812.95it/s]


 Fine tune GEI borders  




100%|██████████| 28/28 [00:05<00:00,  4.95it/s]
Processing NC_004603.1|1|NC_004603.1: 100%|██████████| 1/1 [00:14<00:00, 14.58s/it]
 75%|███████▌  | 18/24 [07:15<02:19, 23.29s/it]

--- finished predicting ---
--- 14.588462114334106 seconds ---
--- start predicting ---


Processing NC_009342.1|1|NC_009342.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 332/332 [00:00<00:00, 290226.95it/s]



 Get DNA segment probability  

positive ratio:  0.10843373493975904
out of distribution:  False


100%|██████████| 332/332 [00:00<00:00, 2578720.24it/s]


 Fine tune GEI borders  




100%|██████████| 15/15 [00:02<00:00,  6.46it/s]
Processing NC_009342.1|1|NC_009342.1: 100%|██████████| 1/1 [00:10<00:00, 10.41s/it]
 79%|███████▉  | 19/24 [07:26<01:37, 19.53s/it]

--- finished predicting ---
--- 10.416185855865479 seconds ---
--- start predicting ---


Processing NC_007606.1|1|NC_007606.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 437/437 [00:00<00:00, 908280.90it/s]



 Get DNA segment probability  

positive ratio:  0.3135011441647597
out of distribution:  False


100%|██████████| 437/437 [00:00<00:00, 2528152.89it/s]


 Fine tune GEI borders  




100%|██████████| 64/64 [00:09<00:00,  6.65it/s]
Processing NC_007606.1|1|NC_007606.1: 100%|██████████| 1/1 [00:22<00:00, 22.73s/it]
 83%|████████▎ | 20/24 [07:49<01:21, 20.49s/it]

--- finished predicting ---
--- 22.735143899917603 seconds ---
--- start predicting ---


Processing NC_004070.1|1|NC_004070.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 191/191 [00:00<00:00, 838860.80it/s]



 Get DNA segment probability  

positive ratio:  0.3298429319371728
out of distribution:  False


100%|██████████| 191/191 [00:00<00:00, 2048879.96it/s]


 Fine tune GEI borders  




100%|██████████| 15/15 [00:02<00:00,  5.51it/s]
Processing NC_004070.1|1|NC_004070.1: 100%|██████████| 1/1 [00:08<00:00,  8.41s/it]
 88%|████████▊ | 21/24 [07:57<00:50, 16.91s/it]

--- finished predicting ---
--- 8.413859844207764 seconds ---
--- start predicting ---


Processing NC_008563.1|1|NC_008563.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 509/509 [00:00<00:00, 977071.28it/s]



 Get DNA segment probability  

positive ratio:  0.2730844793713163
out of distribution:  False


100%|██████████| 509/509 [00:00<00:00, 2740565.77it/s]


 Fine tune GEI borders  




100%|██████████| 36/36 [00:06<00:00,  5.36it/s]
Processing NC_008563.1|1|NC_008563.1: 100%|██████████| 1/1 [00:22<00:00, 22.03s/it]
 92%|█████████▏| 22/24 [08:19<00:36, 18.45s/it]

--- finished predicting ---
--- 22.042513847351074 seconds ---
--- start predicting ---


Processing NC_009708.1|1|NC_009708.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 473/473 [00:00<00:00, 195941.31it/s]



 Get DNA segment probability  

positive ratio:  0.4143763213530655
out of distribution:  False


100%|██████████| 473/473 [00:00<00:00, 2413510.70it/s]


 Fine tune GEI borders  




100%|██████████| 68/68 [00:11<00:00,  6.07it/s]
Processing NC_009708.1|1|NC_009708.1: 100%|██████████| 1/1 [00:27<00:00, 27.12s/it]
 96%|█████████▌| 23/24 [08:46<00:21, 21.05s/it]

--- finished predicting ---
--- 27.125896215438843 seconds ---
--- start predicting ---


Processing NC_010380.1|1|NC_010380.1:   0%|          | 0/1 [00:00<?, ?it/s]


-------- sequence 1-------- 


 Preprocessing DNA segment 



100%|██████████| 225/225 [00:00<00:00, 974915.70it/s]



 Get DNA segment probability  

positive ratio:  0.6311111111111111
out of distribution:  False


100%|██████████| 225/225 [00:00<00:00, 1618727.96it/s]


 Fine tune GEI borders  




100%|██████████| 44/44 [00:10<00:00,  4.21it/s]
Processing NC_010380.1|1|NC_010380.1: 100%|██████████| 1/1 [00:19<00:00, 19.00s/it]
100%|██████████| 24/24 [09:05<00:00, 22.74s/it]

--- finished predicting ---
--- 19.010355949401855 seconds ---





In [None]:
# Read the predictions of each genome, then combine them into a single file.
results_dest = '{}/{}.xlsx'.format(output_path, model_file.split('.')[0])

frames = []
# sorted() makes the row order of the combined file reproducible
# (os.listdir returns entries in arbitrary order).
for genome_dir in sorted(os.listdir(output_path)):
    genome_folder = os.path.join(output_path, genome_dir)

    # results_dest is written into output_path itself, so on a re-run the
    # combined .xlsx shows up in this listing; only genome folders are read.
    if not os.path.isdir(genome_folder):
        continue

    for pred_file in sorted(os.listdir(genome_folder)):
        #if 'out' not in pred_file:
        res = pd.read_excel(os.path.join(genome_folder, pred_file))
        # drop the first column (presumably the index column saved with the
        # predictions -- TODO confirm against Predictor.predictions_to_excel)
        res = res.drop(res.columns[0], axis=1)
        # remember which genome folder the rows came from
        frames.append(res.assign(Genome=genome_dir))

if not frames:
    raise FileNotFoundError("No prediction files found under {}".format(output_path))

# concatenate once instead of growing a DataFrame inside the loop
results = pd.concat(frames)

results = results.rename(columns={'accession':'Accession','start':'Start','end':'End'})
# keep only regions predicted with probability above 0.5; copy() so the column
# assignment below acts on an independent frame
results = results[results['probability']>0.5].copy()
# keep the part of the accession before the first '|'
results['Accession'] = results['Accession'].str.split('|').str[0]
results.to_excel(results_dest, index=False)

### Copy the combined results file produced above into the folder outputs/literature_predictions for evaluation
Results:
- literature: outputs/literature_predictions/benbow_SVM_RCKmer_7_fine_tuned_literature.xlsx
- benbow_test: outputs/literature_predictions/benbow_SVM_RCKmer_7_fine_tuned_test_benbow.xlsx

## Evaluate Boundaries Prediction

In [6]:
# Folder holding one .xlsx prediction file per predictor.
predictors_folder = "outputs/literature_predictions"
predictors = ["alien_hunter", "islander", "islandpath_dimob", "islandviewer", "sigi_hmm", "islandpick"]
model_dict = {}
result_type = 'literature' #test or literature evaluation

#benbow_SVM_RCKmer_7_fine_tuned_test_benbow
#islandpick_SVM_RCKmer_7_fine_tuned_test_benbow
#mergedworvmgicluster_SVM_RCKmer_7_fine_tuned_benbow
#mergedworvmgicluster_SVM_RCKmer_7_fine_tuned_benbow_test

# Select everything that depends on result_type in one place, and fail fast on
# an unsupported value instead of hitting a NameError further down.
# Ground truth tables: GI_literature_set_table, GI_negative_set_table,
# positive_test_table_gc, negative_test_table_gc.
if result_type == 'test':
    benbow_file = "fine_tuned_model_test"
    treasure_island = "treasure_island_test"
    pos_table_file = "outputs/literature_reference/positive_test_table_gc.xlsx"
    neg_table_file = "outputs/literature_reference/negative_test_table_gc.xlsx"
elif result_type == 'literature':
    benbow_file = "fine_tuned_model_literature"
    treasure_island = "treasure_island_literature"
    pos_table_file = "outputs/literature_reference/GI_literature_set_table.xlsx"
    neg_table_file = "outputs/literature_reference/GI_negative_set_table.xlsx"
else:
    raise ValueError(
        "result_type must be 'test' or 'literature', got {!r}".format(result_type)
    )

predictors += [treasure_island,benbow_file]

# Load the predictions of every predictor into model_dict, keyed by predictor name.
for predictor in predictors:
    predictor_file = os.path.join(predictors_folder, predictor + '.xlsx')
    predictor_df = pd.read_excel(predictor_file)

    model_dict[predictor] = get_organism_info(predictor_df)

# read ground truth data
pos_table = pd.read_excel(pos_table_file)
neg_table = pd.read_excel(neg_table_file)

organism_pos_test_dict = get_organism_info(pos_table)
organism_neg_test_dict = get_organism_info(neg_table)

# the organisms evaluated are the keys of the positive ground-truth dictionary
total_orgs = organism_pos_test_dict.keys()

# NOTE(review): this name shadows the builtin eval(); it is kept because other
# cells of the notebook may refer to it.
eval = Evaluations()

print("evaluation of {} data".format(result_type))
# NOTE(review): the meaning of the trailing positional False is defined by
# Evaluations.evaluations_main_104 -- confirm there.
eval_results = eval.evaluations_main_104(total_orgs, 
                                        model_dict, 
                                        organism_pos_test_dict, 
                                        organism_neg_test_dict, 
                                        result_type, 
                                        False)


def myconverter(obj):
    """Convert NumPy and datetime objects into JSON-serializable values.

    Meant to be used as the ``default=`` hook of ``json.dumps``/``json.dump``,
    which is called for every object the encoder cannot serialize itself.

    Args:
        obj: The object the JSON encoder could not serialize.

    Returns:
        A ``bool``, ``int`` or ``float`` for the matching NumPy scalar, a
        (nested) list for a NumPy array, or a string for a ``datetime``.

    Raises:
        TypeError: If ``obj`` is of an unsupported type. Without this the
            function would return ``None`` and the encoder would silently
            write ``null`` in place of the value.
    """
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, datetime.datetime):
        return str(obj)
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(obj).__name__)
    )
            
# Serialize the evaluation results; myconverter handles the NumPy/datetime
# values the JSON encoder cannot serialize on its own.
json_obj = json.dumps(eval_results, indent=1, default=myconverter)
json_file = "outputs/evaluation/evaluation_result_{}_fine_tuned_model.json".format(result_type)

# Create the target folder if needed so a fresh checkout does not fail with
# FileNotFoundError when the file is opened.
os.makedirs(os.path.dirname(json_file), exist_ok=True)

# NOTE(review): json_obj is already a JSON string, so json.dump() stores it as
# one quoted JSON string (double-encoded); a reader has to decode it twice.
# Left unchanged because existing readers of this file may rely on that
# format -- confirm before switching to a single json.dump(eval_results, ...).
with open(json_file, 'w') as file:
    json.dump(json_obj, file, indent=4)

evaluation of literature data
---------------
alien_hunter
{'NC_004431.1': [{'TP': 552951}, {'FP': 25986}, {'TN': 497212}, {'FN': 243868}, {'Precision': 0.9551142870467771}, {'Recall': 0.6939480609774616}, {'F-Score': 0.8038503920753389}, {'Accuracy': 0.7955677843542924}, {'MCC': 0.6350963622480542}], 'NC_002695.1': [{'TP': 534686}, {'FP': 27825}, {'TN': 486507}, {'FN': 347229}, {'Precision': 0.9505343006625648}, {'Recall': 0.6062783828373483}, {'F-Score': 0.7403439151607628}, {'Accuracy': 0.7313842035112699}, {'MCC': 0.5430443547116833}], 'NC_003198.1': [{'TP': 450504}, {'FP': 32873}, {'TN': 515842}, {'FN': 183190}, {'Precision': 0.9319930406287432}, {'Recall': 0.7109172565938765}, {'F-Score': 0.8065807813469331}, {'Accuracy': 0.8172688130756786}, {'MCC': 0.6604016515537388}], 'NC_003923.1': [{'TP': 5000}, {'FP': 31830}, {'TN': 240519}, {'FN': 212639}, {'Precision': 0.13575889220743959}, {'Recall': 0.022973823625361264}, {'F-Score': 0.03929751757581473}, {'Accuracy': 0.501071454811138