In [None]:
rsync -avzhe ssh \
conchae@garnatxa.srv.cpd:/home/conchae/prediction_depolymerase_tropism/prophage_prediction/depolymerase_decipher/ficheros_28032023/Seqbased_model/RF_1302_models \
/media/concha-eloko/Linux/PPT_clean/ficheros_28032023


***
### Load Data :

In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from itertools import product
import random
from collections import Counter, defaultdict
import warnings
import logging
import subprocess
from multiprocessing.pool import ThreadPool
import joblib

# SCikitlearn modules :
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import classification_report , roc_auc_score, matthews_corrcoef
from sklearn.metrics import roc_auc_score

# Scipy modules : 
from scipy.stats import fisher_exact
#from skopt import BayesSearchCV
#from skopt.space import Real, Categorical, Integer
from statistics import mean

path_work = "/media/concha-eloko/Linux/PPT_clean"
path_jobs = "/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models"

> Set the count prophage dico : 

In [2]:
DF_info = pd.read_csv(f"{path_work}/TropiGATv2.final_df_v2.tsv", sep = "\t" ,  header = 0)

DF_info = DF_info.drop_duplicates(subset = ["Protein_name"])

df_prophages = DF_info.drop_duplicates(subset = ["Phage"], keep = "first")
dico_prophage_info = {row["Phage"] : {"prophage_strain" : row["prophage_id"] , "ancestor" : row["Infected_ancestor"]} for _,row in df_prophages.iterrows()}

def get_filtered_prophages(prophage) :
    combinations = []
    to_exclude = set()
    to_keep = set()
    to_keep.add(prophage)
    df_prophage_group = DF_info[(DF_info["prophage_id"] == dico_prophage_info[prophage]["prophage_strain"]) & (DF_info["Infected_ancestor"] == dico_prophage_info[prophage]["ancestor"])]
    if len(df_prophage_group) == 1 :
        pass
    else :
        depo_set = set(df_prophage_group[df_prophage_group["Phage"] == prophage]["domain_seq"].values)
        for prophage_tmp in df_prophage_group["Phage"].unique().tolist() :
            if prophage_tmp != prophage :
                tmp_depo_set = set(df_prophage_group[df_prophage_group["Phage"] == prophage_tmp]["domain_seq"].values)
                if depo_set == tmp_depo_set :
                    to_exclude.add(prophage_tmp)
                else :
                    if tmp_depo_set not in combinations :
                        to_keep.add(prophage_tmp)
                        combinations.append(tmp_depo_set)
                    else :
                        to_exclude.add(prophage_tmp)
    return df_prophage_group , to_exclude , to_keep

good_prophages = set()
excluded_prophages = set()

for prophage, info_prophage in tqdm(dico_prophage_info.items()) :
    if prophage not in excluded_prophages and prophage not in good_prophages:
        _, excluded_members , kept_members = get_filtered_prophages(prophage)
        good_prophages.update(kept_members)
        excluded_prophages.update(excluded_members)

DF_info_lvl_0_filtered = DF_info[DF_info["Phage"].isin(good_prophages)]
DF_info_lvl_0_final = DF_info_lvl_0_filtered[~DF_info_lvl_0_filtered["KL_type_LCA"].str.contains("\\|")]
DF_info_lvl_0 = DF_info_lvl_0_final.copy()

df_prophages = DF_info_lvl_0.drop_duplicates(subset = ["Phage"])
dico_prophage_count = dict(Counter(df_prophages["KL_type_LCA"]))


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15981/15981 [00:22<00:00, 718.88it/s]


In [13]:
cluster_values = set([file.split("_")[0] for file in os.listdir(path_jobs)])
KLtypes = set([file.split("_RF_")[1].split(".full_data")[0] for file in os.listdir(path_jobs)])

KLtypes_paths = {kltype : [f"{path_jobs}/{file}" for file in os.listdir(path_jobs) if file.split("_RF_")[1].split(".full_data")[0]==kltype]
                for kltype in KLtypes}

KLtypes_paths

{'KL71': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.9_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.75_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.8_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.7_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.975_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.95_RF_KL71.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.65_RF_KL71.full_data.joblib'],
 'KL139': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.75_RF_KL139.full_data.joblib',
  '/media/concha-eloko/Linux/PPT_clean/ficheros

***
### Get the best cl value :

> Functions :

In [6]:
def get_mean_MCC(path_data_object):
    try:
        assert os.path.isfile(path_data_object)
        data_object = joblib.load(path_data_object)
        mcc_values = []
        for i in data_object:
            y_test = data_object[i]["test_data"][0].values
            predictions = data_object[i]["test_data"][1]
            mcc = matthews_corrcoef(y_test, predictions)
            mcc_values.append(mcc)
        del data_object 
        mean_mcc = mean(mcc_values)
        return mean_mcc
    except AssertionError:
        raise FileNotFoundError(f"File not found: {path_data_object}")

    

> Get the best model : 

In [14]:
%%time
with open(f"/media/concha-eloko/Linux/PPT_clean/Seqbased_model/Seqbased_models.prophages_metrics.tsv", "w") as outfile :
    for KLtype in tqdm(KLtypes_paths) : 
        for path in KLtypes_paths[KLtype] :
            cl_value = path.split("/")[-1].split("_RF")[0]
            mean_mcc = get_mean_MCC(path)
            count_kltype = dico_prophage_count[KLtype]
            outfile.write(f"{KLtype}\t{count_kltype}\t{cl_value}\t{mean_mcc}\n")
            print(f"{KLtype}\t{count_kltype}\t{cl_value}\t{mean_mcc}")


  0%|                                                                                                                                     | 0/96 [00:00<?, ?it/s]

KL71	15	0.9	0.5423261445466404
KL71	15	0.75	0.7409207609430039
KL71	15	0.85	0.0
KL71	15	0.8	0.7409207609430039
KL71	15	0.7	0.5919747986457313
KL71	15	0.975	0.4338609156373123
KL71	15	0.95	0.4338609156373123


  1%|█▎                                                                                                                           | 1/96 [00:07<11:41,  7.38s/it]

KL71	15	0.65	0.5015354472338103
KL139	10	0.75	0.7164117158323701
KL139	10	0.9	0.735400640077266
KL139	10	0.85	0.6994347603915498
KL139	10	0.7	0.6911004289095706
KL139	10	0.65	0.6911004289095706
KL139	10	0.8	0.8416176409860249
KL139	10	0.975	0.0


  2%|██▌                                                                                                                          | 2/96 [00:12<09:37,  6.14s/it]

KL139	10	0.95	0.735400640077266
KL124	12	0.65	0.6830039823062017
KL124	12	0.8	0.73554389307022
KL124	12	0.9	0.73554389307022
KL124	12	0.75	0.637485825501128
KL124	12	0.85	0.73554389307022
KL124	12	0.95	0.73554389307022
KL124	12	0.975	0.73554389307022


  3%|███▉                                                                                                                         | 3/96 [00:18<09:18,  6.01s/it]

KL124	12	0.7	0.6830039823062017
KL15	204	0.9	0.7612395705073404
KL15	204	0.975	0.6972225891373132
KL15	204	0.8	0.7322081527819835
KL15	204	0.65	0.6179103798502716
KL15	204	0.95	0.73328605422235
KL15	204	0.75	0.6668222857284879
KL15	204	0.7	0.666487442598033


  4%|█████▏                                                                                                                       | 4/96 [00:25<09:36,  6.26s/it]

KL15	204	0.85	0.781243516812687
KL157	13	0.7	0.7237692173614574
KL157	13	0.65	0.893636187267738
KL157	13	0.9	0.8437737987511634
KL157	13	0.75	0.7573591584938887
KL157	13	0.8	0.8419745431092732
KL157	13	0.95	0.508629727222366
KL157	13	0.975	0.508629727222366


  5%|██████▌                                                                                                                      | 5/96 [00:30<08:54,  5.88s/it]

KL157	13	0.85	0.6449067559962154
KL145	29	0.95	0.8940505765461982
KL145	29	0.7	0.9197709830017538
KL145	29	0.65	0.9377191850351939
KL145	29	0.8	0.9592106040535499
KL145	29	0.85	0.9377191850351939
KL145	29	0.9	0.8940505765461982
KL145	29	0.975	0.8521644595546172


  6%|███████▊                                                                                                                     | 6/96 [00:37<09:39,  6.44s/it]

KL145	29	0.75	0.918113883008419
KL110	64	0.8	0.7253934543512076
KL110	64	0.65	0.7399715312154859
KL110	64	0.85	0.701556087107071
KL110	64	0.9	0.7318020190187686
KL110	64	0.7	0.7463611022792834
KL110	64	0.975	0.7455826612410267
KL110	64	0.95	0.7671179373233533


  7%|█████████                                                                                                                    | 7/96 [00:43<09:11,  6.20s/it]

KL110	64	0.75	0.7594154760554404
KL125	26	0.65	0.0
KL125	26	0.95	0.0
KL125	26	0.975	0.0
KL125	26	0.9	0.0
KL125	26	0.75	0.0
KL125	26	0.8	0.0
KL125	26	0.7	0.0


  8%|██████████▍                                                                                                                  | 8/96 [00:48<08:22,  5.71s/it]

KL125	26	0.85	0.0
KL20	18	0.85	0.0
KL20	18	0.9	0.0
KL20	18	0.975	0.0
KL20	18	0.95	0.0
KL20	18	0.7	0.0
KL20	18	0.65	0.0
KL20	18	0.75	0.0


  9%|███████████▋                                                                                                                 | 9/96 [00:53<07:53,  5.44s/it]

KL20	18	0.8	0.35325310165205387
KL107	1121	0.95	0.7683200029480542
KL107	1121	0.9	0.783322481465053
KL107	1121	0.7	0.778026834085233
KL107	1121	0.85	0.79153889872206
KL107	1121	0.75	0.779557823283383
KL107	1121	0.975	0.7341911238666775
KL107	1121	0.8	0.7685991800579572


 10%|████████████▉                                                                                                               | 10/96 [01:07<11:56,  8.34s/it]

KL107	1121	0.65	0.7782555660369448
KL128	22	0.85	0.8008969483663161
KL128	22	0.75	0.8565177020578651
KL128	22	0.65	0.8565177020578651
KL128	22	0.8	0.8225204313631472
KL128	22	0.9	0.6681751327483401
KL128	22	0.7	0.8348942190610341
KL128	22	0.975	0.5635108026822929


 11%|██████████████▏                                                                                                             | 11/96 [01:13<10:32,  7.44s/it]

KL128	22	0.95	0.623997686700365
KL38	76	0.95	0.6228168610245342
KL38	76	0.8	0.6719144303275467
KL38	76	0.9	0.64814715735069
KL38	76	0.7	0.7560809732775651
KL38	76	0.65	0.7351602771295945
KL38	76	0.75	0.6799404539571781
KL38	76	0.975	0.6285605351290963


 12%|███████████████▌                                                                                                            | 12/96 [01:19<10:03,  7.19s/it]

KL38	76	0.85	0.6911254303953135
KL45	63	0.7	0.8754433904023085
KL45	63	0.75	0.9038451031489667
KL45	63	0.975	0.811063858930806
KL45	63	0.9	0.8938326428466874
KL45	63	0.95	0.8321897272083567
KL45	63	0.8	0.8431789089364122
KL45	63	0.65	0.8524505968250915


 14%|████████████████▊                                                                                                           | 13/96 [01:27<09:53,  7.15s/it]

KL45	63	0.85	0.8840078210857842
KL8	20	0.95	0.6252117575790629
KL8	20	0.975	0.4039594799529941
KL8	20	0.8	0.5234449528811611
KL8	20	0.85	0.5027182824148568
KL8	20	0.65	0.09325048082403138
KL8	20	0.75	0.3804369506451221
KL8	20	0.9	0.5264529551172001


 15%|██████████████████                                                                                                          | 14/96 [01:33<09:35,  7.01s/it]

KL8	20	0.7	0.09325048082403138
KL51	144	0.85	0.8086417598404179
KL51	144	0.75	0.7605051692374113
KL51	144	0.8	0.7180356626866119
KL51	144	0.65	0.7965502349749932
KL51	144	0.95	0.7747386775825617
KL51	144	0.975	0.6974552877085076
KL51	144	0.9	0.8121052962345671


 16%|███████████████████▍                                                                                                        | 15/96 [01:41<09:45,  7.23s/it]

KL51	144	0.7	0.7477867368489729
KL5	28	0.975	0.7088186324881024
KL5	28	0.95	0.6896410538860358
KL5	28	0.75	0.6683628498058707
KL5	28	0.65	0.34699431403239345
KL5	28	0.9	0.7803465387414676
KL5	28	0.7	0.7919957647159694
KL5	28	0.85	0.649713808922839


 17%|████████████████████▋                                                                                                       | 16/96 [01:48<09:35,  7.19s/it]

KL5	28	0.8	0.6860234139604424
KL137	8	0.8	0.20833333333333331
KL137	8	0.65	0.2333333333333333
KL137	8	0.95	0.0
KL137	8	0.85	0.19166666666666665
KL137	8	0.7	0.26666666666666666
KL137	8	0.9	0.26666666666666666
KL137	8	0.975	0.2333333333333333


 18%|█████████████████████▉                                                                                                      | 17/96 [01:54<09:06,  6.92s/it]

KL137	8	0.75	0.26666666666666666
KL103	13	0.975	0.623169810432912
KL103	13	0.9	0.5065990404847633
KL103	13	0.75	0.4663152320645239
KL103	13	0.65	0.512186023054883
KL103	13	0.85	0.5961334836438915
KL103	13	0.8	0.6431075003543253
KL103	13	0.7	0.5262548715767604


 19%|███████████████████████▎                                                                                                    | 18/96 [02:01<08:49,  6.79s/it]

KL103	13	0.95	0.6036970598091546
KL29	37	0.65	0.7547674919964581
KL29	37	0.975	0.48541172318812587
KL29	37	0.9	0.763695403871183
KL29	37	0.75	0.7650867847318666
KL29	37	0.8	0.8228690552042508
KL29	37	0.7	0.8314564514127191
KL29	37	0.95	0.6160578589054944


 20%|████████████████████████▌                                                                                                   | 19/96 [02:09<09:20,  7.27s/it]

KL29	37	0.85	0.7414910730164936
KL6	12	0.65	0.19470808463880668
KL6	12	0.8	0.37865098495967275
KL6	12	0.7	0.0
KL6	12	0.9	0.0
KL6	12	0.975	0.0
KL6	12	0.85	0.0
KL6	12	0.75	0.0


 21%|█████████████████████████▊                                                                                                  | 20/96 [02:16<08:55,  7.04s/it]

KL6	12	0.95	0.0
KL61	10	0.8	0.4897593063409451
KL61	10	0.9	0.5348399724926484
KL61	10	0.975	0.2897593063409451
KL61	10	0.95	0.2516397779494322
KL61	10	0.65	0.5348399724926484
KL61	10	0.7	0.5078719779941188
KL61	10	0.75	0.5078719779941188


 22%|███████████████████████████▏                                                                                                | 21/96 [02:22<08:36,  6.89s/it]

KL61	10	0.85	0.4678719779941187
KL1	190	0.95	0.5401488722819151
KL1	190	0.75	0.5322178848255879
KL1	190	0.65	0.7254799287116673
KL1	190	0.85	0.5444479817538865
KL1	190	0.975	0.5658672517701647
KL1	190	0.7	0.5706777566705462
KL1	190	0.8	0.7741820012161877


 23%|████████████████████████████▍                                                                                               | 22/96 [02:29<08:25,  6.83s/it]

KL1	190	0.9	0.7530795024791
KL136	32	0.8	0.8329674893228074
KL136	32	0.75	0.7847431705784509
KL136	32	0.95	0.27877282097486783
KL136	32	0.7	0.908008828658608
KL136	32	0.65	0.8539755357732747
KL136	32	0.9	0.9236977106128045
KL136	32	0.975	0.7787100792647679


 24%|█████████████████████████████▋                                                                                              | 23/96 [02:36<08:33,  7.03s/it]

KL136	32	0.85	0.6156914657382888
KL53	51	0.975	0.854863535860379
KL53	51	0.85	0.8678497817950193
KL53	51	0.65	0.8682668674794622
KL53	51	0.7	0.8803960672182307
KL53	51	0.9	0.8547829182158215
KL53	51	0.95	0.8540978573052442
KL53	51	0.75	0.8803960672182307


 25%|███████████████████████████████                                                                                             | 24/96 [02:43<08:14,  6.87s/it]

KL53	51	0.8	0.8678497817950193
KL17	481	0.75	0.9270956303128521
KL17	481	0.975	0.9356004731976137
KL17	481	0.9	0.9347235756829937
KL17	481	0.8	0.9370483556655504
KL17	481	0.85	0.9259462608218166
KL17	481	0.95	0.9459271077161702
KL17	481	0.65	0.9320935046955348


 26%|████████████████████████████████▎                                                                                           | 25/96 [02:54<09:44,  8.23s/it]

KL17	481	0.7	0.9448913474803732
KL48	22	0.85	0.5057330617635248
KL48	22	0.7	0.3690954235188991
KL48	22	0.95	0.0
KL48	22	0.975	0.3157375561611819
KL48	22	0.75	0.27857559697053486
KL48	22	0.8	0.199321625076353
KL48	22	0.65	0.39849701170578417


 27%|█████████████████████████████████▌                                                                                          | 26/96 [03:00<08:34,  7.35s/it]

KL48	22	0.9	0.3157375561611819
KL105	114	0.975	0.6592573646739367
KL105	114	0.75	0.6515843333292838
KL105	114	0.8	0.762921542889064
KL105	114	0.7	0.705156832462718
KL105	114	0.65	0.6796823992735131
KL105	114	0.95	0.7844862607071058
KL105	114	0.85	0.7435561769017999


 28%|██████████████████████████████████▉                                                                                         | 27/96 [03:06<08:08,  7.08s/it]

KL105	114	0.9	0.6773944884974388
KL112	60	0.975	0.2968449746594753
KL112	60	0.7	0.6633861454079226
KL112	60	0.95	0.6908022034071399
KL112	60	0.9	0.6633861454079226
KL112	60	0.85	0.7545953176501083
KL112	60	0.65	0.5532561064760572
KL112	60	0.75	0.6766250768408285


 29%|████████████████████████████████████▏                                                                                       | 28/96 [03:12<07:30,  6.63s/it]

KL112	60	0.8	0.6766250768408285
KL16	27	0.85	0.5674973289890533
KL16	27	0.975	0.0
KL16	27	0.65	0.4597050376438301
KL16	27	0.95	0.3911762301902979
KL16	27	0.75	0.4140413198793176
KL16	27	0.9	0.0
KL16	27	0.7	0.0


 30%|█████████████████████████████████████▍                                                                                      | 29/96 [03:18<07:25,  6.64s/it]

KL16	27	0.8	0.0
KL28	112	0.95	0.7717821186631347
KL28	112	0.65	0.8203897164118136
KL28	112	0.9	0.7804348547235289
KL28	112	0.975	0.7197485949847426
KL28	112	0.85	0.8446934684060952
KL28	112	0.8	0.7958884851187231
KL28	112	0.7	0.7893318847344591


 31%|██████████████████████████████████████▊                                                                                     | 30/96 [03:26<07:30,  6.82s/it]

KL28	112	0.75	0.8574204547181027
KL114	24	0.9	0.7573393063673132
KL114	24	0.75	0.7336950448023823
KL114	24	0.975	0.7260538283639957
KL114	24	0.65	0.8056726397006465
KL114	24	0.7	0.7431429911662659
KL114	24	0.8	0.7584635362306736
KL114	24	0.95	0.8056726397006465


 32%|████████████████████████████████████████                                                                                    | 31/96 [03:33<07:30,  6.92s/it]

KL114	24	0.85	0.7607981762117202
KL35	18	0.65	0.30689314297572257
KL35	18	0.8	0.31937256164818817
KL35	18	0.85	0.4934986580010322
KL35	18	0.7	0.3591993243337469
KL35	18	0.975	0.1617194309760283
KL35	18	0.75	0.5025549595675399
KL35	18	0.95	0.0866384347267568


 33%|█████████████████████████████████████████▎                                                                                  | 32/96 [03:40<07:26,  6.98s/it]

KL35	18	0.9	0.2683531393315406
KL169	27	0.7	0.6044457453279557
KL169	27	0.975	0.5445005519165623
KL169	27	0.8	0.2869460024736606
KL169	27	0.85	0.3619460024736606
KL169	27	0.9	0.3619460024736606
KL169	27	0.75	0.5118453480418343
KL169	27	0.65	0.41472310087631276


 34%|██████████████████████████████████████████▋                                                                                 | 33/96 [03:46<07:07,  6.78s/it]

KL169	27	0.95	0.5924768834535825
KL24	288	0.75	0.6992798399306948
KL24	288	0.95	0.666692982499541
KL24	288	0.7	0.6911931784539436
KL24	288	0.85	0.7859092373131645
KL24	288	0.65	0.7983578815863533
KL24	288	0.9	0.7824837004590824
KL24	288	0.975	0.6557671979347547


 35%|███████████████████████████████████████████▉                                                                                | 34/96 [03:52<06:50,  6.62s/it]

KL24	288	0.8	0.7860823825314167
KL4	13	0.8	0.6083545042555153
KL4	13	0.85	0.0
KL4	13	0.9	0.0
KL4	13	0.75	0.4868927816590626
KL4	13	0.65	0.5584921157389406
KL4	13	0.7	0.0
KL4	13	0.975	0.6083545042555153


 36%|█████████████████████████████████████████████▏                                                                              | 35/96 [04:00<06:54,  6.79s/it]

KL4	13	0.95	0.5584921157389406
KL13	66	0.85	0.5220021751920444
KL13	66	0.9	0.5181773245463728
KL13	66	0.95	0.5375563685527533
KL13	66	0.65	0.45328951824746244
KL13	66	0.7	0.4742656646528843
KL13	66	0.975	0.5423584764257191
KL13	66	0.75	0.5049974673396208


 38%|██████████████████████████████████████████████▌                                                                             | 36/96 [04:08<07:17,  7.29s/it]

KL13	66	0.8	0.46669688914403906
KL7	37	0.7	0.9160565752818397
KL7	37	0.65	0.8673563501073295
KL7	37	0.8	0.7573027901945398
KL7	37	0.85	0.7741138735373531
KL7	37	0.9	0.7597794027921165
KL7	37	0.95	0.5654802641612443
KL7	37	0.975	0.5545316176640713


 39%|███████████████████████████████████████████████▊                                                                            | 37/96 [04:13<06:35,  6.71s/it]

KL7	37	0.75	0.8634980652806155
KL30	93	0.9	0.527909491778057
KL30	93	0.8	0.48285383334362425
KL30	93	0.975	0.5430644632451618
KL30	93	0.75	0.5066565976526977
KL30	93	0.7	0.4537113254114122
KL30	93	0.85	0.5324078684588002
KL30	93	0.65	0.4408139292924382


 40%|█████████████████████████████████████████████████                                                                           | 38/96 [04:20<06:34,  6.81s/it]

KL30	93	0.95	0.48789807085912623
KL118	23	0.85	0.8583005902280558
KL118	23	0.7	0.847013342275926
KL118	23	0.9	0.8692079523340958
KL118	23	0.975	0.3371684293371546
KL118	23	0.75	0.9037525005601055
KL118	23	0.8	0.9253570214625478


 41%|██████████████████████████████████████████████████▍                                                                         | 39/96 [04:27<06:29,  6.83s/it]

KL118	23	0.95	0.8421564305750328
KL23	126	0.975	0.698264085243891
KL23	126	0.9	0.7074173706693224
KL23	126	0.8	0.7164803201811585
KL23	126	0.95	0.6574874279380293
KL23	126	0.85	0.7107185758679253
KL23	126	0.65	0.7702824142076944
KL23	126	0.75	0.7323555813066916


 42%|███████████████████████████████████████████████████▋                                                                        | 40/96 [04:35<06:29,  6.96s/it]

KL23	126	0.7	0.7315900406882624
KL41	10	0.75	0.0
KL41	10	0.85	0.0
KL41	10	0.65	0.24271195048676714
KL41	10	0.9	0.0
KL41	10	0.8	0.0
KL41	10	0.975	0.0
KL41	10	0.7	0.0


 43%|████████████████████████████████████████████████████▉                                                                       | 41/96 [04:41<06:14,  6.80s/it]

KL41	10	0.95	0.2296799449852968
KL57	51	0.8	0.7111518450456048
KL57	51	0.9	0.6677430477915955
KL57	51	0.95	0.6743630169119613
KL57	51	0.75	0.7599283099771065
KL57	51	0.85	0.7475287544029864
KL57	51	0.975	0.7689451081296201
KL57	51	0.65	0.7065205577308229


 44%|██████████████████████████████████████████████████████▎                                                                     | 42/96 [04:51<07:02,  7.82s/it]

KL57	51	0.7	0.7293969399632511
KL26	15	0.8	0.7920250002329811
KL26	15	0.65	0.824692994926166
KL26	15	0.975	0.824692994926166
KL26	15	0.9	0.824692994926166
KL26	15	0.95	0.824692994926166
KL26	15	0.7	0.824692994926166
KL26	15	0.75	0.6575872022286244


 45%|███████████████████████████████████████████████████████▌                                                                    | 43/96 [04:56<06:06,  6.91s/it]

KL26	15	0.85	0.0
KL3	145	0.65	0.9456037804621934
KL3	145	0.9	0.9285485619303508
KL3	145	0.975	0.9241972159486967
KL3	145	0.7	0.9374138842053593
KL3	145	0.85	0.9417598219566674
KL3	145	0.75	0.9540832019302129
KL3	145	0.8	0.9540220334991604


 46%|████████████████████████████████████████████████████████▊                                                                   | 44/96 [05:05<06:39,  7.69s/it]

KL3	145	0.95	0.9327129336985479
KL63	52	0.9	0.8398287853148152
KL63	52	0.8	0.896570892633532
KL63	52	0.65	0.8883741819145623
KL63	52	0.85	0.8320099605377004
KL63	52	0.95	0.8191813734318749
KL63	52	0.7	0.9076179113864506
KL63	52	0.975	0.7782125366608916


 47%|██████████████████████████████████████████████████████████▏                                                                 | 45/96 [05:12<06:15,  7.37s/it]

KL63	52	0.75	0.8700513015863783
KL116	25	0.65	0.6460421998151094
KL116	25	0.85	0.5334483823948227
KL116	25	0.7	0.6825195808948088
KL116	25	0.9	0.5764677658240691
KL116	25	0.75	0.5667937859978153
KL116	25	0.95	0.5635285242282705
KL116	25	0.975	0.6761223416485571


 48%|███████████████████████████████████████████████████████████▍                                                                | 46/96 [05:17<05:23,  6.48s/it]

KL116	25	0.8	0.6497819362871373
KL39	37	0.7	0.666250205823954
KL39	37	0.65	0.7047505172379441
KL39	37	0.75	0.7115385823665988
KL39	37	0.975	0.6633245167486489
KL39	37	0.8	0.7047179112907433
KL39	37	0.85	0.6112059393099613
KL39	37	0.95	0.7027576604520903


 49%|████████████████████████████████████████████████████████████▋                                                               | 47/96 [05:23<05:18,  6.50s/it]

KL39	37	0.9	0.6655929696473003
KL119	6	0.85	0.4
KL119	6	0.95	0.4
KL119	6	0.8	0.32909944487358056


 50%|██████████████████████████████████████████████████████████████                                                              | 48/96 [05:26<04:16,  5.35s/it]

KL119	6	0.975	0.4
KL58	6	0.85	0.5290994448735805
KL58	6	0.9	0.0
KL58	6	0.65	0.6
KL58	6	0.95	0.4
KL58	6	0.75	0.6
KL58	6	0.8	0.5290994448735805
KL58	6	0.7	0.6


 51%|███████████████████████████████████████████████████████████████▎                                                            | 49/96 [05:31<04:15,  5.43s/it]

KL58	6	0.975	0.3666666666666667
KL64	896	0.7	0.7893691606503614
KL64	896	0.975	0.8148753503587107
KL64	896	0.8	0.7953763180192234
KL64	896	0.65	0.7959467053352804
KL64	896	0.9	0.7941130758071546
KL64	896	0.95	0.8058528419167632
KL64	896	0.75	0.7922284734757444


 52%|████████████████████████████████████████████████████████████████▌                                                           | 50/96 [05:43<05:40,  7.40s/it]

KL64	896	0.85	0.7914559179999973
KL47	551	0.85	0.8008807940728095
KL47	551	0.8	0.7317773762369625
KL47	551	0.7	0.7304694875323637
KL47	551	0.65	0.7287927057241648
KL47	551	0.95	0.8021742909799096
KL47	551	0.9	0.7989707396135799
KL47	551	0.975	0.8134521307523142


 53%|█████████████████████████████████████████████████████████████████▉                                                          | 51/96 [05:51<05:37,  7.49s/it]

KL47	551	0.75	0.7301333645477319
KL164	6	0.9	0.4
KL164	6	0.7	0.4
KL164	6	0.95	0.0
KL164	6	0.85	0.4
KL164	6	0.975	0.0
KL164	6	0.75	0.4
KL164	6	0.8	0.4


 54%|███████████████████████████████████████████████████████████████████▏                                                        | 52/96 [05:56<04:53,  6.68s/it]

KL164	6	0.65	0.5309307341415954
KL74	82	0.65	0.8249434141935545
KL74	82	0.975	0.8369066155305032
KL74	82	0.9	0.8710347172073895
KL74	82	0.85	0.8542110164438831
KL74	82	0.8	0.870654963559726
KL74	82	0.95	0.83254024708249
KL74	82	0.7	0.8164997110641482


 55%|████████████████████████████████████████████████████████████████████▍                                                       | 53/96 [06:07<05:40,  7.91s/it]

KL74	82	0.75	0.8555564711132002
KL10	111	0.65	0.7054144436438091
KL10	111	0.75	0.7015530217047034
KL10	111	0.7	0.6600027856133822
KL10	111	0.975	0.7107899156480965
KL10	111	0.85	0.6652739360940374
KL10	111	0.8	0.7059811285036233
KL10	111	0.95	0.7418279203297695


 56%|█████████████████████████████████████████████████████████████████████▊                                                      | 54/96 [06:14<05:27,  7.80s/it]

KL10	111	0.9	0.685435364356354
KL46	80	0.8	0.823364494147025
KL46	80	0.95	0.7038550491228497
KL46	80	0.65	0.839289472678952
KL46	80	0.9	0.7700571071773883
KL46	80	0.75	0.7888315338953641
KL46	80	0.975	0.7219530476444588
KL46	80	0.85	0.7598764794739231


 57%|███████████████████████████████████████████████████████████████████████                                                     | 55/96 [06:23<05:27,  7.99s/it]

KL46	80	0.7	0.8154570359775414
KL111	67	0.7	0.7535653728206528
KL111	67	0.65	0.7330025554503083
KL111	67	0.85	0.719257166832617
KL111	67	0.95	0.7636258463776366
KL111	67	0.8	0.7205041052487116
KL111	67	0.975	0.7041542651231655
KL111	67	0.75	0.702516550803478


 58%|████████████████████████████████████████████████████████████████████████▎                                                   | 56/96 [06:31<05:29,  8.25s/it]

KL111	67	0.9	0.6787966676734762
KL54	12	0.8	0.8094328188745654
KL54	12	0.75	0.6811793070692683
KL54	12	0.95	0.849659060318446
KL54	12	0.85	0.7418794964099117
KL54	12	0.7	0.6286393963052502
KL54	12	0.9	0.8060062523125603
KL54	12	0.65	0.7016532549660311


 59%|█████████████████████████████████████████████████████████████████████████▋                                                  | 57/96 [06:38<04:59,  7.69s/it]

KL54	12	0.975	0.0
KL102	242	0.975	0.8928674091722884
KL102	242	0.65	0.8953322341652887
KL102	242	0.95	0.919010939641858
KL102	242	0.8	0.8906573807427612
KL102	242	0.75	0.8852137310719373
KL102	242	0.9	0.939535360789834
KL102	242	0.85	0.9036681949749144


 60%|██████████████████████████████████████████████████████████████████████████▉                                                 | 58/96 [06:44<04:29,  7.10s/it]

KL102	242	0.7	0.887634690721777
KL27	133	0.9	0.7793968056470953
KL27	133	0.975	0.7850350635417376
KL27	133	0.65	0.8034917500214052
KL27	133	0.75	0.7843012323527717
KL27	133	0.85	0.7892982763679252
KL27	133	0.7	0.8137617732879174
KL27	133	0.95	0.7889512397450472


 61%|████████████████████████████████████████████████████████████████████████████▏                                               | 59/96 [06:49<04:00,  6.50s/it]

KL27	133	0.8	0.7991459860402863
KL108	30	0.9	0.4537380684964704
KL108	30	0.85	0.8142624611008076
KL108	30	0.95	0.8630426389007139
KL108	30	0.75	0.7192876778565521
KL108	30	0.65	0.8127778936220492
KL108	30	0.7	0.8104289174575333
KL108	30	0.975	0.6135120085525898


 62%|█████████████████████████████████████████████████████████████████████████████▌                                              | 60/96 [06:57<04:15,  7.10s/it]

KL108	30	0.8	0.7495902627130641
KL166	10	0.95	0.0
KL166	10	0.65	0.0
KL166	10	0.75	0.4845199174779452
KL166	10	0.8	0.5393598899705936
KL166	10	0.975	0.0
KL166	10	0.7	0.5393598899705936
KL166	10	0.85	0.0


 64%|██████████████████████████████████████████████████████████████████████████████▊                                             | 61/96 [07:02<03:44,  6.42s/it]

KL166	10	0.9	0.0
KL36	88	0.65	0.8329389509402468
KL36	88	0.8	0.788780046374078
KL36	88	0.95	0.665618142795237
KL36	88	0.975	0.5548479141748701
KL36	88	0.9	0.8578835813540752
KL36	88	0.75	0.8571315705567037
KL36	88	0.85	0.8527128112481998


 65%|████████████████████████████████████████████████████████████████████████████████                                            | 62/96 [07:10<03:56,  6.96s/it]

KL36	88	0.7	0.8893345614722639
KL56	13	0.85	0.8222150869650913
KL56	13	0.7	0.8326670349364952
KL56	13	0.8	0.8720774754816659
KL56	13	0.9	0.8222150869650913
KL56	13	0.65	0.8222150869650913
KL56	13	0.95	0.8083545042555152
KL56	13	0.75	0.8720774754816659


 66%|█████████████████████████████████████████████████████████████████████████████████▍                                          | 63/96 [07:16<03:39,  6.65s/it]

KL56	13	0.975	0.3512705687284774
KL12	37	0.65	0.806921313541235
KL12	37	0.9	0.8069097048206171
KL12	37	0.85	0.7669682224238747
KL12	37	0.95	0.7274678708926617
KL12	37	0.975	0.6742635175445025
KL12	37	0.8	0.6073907646629402
KL12	37	0.75	0.674322183396957


 67%|██████████████████████████████████████████████████████████████████████████████████▋                                         | 64/96 [07:24<03:46,  7.08s/it]

KL12	37	0.7	0.668968071085704
KL142	14	0.975	0.0
KL142	14	0.8	0.0
KL142	14	0.65	0.4835095697364032
KL142	14	0.75	0.42370264094611865
KL142	14	0.85	0.2801760110220238
KL142	14	0.7	0.30888157828094914
KL142	14	0.95	0.21693045781865616


 68%|███████████████████████████████████████████████████████████████████████████████████▉                                        | 65/96 [07:30<03:29,  6.76s/it]

KL142	14	0.9	0.32539568672798425
KL19	73	0.65	0.813689481617468
KL19	73	0.975	0.7500255274661177
KL19	73	0.7	0.8241374178769998
KL19	73	0.8	0.8147444442748956
KL19	73	0.9	0.8377220834156845
KL19	73	0.95	0.8287582800559266
KL19	73	0.75	0.8296847854689561


 69%|█████████████████████████████████████████████████████████████████████████████████████▎                                      | 66/96 [07:38<03:33,  7.12s/it]

KL19	73	0.85	0.8278668631815479
KL60	68	0.95	0.773710749189824
KL60	68	0.85	0.7511373779570631
KL60	68	0.8	0.8022951498134302
KL60	68	0.75	0.7935974991411436
KL60	68	0.7	0.7461298706369064
KL60	68	0.65	0.7626547263301948
KL60	68	0.9	0.7409437145684152


 70%|██████████████████████████████████████████████████████████████████████████████████████▌                                     | 67/96 [07:47<03:41,  7.63s/it]

KL60	68	0.975	0.7347856223289054
KL34	20	0.75	0.8214485105787369
KL34	20	0.9	0.6852893340057737
KL34	20	0.95	0.7661521552080864
KL34	20	0.85	0.8761234037828133
KL34	20	0.975	0.6955318125937661
KL34	20	0.65	0.8180617018914066
KL34	20	0.7	0.8761234037828133


 71%|███████████████████████████████████████████████████████████████████████████████████████▊                                    | 68/96 [07:56<03:41,  7.91s/it]

KL34	20	0.8	0.8020118866854066
KL55	24	0.95	0.563870381090141
KL55	24	0.65	0.18441016980093006
KL55	24	0.975	0.6231624002206047
KL55	24	0.8	0.44382020426054664
KL55	24	0.9	0.5542078800743446
KL55	24	0.85	0.40513668157435867
KL55	24	0.7	0.4854537789871625


 72%|█████████████████████████████████████████████████████████████████████████████████████████▏                                  | 69/96 [08:03<03:32,  7.87s/it]

KL55	24	0.75	0.0
KL14	130	0.8	0.8404079482578893
KL14	130	0.65	0.8045290011665339
KL14	130	0.975	0.7734260432538841
KL14	130	0.95	0.8405802242718812
KL14	130	0.7	0.8298083769873525
KL14	130	0.9	0.8461617834111638
KL14	130	0.85	0.8456204699615617


 73%|██████████████████████████████████████████████████████████████████████████████████████████▍                                 | 70/96 [08:07<02:51,  6.59s/it]

KL14	130	0.75	0.8307677522717145
KL9	21	0.85	0.47299272244288304
KL9	21	0.9	0.5118375536847697
KL9	21	0.8	0.5118375536847697
KL9	21	0.975	0.48448330943980844
KL9	21	0.7	0.31064982573546057
KL9	21	0.95	0.47877567752288264
KL9	21	0.75	0.3519565419509636


 74%|███████████████████████████████████████████████████████████████████████████████████████████▋                                | 71/96 [08:13<02:40,  6.41s/it]

KL9	21	0.65	0.3749815775888444
KL153	18	0.9	0.1851640199545103
KL153	18	0.7	0.727860451253153
KL153	18	0.85	0.4386826293765435
KL153	18	0.65	0.6638199290607859
KL153	18	0.975	0.4440992866542602
KL153	18	0.8	0.1851640199545103
KL153	18	0.75	0.6638199290607859


 75%|█████████████████████████████████████████████████████████████████████████████████████████████                               | 72/96 [08:19<02:32,  6.35s/it]

KL153	18	0.95	0.48741718030204123


 76%|██████████████████████████████████████████████████████████████████████████████████████████████▎                             | 73/96 [08:20<01:49,  4.76s/it]

KL155	13	0.95	0.5828046464199206
KL21	100	0.85	0.8633539508455158
KL21	100	0.9	0.8343068922610168
KL21	100	0.95	0.6520463495872274
KL21	100	0.7	0.8702766599530433
KL21	100	0.65	0.8771289233426439
KL21	100	0.975	0.8568866933588991
KL21	100	0.75	0.8814535505132594


 77%|███████████████████████████████████████████████████████████████████████████████████████████████▌                            | 74/96 [08:29<02:10,  5.92s/it]

KL21	100	0.8	0.8634280438124123
KL113	5	0.95	0.6
KL113	5	0.7	0.6
KL113	5	0.85	0.6
KL113	5	0.975	0.6
KL113	5	0.75	0.6
KL113	5	0.8	0.3264911064067352
KL113	5	0.9	0.6


 78%|████████████████████████████████████████████████████████████████████████████████████████████████▉                           | 75/96 [08:34<01:59,  5.70s/it]

KL113	5	0.65	0.6
KL81	37	0.8	0.7125963481516013
KL81	37	0.85	0.7300563323797485
KL81	37	0.95	0.6698435189957924
KL81	37	0.975	0.5606777112249656
KL81	37	0.9	0.7300563323797485
KL81	37	0.75	0.7434378890886685
KL81	37	0.65	0.7300563323797485


 79%|██████████████████████████████████████████████████████████████████████████████████████████████████▏                         | 76/96 [08:43<02:16,  6.81s/it]

KL81	37	0.7	0.7001173443703046
KL31	16	0.95	0.5264572169596291
KL31	16	0.7	0.6941623152721133
KL31	16	0.85	0.5060448024364359
KL31	16	0.975	0.484880823835897
KL31	16	0.8	0.6941623152721133
KL31	16	0.9	0.6713709844215234
KL31	16	0.75	0.5264572169596291


 80%|███████████████████████████████████████████████████████████████████████████████████████████████████▍                        | 77/96 [08:51<02:12,  6.97s/it]

KL31	16	0.65	0.7264572169596291
KL151	61	0.8	0.8815558417403618
KL151	61	0.975	0.8788119929260824
KL151	61	0.65	0.8706300278909864
KL151	61	0.9	0.826429110039131
KL151	61	0.75	0.8794343358362413
KL151	61	0.85	0.868651038658241
KL151	61	0.7	0.879032501401097


 81%|████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 78/96 [08:59<02:09,  7.20s/it]

KL151	61	0.95	0.9111425409906163
KL123	29	0.7	0.6023230679059846
KL123	29	0.95	0.5486694541769019
KL123	29	0.75	0.6162766965276483
KL123	29	0.85	0.6298843133197235
KL123	29	0.9	0.5537305022515863
KL123	29	0.975	0.644724285812372
KL123	29	0.8	0.5451033260318154


 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████                      | 79/96 [09:06<02:01,  7.16s/it]

KL123	29	0.65	0.6128757055006645
KL140	13	0.75	0.0
KL140	13	0.975	0.0
KL140	13	0.85	0.0
KL140	13	0.7	0.0
KL140	13	0.9	0.0
KL140	13	0.8	0.6509953457616267
KL140	13	0.95	0.6115849052164559


 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 80/96 [09:13<01:56,  7.27s/it]

KL140	13	0.65	0.0
KL159	9	0.975	0.5366563145999496
KL159	9	0.9	0.5461059350777525
KL159	9	0.75	0.6222882290838749
KL159	9	0.95	0.42388371285553034
KL159	9	0.8	0.6222882290838749
KL159	9	0.7	0.5461059350777525
KL159	9	0.65	0.6683281572999747


 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 81/96 [09:20<01:47,  7.14s/it]

KL159	9	0.85	0.6024922359499622
KL109	16	0.85	0.0917662935482247
KL109	16	0.975	0.04164945273573132
KL109	16	0.95	0.20063250433858817
KL109	16	0.8	0.28908630060575846
KL109	16	0.75	0.3214658376719215
KL109	16	0.9	0.05
KL109	16	0.65	0.30949871512895166


 85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                  | 82/96 [09:30<01:50,  7.92s/it]

KL109	16	0.7	0.3590561112148561
KL2	351	0.7	0.7870093133755109
KL2	351	0.85	0.8131165601721763
KL2	351	0.75	0.8106375112488822
KL2	351	0.975	0.8183531490273618
KL2	351	0.9	0.7997437185602873
KL2	351	0.8	0.7711623317322923
KL2	351	0.65	0.8234714032971896


 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 83/96 [09:38<01:43,  7.93s/it]

KL2	351	0.95	0.8127425794452693
KL106	487	0.9	0.750220074826942
KL106	487	0.65	0.744066783125719
KL106	487	0.8	0.760092350564167
KL106	487	0.7	0.7132974859012915
KL106	487	0.85	0.7538109845802491
KL106	487	0.75	0.7610230954407702
KL106	487	0.975	0.7955178382327222


 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌               | 84/96 [09:47<01:41,  8.49s/it]

KL106	487	0.95	0.715872601442953
KL122	29	0.65	0.6948138414643185
KL122	29	0.95	0.5989501779575468
KL122	29	0.7	0.5617862333196895
KL122	29	0.75	0.6805598132711541
KL122	29	0.85	0.585477207921569
KL122	29	0.9	0.6126994382447839
KL122	29	0.975	0.5315412189245097


 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 85/96 [09:56<01:33,  8.46s/it]

KL122	29	0.8	0.6280194823390599
KL70	30	0.9	0.8325591575278423
KL70	30	0.7	0.8540505765461983
KL70	30	0.975	0.585477207921569
KL70	30	0.65	0.8415512198823579
KL70	30	0.8	0.7886644890949661
KL70	30	0.85	0.8092852470120718
KL70	30	0.95	0.7875677680498353


 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████             | 86/96 [10:03<01:19,  7.95s/it]

KL70	30	0.75	0.8129538555010674
KL43	44	0.85	0.6137062115991918
KL43	44	0.75	0.6759019433786754
KL43	44	0.9	0.6663387912104103
KL43	44	0.8	0.6830890413208687
KL43	44	0.95	0.6630529202099409
KL43	44	0.975	0.6477760209301143
KL43	44	0.65	0.6958190517966801


 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍           | 87/96 [10:09<01:08,  7.57s/it]

KL43	44	0.7	0.5647688696083648
KL127	33	0.8	0.7267946287611349
KL127	33	0.95	0.6173565491966058
KL127	33	0.7	0.7517898843710297
KL127	33	0.85	0.6853596221955532
KL127	33	0.975	0.5555321948951558
KL127	33	0.9	0.7306915336736953
KL127	33	0.65	0.7719206508249042


 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 88/96 [10:16<00:58,  7.35s/it]

KL127	33	0.75	0.7049971173712776
KL25	269	0.75	0.7193576843421199
KL25	269	0.8	0.8031584099355625
KL25	269	0.65	0.8077083116499946
KL25	269	0.85	0.8090144660938176
KL25	269	0.7	0.800855500255708
KL25	269	0.95	0.8133964215502099
KL25	269	0.975	0.7713903646234085


 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉         | 89/96 [10:26<00:55,  7.99s/it]

KL25	269	0.9	0.8104285031127371
KL67	11	0.8	0.5327180698955954
KL67	11	0.975	0.44161764098602485
KL67	11	0.65	0.6416176409860248
KL67	11	0.75	0.6416176409860248
KL67	11	0.95	0.44161764098602485
KL67	11	0.7	0.5880351827269407
KL67	11	0.85	0.6416176409860248


 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 90/96 [10:31<00:43,  7.31s/it]

KL67	11	0.9	0.5234358228042066
KL18	22	0.95	0.8669831455354439
KL18	22	0.8	0.9749534099472598
KL18	22	0.7	0.9749534099472598
KL18	22	0.85	0.9497259894483203
KL18	22	0.65	0.9281025064514893
KL18	22	0.75	0.9191239002203043
KL18	22	0.975	0.7140958292436097


 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌      | 91/96 [10:38<00:35,  7.18s/it]

KL18	22	0.9	0.8641864705982639
KL62	152	0.75	0.7751956250304138
KL62	152	0.9	0.8077473737676919
KL62	152	0.7	0.8226132722270215
KL62	152	0.95	0.7610087528154023
KL62	152	0.975	0.6962291732320499
KL62	152	0.85	0.8421478903681391
KL62	152	0.8	0.7881411916911545


 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 92/96 [10:45<00:28,  7.14s/it]

KL62	152	0.65	0.8656303531855091
KL22	78	0.7	0.838653075150403
KL22	78	0.8	0.8643580640998859
KL22	78	0.95	0.8729157743724033
KL22	78	0.975	0.838496373676089
KL22	78	0.85	0.8815055597501619
KL22	78	0.65	0.8638591653279725
KL22	78	0.9	0.873373184429451


 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 93/96 [10:50<00:19,  6.56s/it]

KL22	78	0.75	0.8818479844582227
KL149	70	0.8	0.8738201197795396
KL149	70	0.85	0.8242089280688196
KL149	70	0.95	0.8137331936438471
KL149	70	0.75	0.8357626362394908
KL149	70	0.975	0.7172447843224223
KL149	70	0.9	0.8925835596303449
KL149	70	0.7	0.8673515541192927


 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 94/96 [10:58<00:13,  7.00s/it]

KL149	70	0.65	0.8642699761248377
KL152	10	0.8	0.4148399724926484
KL152	10	0.65	0.4296799449852968
KL152	10	0.85	0.4696799449852968
KL152	10	0.975	0.4696799449852968
KL152	10	0.95	0.4696799449852968
KL152	10	0.75	0.4696799449852968
KL152	10	0.7	0.4148399724926484


 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 95/96 [11:06<00:07,  7.14s/it]

KL152	10	0.9	0.4696799449852968
KL117	17	0.9	0.584665064908091
KL117	17	0.975	0.5526986749914522
KL117	17	0.95	0.584665064908091
KL117	17	0.8	0.6612231023961064
KL117	17	0.65	0.3694527909108401
KL117	17	0.75	0.728442672656688
KL117	17	0.85	0.48081243640493787


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [11:12<00:00,  7.00s/it]

KL117	17	0.7	0.7779173459253124





In [15]:
threshold = 15

names_metric_col = ["KL_type", "Count", "c_value","mean_mcc"]
metric_df = pd.read_csv(f"/media/concha-eloko/Linux/PPT_clean/Seqbased_model/Seqbased_models.prophages_metrics.tsv", sep = "\t", names = names_metric_col)

metric_eval_df = metric_df[metric_df["Count"] > threshold]

weighted_mcc_dico = {}
for cluster in metric_eval_df["c_value"].unique().tolist() :
    mcc_sum = 0
    cl_df = metric_eval_df[metric_eval_df["c_value"] == cluster]
    for _,row in cl_df.iterrows() : 
        mcc_sum = mcc_sum + row["mean_mcc"] * row["Count"]
    weighted_mcc = mcc_sum / (sum(cl_df["Count"]))
    weighted_mcc_dico[cluster] = weighted_mcc

weighted_mcc_dico

{0.9: 0.7751099285229077,
 0.975: 0.741274524549705,
 0.8: 0.7697635340143436,
 0.65: 0.7708792611301989,
 0.95: 0.7567348331288974,
 0.75: 0.761438770189088,
 0.7: 0.7621235953812278,
 0.85: 0.7772843284953614}

***
### Get the full metrics : 

> Relevant objects path :

In [16]:
cl_value = "0.85"

KLtypes = set([file.split("_RF_")[1].split(".full_data")[0] for file in os.listdir(path_jobs)])

KLtypes_paths = {kltype : [f"{path_jobs}/{file}" for file in os.listdir(path_jobs) if file.split("_")[0] == cl_value if file.split("_RF_")[1].split(".full_data")[0]==kltype]
                for kltype in KLtypes if dico_prophage_count[kltype] > threshold}



In [17]:
KLtypes_paths

{'KL15': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL15.full_data.joblib'],
 'KL145': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL145.full_data.joblib'],
 'KL110': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL110.full_data.joblib'],
 'KL125': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL125.full_data.joblib'],
 'KL20': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL20.full_data.joblib'],
 'KL107': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL107.full_data.joblib'],
 'KL128': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL128.full_data.joblib'],
 'KL38': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_RF_KL38.full_data.joblib'],
 'KL45': ['/media/concha-eloko/Linux/PPT_clean/ficheros_28032023/RF_1302_models/0.85_R

In [11]:
def get_mean_MCC(path_data_object) :
    data_object = joblib.load(f"{path_data_object}")
    mcc_values = []
    for i in data_object :
        y_test = data_object[i]["test_data"][0].values
        predictions = data_object[i]["test_data"][1]
        mcc = matthews_corrcoef(y_test, predictions)
        mcc_values.append(mcc)
    del data_object
    mean_mcc = mean(mcc_values)
    return mean_mcc

def get_mean_metrics(path_data_object) :
    data_object = joblib.load(f"{path_data_object}")
    mcc_values = []
    auc_values = []
    recall_values = []
    f1_values = []
    acc_values = []
    prec_values = []
    report_list = []
    for i in data_object :
        y_test = data_object[i]["test_data"][0].values
        predictions = data_object[i]["test_data"][1]
        mcc = matthews_corrcoef(y_test, predictions)
        auc = roc_auc_score(y_test, predictions)
        auc_values.append(auc)
        mcc_values.append(mcc)
        # Report : 
        report = classification_report(y_test, predictions, output_dict=True)
        recall_values.append(report["1"]["recall"])
        f1_values.append(report["1"]["f1-score"])
        prec_values.append(report["1"]["precision"])
        acc_values.append(report["accuracy"])
        report_list.append(report)
    del data_object
    mean_mcc = mean(mcc_values)
    mean_auc = mean(auc_values)
    mean_recall = mean(recall_values)
    mean_f1 = mean(f1_values)
    mean_acc = mean(acc_values)
    mean_prec = mean(prec_values)
    return mean_mcc , mean_recall , mean_f1 , mean_acc , mean_prec , mean_auc



In [18]:
mean_mcc , mean_recall , mean_f1 , mean_acc , mean_prec , mean_auc = get_mean_metrics(KLtypes_paths["KL47"][0])

In [19]:
with open(f"/media/concha-eloko/Linux/PPT_clean/Seqbased_model/Seqbased_models.prophages_FULL_metrics.tsv", "w") as outfile :
    outfile.write(f"KL_type\tn_prophages\tF1\trecall\tprecision\taccuracy\tAUC\tMCC\n")
    for kltype in KLtypes_paths :
        count_kltype = dico_prophage_count[kltype]
        mean_mcc , mean_recall , mean_f1 , mean_acc , mean_prec , mean_auc = get_mean_metrics(KLtypes_paths[kltype][0])
        outfile.write(f"{kltype}\t{count_kltype}\t{mean_f1}\t{mean_recall}\t{mean_prec}\t{mean_acc}\t{mean_auc}\t{mean_mcc}\n")
        print(f"{kltype}\t{count_kltype}\t{mean_f1}\t{mean_recall}\t{mean_prec}\t{mean_acc}\t{mean_auc}\t{mean_mcc}")

KL15	204	0.8114588070327643	0.757026713124274	0.8778850278850279	0.9417461146663184	0.8678337449116517	0.781243516812687
KL145	29	0.9418181818181818	0.9	1.0	0.9833333333333333	0.95	0.9377191850351939
KL110	64	0.7280331262939959	0.6615384615384615	0.8526050420168068	0.9205128205128206	0.816923076923077	0.701556087107071


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL125	26	0.0	0.0	0.0	0.8334677419354839	0.5	0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL20	18	0.0	0.0	0.0	0.8337662337662338	0.5	0.0
KL107	1121	0.8266579480598523	0.8591785714285715	0.7972308587597026	0.9399890351240057	0.9076641520244462	0.79153889872206
KL128	22	0.808008658008658	0.72	0.9666666666666667	0.947008547008547	0.8554545454545455	0.8008969483663161
KL38	76	0.6828671328671329	0.5283333333333333	1.0	0.9211180124223602	0.7641666666666667	0.6911254303953135
KL45	63	0.8955204216073781	0.8282051282051281	0.9833333333333333	0.9683157894736842	0.9125152625152625	0.8840078210857842
KL8	20	0.4780952380952381	0.35	0.9	0.8833333333333333	0.67	0.5027182824148568
KL51	144	0.8402082793189871	0.84064039408867	0.8407739934436264	0.946773759913967	0.9043479748221128	0.8086417598404179
KL5	28	0.6435497835497835	0.5466666666666666	0.9266666666666666	0.913781512605042	0.7664367816091954	0.649713808922839
KL29	37	0.76	0.6678571428571428	0.9047619047619048	0.9367676767676768	0.828523166023166	0.7414910730164936
KL1	190	0.5279575510104982	0.37894736842105264	0.9261403508771929	0.89

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL48	22	0.4857142857142857	0.37	0.8	0.8943019943019943	0.6849999999999999	0.5057330617635248
KL105	114	0.784108101106944	0.8059288537549407	0.7742857142857142	0.9254401030485186	0.8775258303862422	0.7435561769017999
KL112	60	0.7708827404479579	0.6666666666666666	0.9368181818181818	0.9361111111111111	0.8283333333333333	0.7545953176501083
KL16	27	0.5580952380952382	0.42666666666666664	0.9	0.9007575757575758	0.7096296296296296	0.5674973289890533
KL28	112	0.8571776630017277	0.7692028985507247	0.9787114845938375	0.9589851163719908	0.8829065340211251	0.8446934684060952
KL114	24	0.7672150072150072	0.7000000000000001	0.9266666666666666	0.9374384236453203	0.8416666666666667	0.7607981762117202


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL35	18	0.5066666666666667	0.39999999999999997	0.7	0.8891774891774892	0.6944444444444444	0.4934986580010322


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL169	27	0.3047619047619048	0.19333333333333333	0.8	0.8645833333333334	0.5966666666666667	0.3619460024736606
KL24	288	0.8177182385659331	0.7750151240169388	0.8672900649621551	0.9423264646599256	0.8753968353648708	0.7859092373131645
KL13	66	0.5040935672514619	0.3505494505494506	0.91	0.8864556962025316	0.6722444222444223	0.5220021751920444
KL7	37	0.7675524475524476	0.6571428571428571	1.0	0.9417171717171717	0.8285714285714286	0.7741138735373531
KL30	93	0.53952676439558	0.4070175438596491	0.8492307692307692	0.887065637065637	0.6949066213921902	0.5324078684588002
KL118	23	0.8787878787878788	0.88	0.8933333333333333	0.9568783068783069	0.9269565217391305	0.8583005902280558
KL23	126	0.7268591636033497	0.6012307692307692	0.9317251461988304	0.925787284610814	0.7959278846153846	0.7107185758679253
KL57	51	0.7691950464396284	0.6654545454545454	0.9206349206349206	0.9345848757271286	0.8268449197860962	0.7475287544029864
KL3	145	0.9502036635845735	0.9241379310344827	0.9795238095238096	0.983908045977011

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL116	25	0.542063492063492	0.44	0.7333333333333333	0.9	0.716	0.5334483823948227
KL39	37	0.634032634032634	0.5178571428571428	0.8533333333333334	0.9010101010101009	0.7481177606177606	0.6112059393099613
KL64	896	0.8245352310419161	0.7992985723153321	0.8517374747497993	0.9433289151677695	0.8857139461353695	0.7914559179999973
KL47	551	0.8345786860096365	0.8603439803439803	0.8105039946212408	0.9431388859688012	0.9100267996093768	0.8008807940728095
KL74	82	0.8665813278716504	0.7941176470588235	0.9684848484848485	0.9614100185528757	0.8946197991391679	0.8542110164438831
KL10	111	0.6856307435254804	0.5553359683794467	0.8990476190476191	0.9152681039248204	0.7714179841897233	0.6652739360940374
KL46	80	0.781898270805946	0.7191176470588235	0.890316742081448	0.9362718283189565	0.8496822803195352	0.7598764794739231
KL111	67	0.7497154150197629	0.654945054945055	0.8777777777777778	0.9277777777777778	0.8185173035919304	0.719257166832617
KL102	242	0.9145957646862513	0.855187074829932	0.9869444444444444	0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL55	24	0.36190476190476195	0.24000000000000002	0.8	0.8733333333333334	0.62	0.40513668157435867
KL14	130	0.858716085992187	0.7769230769230769	0.9721056721056721	0.958974358974359	0.8861538461538462	0.8456204699615617


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL9	21	0.4833333333333333	0.37	0.7	0.8886153846153846	0.6802380952380952	0.47299272244288304
KL153	18	0.5053968253968254	0.45	0.64	0.8428571428571429	0.6861111111111111	0.4386826293765435
KL21	100	0.8787633419212367	0.82	0.9555555555555555	0.9633333333333334	0.906	0.8633539508455158
KL81	37	0.7319347319347319	0.5785714285714285	1.0	0.9298550724637681	0.7892857142857143	0.7300563323797485


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL31	16	0.5314285714285715	0.48333333333333334	0.6	0.9057894736842105	0.7354166666666666	0.5060448024364359
KL151	61	0.8819875776397516	0.8038461538461539	0.9818181818181818	0.9644946316179193	0.9002837326607818	0.868651038658241
KL123	29	0.6266666666666667	0.5	0.9333333333333333	0.9055555555555556	0.7433333333333333	0.6298843133197235


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


KL109	16	0.08	0.05	0.2	0.8436842105263157	0.525	0.0917662935482247
KL2	351	0.834817120753062	0.7534406438631791	0.9374125780431639	0.9504248628395557	0.8716211717899465	0.8131165601721763
KL106	487	0.7956232917703557	0.8200294550810014	0.772739221599344	0.9297887680169311	0.8858838481948975	0.7538109845802491
KL122	29	0.5504761904761905	0.39999999999999997	1.0	0.9	0.7	0.585477207921569
KL70	30	0.8151515151515152	0.7333333333333334	0.9666666666666667	0.95	0.8633333333333334	0.8092852470120718
KL43	44	0.6373809523809524	0.5277777777777778	0.8361904761904762	0.9055152394775037	0.7547979797979798	0.6137062115991918
KL127	33	0.6707070707070707	0.5285714285714286	1.0	0.9196153846153846	0.7642857142857142	0.6853596221955532
KL25	269	0.8380233762041729	0.8104122990915443	0.8712717446785647	0.9479491567794166	0.8929384915532071	0.8090144660938176
KL18	22	0.9555555555555556	0.96	0.96	0.9849002849002849	0.9754545454545455	0.9497259894483203
KL62	152	0.8630735741428277	0.8094623655913978	0.9292857

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


***
### Dump the best model for each KL types :

In [20]:
path_models = "/media/concha-eloko/Linux/PPT_clean/Seqbased_model/1702_models"
cl_value = "0.85"

KLtypes = set([file.split("_RF_")[1].split(".full_data")[0] for file in os.listdir(path_jobs)])

KLtypes_paths = {kltype : [f"{path_jobs}/{file}" for file in os.listdir(path_jobs) if file.split("_")[0] == cl_value if file.split("_RF_")[1].split(".full_data")[0]==kltype]
                for kltype in KLtypes}


In [21]:
def save_best_model(path_data_object) :
    data_object = joblib.load(f"{path_data_object}")
    mcc_values = []
    kltype = path_data_object.split("/")[-1].split("RF_")[1].split(".full")[0]
    for i in range(0,5) :
        y_test = data_object[i]["test_data"][0].values
        predictions = data_object[i]["test_data"][1]
        mcc = matthews_corrcoef(y_test, predictions)
        mcc_values.append(mcc)
    #print(kltype)
    #print(mcc_values)
    #print(np.argmax(mcc_values))
    #print(data_object[np.argmax(mcc_values)])
    #print("\n")
    best_model = data_object[np.argmax(mcc_values)]["model"]
    joblib.dump(best_model, f'{path_models}/{cl_value}_RF_{kltype}.best_RF_model.joblib')
    del data_object
    print(f"Model saved for {kltype}")

for kltype in KLtypes_paths :
    save_best_model(KLtypes_paths[kltype][0])
    


Model saved for KL71
Model saved for KL139
Model saved for KL124
Model saved for KL15
Model saved for KL157
Model saved for KL145
Model saved for KL110
Model saved for KL125
Model saved for KL20
Model saved for KL107
Model saved for KL128
Model saved for KL38
Model saved for KL45
Model saved for KL8
Model saved for KL51
Model saved for KL5
Model saved for KL137
Model saved for KL103
Model saved for KL29
Model saved for KL6
Model saved for KL61
Model saved for KL1
Model saved for KL136
Model saved for KL53
Model saved for KL17
Model saved for KL48
Model saved for KL105
Model saved for KL112
Model saved for KL16
Model saved for KL28
Model saved for KL114
Model saved for KL35
Model saved for KL169
Model saved for KL24
Model saved for KL4
Model saved for KL13
Model saved for KL7
Model saved for KL30
Model saved for KL118
Model saved for KL23
Model saved for KL41
Model saved for KL57
Model saved for KL26
Model saved for KL3
Model saved for KL63
Model saved for KL116
Model saved for KL39
Mod

IndexError: list index out of range