In [1]:
# at the very top of your notebook:
%load_ext autoreload
%autoreload 2


In [None]:
import ast
import math
import random
import json
import warnings
from collections import Counter, deque
import copy
import os
import re

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from sklearn.exceptions import UndefinedMetricWarning
from sklearn.metrics import (
    accuracy_score, average_precision_score, f1_score,
    precision_score, recall_score, roc_auc_score, classification_report,
    precision_recall_curve
)
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler

from iterstrat.ml_stratifiers import (
    MultilabelStratifiedKFold, MultilabelStratifiedShuffleSplit
)
from pathlib import Path

from utils_corrected import (
    compute_ensemble_probs, predict_without_MCD,
    chunked_mad_over_runs, build_pred_annots_dict, load_kfold_ensembles, PFP
)

In [2]:
#read in TUNED_MODEL_ARCHS.json
with open('TUNED_MODEL_ARCHS.json') as f:
    TUNED_MODEL_ARCHS = json.load(f)

Load models

In [9]:
MF_CHOSEN_CONFIG = TUNED_MODEL_ARCHS['MFO']['CHOSEN_CONFIG']
MF_CHOSEN_CONFIG_ARCH = TUNED_MODEL_ARCHS['MFO']['CHOSEN_CONFIG_ARCH']
MF_CHOSEN_CONFIG_LOSS = 'Balanced'
MF_CHOSEN_LEARNING_RATE = TUNED_MODEL_ARCHS['MFO']['CHOSEN_CONFIG_LR']
MF_CHOSEN_NUM_CLASSES = TUNED_MODEL_ARCHS['MFO']['NUM_CLASSES']
MF_dropout_rate   = 0.3
MF_hidden_dims    = MF_CHOSEN_CONFIG_ARCH
MF_input_dim      = 512
MF_output_dim     = MF_CHOSEN_NUM_CLASSES
MF_device         = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# MF_base_kwargs should match what PFP expects, e.g. {"input_dim":..., "hidden_dims":..., ...}
MF_base_kwargs    = {
    'input_dim'   : MF_input_dim,
    'hidden_dims' : MF_hidden_dims,
    'output_dim'  : MF_output_dim,
    'dropout_rate': MF_dropout_rate,
}

BP_CHOSEN_CONFIG = TUNED_MODEL_ARCHS['BPO']['CHOSEN_CONFIG']
BP_CHOSEN_CONFIG_ARCH = TUNED_MODEL_ARCHS['BPO']['CHOSEN_CONFIG_ARCH']
BP_CHOSEN_CONFIG_LOSS = 'Balanced'
BP_CHOSEN_LEARNING_RATE = TUNED_MODEL_ARCHS['BPO']['CHOSEN_CONFIG_LR']
BP_CHOSEN_NUM_CLASSES = TUNED_MODEL_ARCHS['BPO']['NUM_CLASSES']
BP_dropout_rate   = 0.3
BP_hidden_dims    = BP_CHOSEN_CONFIG_ARCH
BP_input_dim      = 512
BP_output_dim     = BP_CHOSEN_NUM_CLASSES
BP_device         = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# BP_base_kwargs should match what PFP expects, e.g. {"input_dim":..., "hidden_dims":..., ...}
BP_base_kwargs    = {
    'input_dim'   : BP_input_dim,
    'hidden_dims' : BP_hidden_dims,
    'output_dim'  : BP_output_dim,
    'dropout_rate': BP_dropout_rate,
}

CC_CHOSEN_CONFIG = TUNED_MODEL_ARCHS['CCO']['CHOSEN_CONFIG']
CC_CHOSEN_CONFIG_ARCH = TUNED_MODEL_ARCHS['CCO']['CHOSEN_CONFIG_ARCH']
CC_CHOSEN_CONFIG_LOSS = 'Balanced'
CC_CHOSEN_LEARNING_RATE = TUNED_MODEL_ARCHS['CCO']['CHOSEN_CONFIG_LR']
CC_CHOSEN_NUM_CLASSES = TUNED_MODEL_ARCHS['CCO']['NUM_CLASSES']
CC_dropout_rate   = 0.3
CC_hidden_dims    = CC_CHOSEN_CONFIG_ARCH
CC_input_dim      = 512
CC_output_dim     = CC_CHOSEN_NUM_CLASSES
CC_device         = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# CC_base_kwargs should match what PFP expects, e.g. {"input_dim":..., "hidden_dims":..., ...}
CC_base_kwargs    = {
    'input_dim'   : CC_input_dim,
    'hidden_dims' : CC_hidden_dims,
    'output_dim'  : CC_output_dim,
    'dropout_rate': CC_dropout_rate,
}

MF_root_dir     = r"mf_publish"
BP_root_dir     = r"bp_publish"
CC_root_dir     = r"cc_publish"

MF_kf_root      = os.path.join(MF_root_dir, "k_folds")
BP_kf_root      = os.path.join(BP_root_dir, "k_folds")
CC_kf_root      = os.path.join(CC_root_dir, "k_folds")

MF_fold_options = [20]
BP_fold_options = [20]
CC_fold_options = [20]

MF_raw_ensembles = load_kfold_ensembles(
    kf_root=MF_kf_root,
    fold_options=MF_fold_options,
    model_cls=PFP,
    model_kwargs=MF_base_kwargs,
    device=MF_device
)
BP_raw_ensembles = load_kfold_ensembles(
    kf_root=BP_kf_root,
    fold_options=BP_fold_options,
    model_cls=PFP,
    model_kwargs=BP_base_kwargs,
    device=BP_device
)
CC_raw_ensembles = load_kfold_ensembles(
    kf_root=CC_kf_root,
    fold_options=CC_fold_options,
    model_cls=PFP,
    model_kwargs=CC_base_kwargs,
    device=CC_device
)

Identify which genes are missing annotations in each subontology

In [36]:
# === CONFIG ===
dict_in_dir = Path(r"genomes_to_annotate_with_PlasmoFP/gene_dicts_out_complete_and_filtered_2")
out_dir     = Path(r"genomes_to_annotate_with_PlasmoFP/missing_by_aspect_new_2")
out_dir.mkdir(exist_ok=True)

# which GO keys correspond to each aspect
aspect_keys = {
    "MF": ["GO Function", "GO IEA Function"],
    "BP": ["GO Process",  "GO IEA Process"],
    "CC": ["GO Component","GO IEA Component"]
}

for pkl_fp in sorted(dict_in_dir.glob("*_gene_dict_complete.pkl")):
    species = pkl_fp.stem.rsplit("_gene_dict", 1)[0]

    # load the full gene_dict
    with open(pkl_fp, "rb") as f:
        full_dict = pickle.load(f)

    for asp, keys in aspect_keys.items():
        missing_dict   = {}
        missing_embeds = []

        for gene_id, rec in full_dict.items():
            emb = rec["embedding"]
            # skip genes without any embedding
            if emb is None:
                continue

            # sum of both buckets for this aspect
            has_terms = sum(len(rec[k]) for k in keys)
            if has_terms == 0:
                # this gene has NO terms in this aspect; include it
                missing_dict[gene_id] = rec
                missing_embeds.append(emb)

        # stack into array (n_missing × emb_dim)
        if missing_embeds:
            emb_arr = np.stack(missing_embeds, axis=0)
        else:
            # grab emb_dim from any entry
            sample = next(iter(full_dict.values()))
            emb_dim = sample["embedding"].shape[0] if sample["embedding"] is not None else 0
            emb_arr = np.zeros((0, emb_dim))

        # save subset dict
        out_pkl = out_dir / f"{species}_missing_{asp}_gene_dict.pkl"
        with open(out_pkl, "wb") as f:
            pickle.dump(missing_dict, f)

        # save embeddings
        out_npy = out_dir / f"{species}_missing_{asp}_embeddings.npy"
        np.save(out_npy, emb_arr)

        print(f"{species} [{asp}]: {len(missing_dict)} genes missing (with embeddings) → "
              f"{out_pkl.name}, {out_npy.name}")


PlasmoDB-68_PadleriG01 [MF]: 2422 genes missing (with embeddings) → PlasmoDB-68_PadleriG01_missing_MF_gene_dict.pkl, PlasmoDB-68_PadleriG01_missing_MF_embeddings.npy
PlasmoDB-68_PadleriG01 [BP]: 2863 genes missing (with embeddings) → PlasmoDB-68_PadleriG01_missing_BP_gene_dict.pkl, PlasmoDB-68_PadleriG01_missing_BP_embeddings.npy
PlasmoDB-68_PadleriG01 [CC]: 2453 genes missing (with embeddings) → PlasmoDB-68_PadleriG01_missing_CC_gene_dict.pkl, PlasmoDB-68_PadleriG01_missing_CC_embeddings.npy
PlasmoDB-68_PbergheiANKA [MF]: 1992 genes missing (with embeddings) → PlasmoDB-68_PbergheiANKA_missing_MF_gene_dict.pkl, PlasmoDB-68_PbergheiANKA_missing_MF_embeddings.npy
PlasmoDB-68_PbergheiANKA [BP]: 2272 genes missing (with embeddings) → PlasmoDB-68_PbergheiANKA_missing_BP_gene_dict.pkl, PlasmoDB-68_PbergheiANKA_missing_BP_embeddings.npy
PlasmoDB-68_PbergheiANKA [CC]: 1712 genes missing (with embeddings) → PlasmoDB-68_PbergheiANKA_missing_CC_gene_dict.pkl, PlasmoDB-68_PbergheiANKA_missing_CC_e

Use PlasmoFP to predict missing subontologies 

In [37]:
missing_dir = Path(r"genomes_to_annotate_with_PlasmoFP/missing_by_aspect_new_2")
output_dir = missing_dir / "ensemble_probs"
output_dir.mkdir(exist_ok=True)

aspect_to_ensembles = {
    "MF": MF_raw_ensembles,
    "BP": BP_raw_ensembles,
    "CC": CC_raw_ensembles,
}

batch_size = 512
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pattern = re.compile(r"(?P<species>.+)_missing_(?P<asp>MF|BP|CC)_embeddings\.npy")

files = sorted(missing_dir.glob("*_missing_*_embeddings.npy"))
for npy_fp in tqdm(files, desc="Processing embeddings"):
    m = pattern.match(npy_fp.name)
    if not m:
        continue
    species = m.group("species")
    asp = m.group("asp")

    # Load embeddings and create DataLoader
    emb_arr = np.load(npy_fp)
    loader = DataLoader(
        TensorDataset(torch.from_numpy(emb_arr).float()),
        batch_size=batch_size,
        shuffle=False
    )

    
    kf_models = aspect_to_ensembles[asp]
    
    
    probs_dict = compute_ensemble_probs(kf_models, loader, device, predict_without_MCD)
    
    
    probs_20fold = probs_dict[20]  # Shape: (20, N, C)
    
    
    median_probs = np.median(probs_20fold, axis=0)  # Shape: (N, C)
    mad_probs = chunked_mad_over_runs(probs_20fold, chunk_size=50)  # Shape: (N, C)
    
    
    median_fp = output_dir / f"{species}_{asp}_median_20fold.npy"
    mad_fp = output_dir / f"{species}_{asp}_mad_20fold.npy"
    np.save(median_fp, median_probs)
    np.save(mad_fp, mad_probs)
    
    tqdm.write(f"{species} [{asp}]: median={median_probs.shape}, mad={mad_probs.shape}")

print("Median/MAD files saved")


Processing embeddings:   2%|▏         | 1/57 [00:06<05:42,  6.12s/it]

PlasmoDB-68_PadleriG01 [BP]: median=(2863, 1548), mad=(2863, 1548)


Processing embeddings:   4%|▎         | 2/57 [00:07<03:02,  3.31s/it]

PlasmoDB-68_PadleriG01 [CC]: median=(2453, 255), mad=(2453, 255)


Processing embeddings:   5%|▌         | 3/57 [00:10<02:45,  3.06s/it]

PlasmoDB-68_PadleriG01 [MF]: median=(2422, 744), mad=(2422, 744)


Processing embeddings:   7%|▋         | 4/57 [00:14<03:15,  3.69s/it]

PlasmoDB-68_PbergheiANKA [BP]: median=(2272, 1548), mad=(2272, 1548)


Processing embeddings:   9%|▉         | 5/57 [00:15<02:21,  2.71s/it]

PlasmoDB-68_PbergheiANKA [CC]: median=(1712, 255), mad=(1712, 255)


Processing embeddings:  11%|█         | 6/57 [00:18<02:10,  2.56s/it]

PlasmoDB-68_PbergheiANKA [MF]: median=(1992, 744), mad=(1992, 744)


Processing embeddings:  12%|█▏        | 7/57 [00:23<02:57,  3.55s/it]

PlasmoDB-68_PblacklockiG01 [BP]: median=(2741, 1548), mad=(2741, 1548)


Processing embeddings:  14%|█▍        | 8/57 [00:25<02:19,  2.85s/it]

PlasmoDB-68_PblacklockiG01 [CC]: median=(2362, 255), mad=(2362, 255)


Processing embeddings:  16%|█▌        | 9/57 [00:27<02:13,  2.79s/it]

PlasmoDB-68_PblacklockiG01 [MF]: median=(2328, 744), mad=(2328, 744)


Processing embeddings:  18%|█▊        | 10/57 [00:32<02:46,  3.55s/it]

PlasmoDB-68_Pchabaudichabaudi [BP]: median=(2432, 1548), mad=(2432, 1548)


Processing embeddings:  19%|█▉        | 11/57 [00:34<02:08,  2.79s/it]

PlasmoDB-68_Pchabaudichabaudi [CC]: median=(1929, 255), mad=(1929, 255)


Processing embeddings:  21%|██        | 12/57 [00:36<02:04,  2.77s/it]

PlasmoDB-68_Pchabaudichabaudi [MF]: median=(2085, 744), mad=(2085, 744)


Processing embeddings:  23%|██▎       | 13/57 [00:43<02:57,  4.04s/it]

PlasmoDB-68_PcoatneyiHackeri [BP]: median=(3247, 1548), mad=(3247, 1548)


Processing embeddings:  25%|██▍       | 14/57 [00:45<02:21,  3.30s/it]

PlasmoDB-68_PcoatneyiHackeri [CC]: median=(2845, 255), mad=(2845, 255)


Processing embeddings:  26%|██▋       | 15/57 [00:48<02:20,  3.34s/it]

PlasmoDB-68_PcoatneyiHackeri [MF]: median=(2835, 744), mad=(2835, 744)


Processing embeddings:  28%|██▊       | 16/57 [00:54<02:50,  4.15s/it]

PlasmoDB-68_PcynomolgiM [BP]: median=(2956, 1548), mad=(2956, 1548)


Processing embeddings:  30%|██▉       | 17/57 [00:56<02:20,  3.51s/it]

PlasmoDB-68_PcynomolgiM [CC]: median=(3575, 255), mad=(3575, 255)


Processing embeddings:  32%|███▏      | 18/57 [00:59<02:09,  3.33s/it]

PlasmoDB-68_PcynomolgiM [MF]: median=(2597, 744), mad=(2597, 744)


Processing embeddings:  33%|███▎      | 19/57 [01:03<02:13,  3.50s/it]

PlasmoDB-68_Pfalciparum3D7 [BP]: median=(1905, 1548), mad=(1905, 1548)


Processing embeddings:  35%|███▌      | 20/57 [01:04<01:36,  2.61s/it]

PlasmoDB-68_Pfalciparum3D7 [CC]: median=(933, 255), mad=(933, 255)


Processing embeddings:  37%|███▋      | 21/57 [01:06<01:27,  2.42s/it]

PlasmoDB-68_Pfalciparum3D7 [MF]: median=(1743, 744), mad=(1743, 744)


Processing embeddings:  39%|███▊      | 22/57 [01:13<02:16,  3.90s/it]

PlasmoDB-68_PfragileNilgiri [BP]: median=(3448, 1548), mad=(3448, 1548)


Processing embeddings:  40%|████      | 23/57 [01:15<01:49,  3.22s/it]

PlasmoDB-68_PfragileNilgiri [CC]: median=(3014, 255), mad=(3014, 255)


Processing embeddings:  42%|████▏     | 24/57 [01:18<01:48,  3.29s/it]

PlasmoDB-68_PfragileNilgiri [MF]: median=(3048, 744), mad=(3048, 744)


Processing embeddings:  44%|████▍     | 25/57 [01:24<02:07,  3.99s/it]

PlasmoDB-68_PgaboniG01 [BP]: median=(2732, 1548), mad=(2732, 1548)


Processing embeddings:  46%|████▌     | 26/57 [01:25<01:39,  3.22s/it]

PlasmoDB-68_PgaboniG01 [CC]: median=(2476, 255), mad=(2476, 255)


Processing embeddings:  47%|████▋     | 27/57 [01:28<01:30,  3.03s/it]

PlasmoDB-68_PgaboniG01 [MF]: median=(2304, 744), mad=(2304, 744)


Processing embeddings:  49%|████▉     | 28/57 [01:33<01:51,  3.85s/it]

PlasmoDB-68_Pgallinaceum8A [BP]: median=(2607, 1548), mad=(2607, 1548)


Processing embeddings:  51%|█████     | 29/57 [01:35<01:24,  3.03s/it]

PlasmoDB-68_Pgallinaceum8A [CC]: median=(2073, 255), mad=(2073, 255)


Processing embeddings:  53%|█████▎    | 30/57 [01:37<01:17,  2.87s/it]

PlasmoDB-68_Pgallinaceum8A [MF]: median=(2172, 744), mad=(2172, 744)


Processing embeddings:  54%|█████▍    | 31/57 [01:45<01:50,  4.27s/it]

PlasmoDB-68_PinuiSanAntonio1 [BP]: median=(3682, 1548), mad=(3682, 1548)


Processing embeddings:  56%|█████▌    | 32/57 [01:47<01:30,  3.61s/it]

PlasmoDB-68_PinuiSanAntonio1 [CC]: median=(3312, 255), mad=(3312, 255)


Processing embeddings:  58%|█████▊    | 33/57 [01:50<01:27,  3.64s/it]

PlasmoDB-68_PinuiSanAntonio1 [MF]: median=(3290, 744), mad=(3290, 744)


Processing embeddings:  60%|█████▉    | 34/57 [01:56<01:34,  4.12s/it]

PlasmoDB-68_PknowlesiH [BP]: median=(2592, 1548), mad=(2592, 1548)


Processing embeddings:  61%|██████▏   | 35/57 [01:57<01:10,  3.20s/it]

PlasmoDB-68_PknowlesiH [CC]: median=(1950, 255), mad=(1950, 255)


Processing embeddings:  63%|██████▎   | 36/57 [01:59<01:02,  2.98s/it]

PlasmoDB-68_PknowlesiH [MF]: median=(2220, 744), mad=(2220, 744)


Processing embeddings:  65%|██████▍   | 37/57 [02:05<01:16,  3.83s/it]

PlasmoDB-68_PmalariaeUG01 [BP]: median=(2868, 1548), mad=(2868, 1548)


Processing embeddings:  67%|██████▋   | 38/57 [02:06<00:57,  3.01s/it]

PlasmoDB-68_PmalariaeUG01 [CC]: median=(2035, 255), mad=(2035, 255)


Processing embeddings:  68%|██████▊   | 39/57 [02:09<00:54,  3.01s/it]

PlasmoDB-68_PmalariaeUG01 [MF]: median=(2478, 744), mad=(2478, 744)


Processing embeddings:  70%|███████   | 40/57 [02:15<01:04,  3.81s/it]

PlasmoDB-68_PovalecurtisiGH01 [BP]: median=(2824, 1548), mad=(2824, 1548)


Processing embeddings:  72%|███████▏  | 41/57 [02:16<00:49,  3.12s/it]

PlasmoDB-68_PovalecurtisiGH01 [CC]: median=(2718, 255), mad=(2718, 255)


Processing embeddings:  74%|███████▎  | 42/57 [02:19<00:45,  3.03s/it]

PlasmoDB-68_PovalecurtisiGH01 [MF]: median=(2488, 744), mad=(2488, 744)


Processing embeddings:  75%|███████▌  | 43/57 [02:25<00:55,  3.93s/it]

PlasmoDB-68_PovalewallikeriPowCR01 [BP]: median=(2976, 1548), mad=(2976, 1548)


Processing embeddings:  77%|███████▋  | 44/57 [02:26<00:41,  3.16s/it]

PlasmoDB-68_PovalewallikeriPowCR01 [CC]: median=(2556, 255), mad=(2556, 255)


Processing embeddings:  79%|███████▉  | 45/57 [02:30<00:38,  3.18s/it]

PlasmoDB-68_PovalewallikeriPowCR01 [MF]: median=(2584, 744), mad=(2584, 744)


Processing embeddings:  81%|████████  | 46/57 [02:36<00:45,  4.10s/it]

PlasmoDB-68_PreichenowiCDC [BP]: median=(2485, 1548), mad=(2485, 1548)


Processing embeddings:  82%|████████▏ | 47/57 [02:37<00:32,  3.22s/it]

PlasmoDB-68_PreichenowiCDC [CC]: median=(1772, 255), mad=(1772, 255)


Processing embeddings:  84%|████████▍ | 48/57 [02:40<00:26,  2.98s/it]

PlasmoDB-68_PreichenowiCDC [MF]: median=(2121, 744), mad=(2121, 744)


Processing embeddings:  86%|████████▌ | 49/57 [02:45<00:29,  3.74s/it]

PlasmoDB-68_PvinckeibrucechwattiDA [BP]: median=(2680, 1548), mad=(2680, 1548)


Processing embeddings:  88%|████████▊ | 50/57 [02:46<00:21,  3.03s/it]

PlasmoDB-68_PvinckeibrucechwattiDA [CC]: median=(2526, 255), mad=(2526, 255)


Processing embeddings:  89%|████████▉ | 51/57 [02:49<00:17,  2.87s/it]

PlasmoDB-68_PvinckeibrucechwattiDA [MF]: median=(2259, 744), mad=(2259, 744)


Processing embeddings:  91%|█████████ | 52/57 [02:54<00:18,  3.63s/it]

PlasmoDB-68_PvivaxSal1 [BP]: median=(2606, 1548), mad=(2606, 1548)


Processing embeddings:  93%|█████████▎| 53/57 [02:56<00:11,  2.93s/it]

PlasmoDB-68_PvivaxSal1 [CC]: median=(2263, 255), mad=(2263, 255)


Processing embeddings:  95%|█████████▍| 54/57 [02:58<00:08,  2.83s/it]

PlasmoDB-68_PvivaxSal1 [MF]: median=(2341, 744), mad=(2341, 744)


Processing embeddings:  96%|█████████▋| 55/57 [03:03<00:07,  3.54s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023 [BP]: median=(2564, 1548), mad=(2564, 1548)


Processing embeddings:  98%|█████████▊| 56/57 [03:05<00:02,  2.96s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023 [CC]: median=(2961, 255), mad=(2961, 255)


Processing embeddings: 100%|██████████| 57/57 [03:07<00:00,  3.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023 [MF]: median=(2169, 744), mad=(2169, 744)
Median/MAD files saved





In [38]:
output_dir

PosixPath('genomes_to_annotate_with_PlasmoFP/missing_by_aspect_new_2/ensemble_probs')

In [47]:
eff_thresh = {
    "Function": json.load(open("effective_thresholds_ensemble20_function.json")),
    "Process": json.load(open("effective_thresholds_ensemble20_process.json")),
    "Component": json.load(open("effective_thresholds_ensemble20_component.json")),
}

for aspect, term_dict in eff_thresh.items():
    for term, fdr_map in term_dict.items():
        new_map = {}
        for fdr_str, thr in fdr_map.items():
            if thr is not None:
                new_map[round(float(fdr_str), 2)] = thr
        eff_thresh[aspect][term] = new_map

mlb_by_asp = {
    "MF": pickle.load(open("function_mlb.pkl", "rb")),
    "BP": pickle.load(open("process_mlb.pkl", "rb")),
    "CC": pickle.load(open("component_mlb.pkl", "rb"))
}

median_files = sorted(output_dir.glob("*_median_20fold.npy"))
all_predictions = {}

for med_fp in tqdm(median_files, desc="Generating predictions with corrected thresholds", leave=False):
    parts = med_fp.stem.split("_")
    asp = parts[-3]  # Extract aspect (MF/BP/CC)
    species = "_".join(parts[:-3])  # Extract species name
    
    median_probs = np.load(med_fp)
    mad_fp = med_fp.with_name(med_fp.name.replace("_median_", "_mad_"))
    MAD_probs = np.load(mad_fp)
    
    gene_dict_fp = missing_dir / f"{species}_missing_{asp}_gene_dict.pkl"
    gene_dict = pickle.load(open(gene_dict_fp, "rb"))
    test_entries = list(gene_dict.keys())
    
    aspect_name = {"MF": "Function", "BP": "Process", "CC": "Component"}[asp]
    thresholds = eff_thresh[aspect_name]
    mlb = mlb_by_asp[asp]
    
    preds_for_aspect = {}
    target_fdrs = [0.01, 0.05, 0.10, 0.20, 0.30]
    
    for target_fdr in target_fdrs:
        tqdm.write(f"{species}/{asp} {target_fdr}")
        pred_dict = build_pred_annots_dict(
            median_probs, MAD_probs, thresholds,
            test_entries=test_entries,
            terms=list(mlb.classes_),
            lambda_val=1,
            target_fdr=target_fdr
        )
        preds_for_aspect[round(target_fdr, 3)] = pred_dict
    
    all_predictions.setdefault(species, {})[asp] = preds_for_aspect

out_path = missing_dir / "all_PUF_predictions.pkl"
with open(out_path, "wb") as f:
    pickle.dump(all_predictions, f)

Generating predictions with corrected thresholds:   0%|          | 0/57 [00:00<?, ?it/s]

PlasmoDB-68_PadleriG01/BP 0.01


Building predictions: 100%|██████████| 2863/2863 [00:39<00:00, 72.14it/s]
Generating predictions with corrected thresholds:   0%|          | 0/57 [00:39<?, ?it/s]

PlasmoDB-68_PadleriG01/BP 0.05


Building predictions: 100%|██████████| 2863/2863 [00:38<00:00, 73.94it/s]
Generating predictions with corrected thresholds:   0%|          | 0/57 [01:18<?, ?it/s]

PlasmoDB-68_PadleriG01/BP 0.1


Building predictions: 100%|██████████| 2863/2863 [00:38<00:00, 73.65it/s]
Generating predictions with corrected thresholds:   0%|          | 0/57 [01:57<?, ?it/s]

PlasmoDB-68_PadleriG01/BP 0.2


Building predictions: 100%|██████████| 2863/2863 [00:39<00:00, 71.85it/s]
Generating predictions with corrected thresholds:   0%|          | 0/57 [02:37<?, ?it/s]

PlasmoDB-68_PadleriG01/BP 0.3


Building predictions: 100%|██████████| 2863/2863 [00:40<00:00, 70.90it/s]
Generating predictions with corrected thresholds:   2%|▏         | 1/57 [03:17<3:04:28, 197.65s/it]

PlasmoDB-68_PadleriG01/CC 0.01


Building predictions: 100%|██████████| 2453/2453 [00:05<00:00, 466.44it/s]
Generating predictions with corrected thresholds:   2%|▏         | 1/57 [03:22<3:04:28, 197.65s/it]

PlasmoDB-68_PadleriG01/CC 0.05


Building predictions: 100%|██████████| 2453/2453 [00:05<00:00, 468.70it/s]
Generating predictions with corrected thresholds:   2%|▏         | 1/57 [03:28<3:04:28, 197.65s/it]

PlasmoDB-68_PadleriG01/CC 0.1


Building predictions: 100%|██████████| 2453/2453 [00:05<00:00, 453.77it/s]
Generating predictions with corrected thresholds:   2%|▏         | 1/57 [03:33<3:04:28, 197.65s/it]

PlasmoDB-68_PadleriG01/CC 0.2


Building predictions: 100%|██████████| 2453/2453 [00:05<00:00, 429.00it/s]
Generating predictions with corrected thresholds:   2%|▏         | 1/57 [03:39<3:04:28, 197.65s/it]

PlasmoDB-68_PadleriG01/CC 0.3


Building predictions: 100%|██████████| 2453/2453 [00:05<00:00, 425.16it/s]
Generating predictions with corrected thresholds:   4%|▎         | 2/57 [03:45<1:29:23, 97.52s/it] 

PlasmoDB-68_PadleriG01/MF 0.01


Building predictions: 100%|██████████| 2422/2422 [00:15<00:00, 153.20it/s]
Generating predictions with corrected thresholds:   4%|▎         | 2/57 [04:00<1:29:23, 97.52s/it]

PlasmoDB-68_PadleriG01/MF 0.05


Building predictions: 100%|██████████| 2422/2422 [00:16<00:00, 144.14it/s]
Generating predictions with corrected thresholds:   4%|▎         | 2/57 [04:17<1:29:23, 97.52s/it]

PlasmoDB-68_PadleriG01/MF 0.1


Building predictions: 100%|██████████| 2422/2422 [00:16<00:00, 149.83it/s]
Generating predictions with corrected thresholds:   4%|▎         | 2/57 [04:33<1:29:23, 97.52s/it]

PlasmoDB-68_PadleriG01/MF 0.2


Building predictions: 100%|██████████| 2422/2422 [00:16<00:00, 150.84it/s]
Generating predictions with corrected thresholds:   4%|▎         | 2/57 [04:49<1:29:23, 97.52s/it]

PlasmoDB-68_PadleriG01/MF 0.3


Building predictions: 100%|██████████| 2422/2422 [00:16<00:00, 143.73it/s]
Generating predictions with corrected thresholds:   5%|▌         | 3/57 [05:06<1:21:16, 90.31s/it]

PlasmoDB-68_PbergheiANKA/BP 0.01


Building predictions: 100%|██████████| 2272/2272 [00:31<00:00, 72.86it/s]
Generating predictions with corrected thresholds:   5%|▌         | 3/57 [05:38<1:21:16, 90.31s/it]

PlasmoDB-68_PbergheiANKA/BP 0.05


Building predictions: 100%|██████████| 2272/2272 [00:31<00:00, 72.85it/s]
Generating predictions with corrected thresholds:   5%|▌         | 3/57 [06:09<1:21:16, 90.31s/it]

PlasmoDB-68_PbergheiANKA/BP 0.1


Building predictions: 100%|██████████| 2272/2272 [00:30<00:00, 73.81it/s]
Generating predictions with corrected thresholds:   5%|▌         | 3/57 [06:39<1:21:16, 90.31s/it]

PlasmoDB-68_PbergheiANKA/BP 0.2


Building predictions: 100%|██████████| 2272/2272 [00:30<00:00, 73.32it/s]
Generating predictions with corrected thresholds:   5%|▌         | 3/57 [07:10<1:21:16, 90.31s/it]

PlasmoDB-68_PbergheiANKA/BP 0.3


Building predictions: 100%|██████████| 2272/2272 [00:31<00:00, 73.11it/s]
Generating predictions with corrected thresholds:   7%|▋         | 4/57 [07:42<1:42:25, 115.95s/it]

PlasmoDB-68_PbergheiANKA/CC 0.01


Building predictions: 100%|██████████| 1712/1712 [00:03<00:00, 472.71it/s]
Generating predictions with corrected thresholds:   7%|▋         | 4/57 [07:45<1:42:25, 115.95s/it]

PlasmoDB-68_PbergheiANKA/CC 0.05


Building predictions: 100%|██████████| 1712/1712 [00:03<00:00, 445.21it/s]
Generating predictions with corrected thresholds:   7%|▋         | 4/57 [07:49<1:42:25, 115.95s/it]

PlasmoDB-68_PbergheiANKA/CC 0.1


Building predictions: 100%|██████████| 1712/1712 [00:03<00:00, 469.69it/s]
Generating predictions with corrected thresholds:   7%|▋         | 4/57 [07:53<1:42:25, 115.95s/it]

PlasmoDB-68_PbergheiANKA/CC 0.2


Building predictions: 100%|██████████| 1712/1712 [00:03<00:00, 474.78it/s]
Generating predictions with corrected thresholds:   7%|▋         | 4/57 [07:56<1:42:25, 115.95s/it]

PlasmoDB-68_PbergheiANKA/CC 0.3


Building predictions: 100%|██████████| 1712/1712 [00:03<00:00, 469.32it/s]
Generating predictions with corrected thresholds:   9%|▉         | 5/57 [08:00<1:10:00, 80.77s/it] 

PlasmoDB-68_PbergheiANKA/MF 0.01


Building predictions: 100%|██████████| 1992/1992 [00:12<00:00, 154.11it/s]
Generating predictions with corrected thresholds:   9%|▉         | 5/57 [08:13<1:10:00, 80.77s/it]

PlasmoDB-68_PbergheiANKA/MF 0.05


Building predictions: 100%|██████████| 1992/1992 [00:12<00:00, 154.59it/s]
Generating predictions with corrected thresholds:   9%|▉         | 5/57 [08:26<1:10:00, 80.77s/it]

PlasmoDB-68_PbergheiANKA/MF 0.1


Building predictions: 100%|██████████| 1992/1992 [00:12<00:00, 153.95it/s]
Generating predictions with corrected thresholds:   9%|▉         | 5/57 [08:39<1:10:00, 80.77s/it]

PlasmoDB-68_PbergheiANKA/MF 0.2


Building predictions: 100%|██████████| 1992/1992 [00:13<00:00, 151.38it/s]
Generating predictions with corrected thresholds:   9%|▉         | 5/57 [08:52<1:10:00, 80.77s/it]

PlasmoDB-68_PbergheiANKA/MF 0.3


Building predictions: 100%|██████████| 1992/1992 [00:12<00:00, 153.81it/s]
Generating predictions with corrected thresholds:  11%|█         | 6/57 [09:05<1:04:04, 75.37s/it]

PlasmoDB-68_PblacklockiG01/BP 0.01


Building predictions: 100%|██████████| 2741/2741 [00:37<00:00, 72.57it/s]
Generating predictions with corrected thresholds:  11%|█         | 6/57 [09:43<1:04:04, 75.37s/it]

PlasmoDB-68_PblacklockiG01/BP 0.05


Building predictions: 100%|██████████| 2741/2741 [00:37<00:00, 73.82it/s]
Generating predictions with corrected thresholds:  11%|█         | 6/57 [10:20<1:04:04, 75.37s/it]

PlasmoDB-68_PblacklockiG01/BP 0.1


Building predictions: 100%|██████████| 2741/2741 [00:37<00:00, 72.84it/s]
Generating predictions with corrected thresholds:  11%|█         | 6/57 [10:57<1:04:04, 75.37s/it]

PlasmoDB-68_PblacklockiG01/BP 0.2


Building predictions: 100%|██████████| 2741/2741 [00:37<00:00, 73.84it/s]
Generating predictions with corrected thresholds:  11%|█         | 6/57 [11:35<1:04:04, 75.37s/it]

PlasmoDB-68_PblacklockiG01/BP 0.3


Building predictions: 100%|██████████| 2741/2741 [00:37<00:00, 73.14it/s]
Generating predictions with corrected thresholds:  12%|█▏        | 7/57 [12:12<1:33:16, 111.92s/it]

PlasmoDB-68_PblacklockiG01/CC 0.01


Building predictions: 100%|██████████| 2362/2362 [00:05<00:00, 469.97it/s]
Generating predictions with corrected thresholds:  12%|█▏        | 7/57 [12:17<1:33:16, 111.92s/it]

PlasmoDB-68_PblacklockiG01/CC 0.05


Building predictions: 100%|██████████| 2362/2362 [00:04<00:00, 475.98it/s]
Generating predictions with corrected thresholds:  12%|█▏        | 7/57 [12:22<1:33:16, 111.92s/it]

PlasmoDB-68_PblacklockiG01/CC 0.1


Building predictions: 100%|██████████| 2362/2362 [00:04<00:00, 474.10it/s]
Generating predictions with corrected thresholds:  12%|█▏        | 7/57 [12:27<1:33:16, 111.92s/it]

PlasmoDB-68_PblacklockiG01/CC 0.2


Building predictions: 100%|██████████| 2362/2362 [00:05<00:00, 470.99it/s]
Generating predictions with corrected thresholds:  12%|█▏        | 7/57 [12:32<1:33:16, 111.92s/it]

PlasmoDB-68_PblacklockiG01/CC 0.3


Building predictions: 100%|██████████| 2362/2362 [00:04<00:00, 472.49it/s]
Generating predictions with corrected thresholds:  14%|█▍        | 8/57 [12:37<1:08:48, 84.26s/it] 

PlasmoDB-68_PblacklockiG01/MF 0.01


Building predictions: 100%|██████████| 2328/2328 [00:15<00:00, 152.03it/s]
Generating predictions with corrected thresholds:  14%|█▍        | 8/57 [12:52<1:08:48, 84.26s/it]

PlasmoDB-68_PblacklockiG01/MF 0.05


Building predictions: 100%|██████████| 2328/2328 [00:15<00:00, 155.13it/s]
Generating predictions with corrected thresholds:  14%|█▍        | 8/57 [13:07<1:08:48, 84.26s/it]

PlasmoDB-68_PblacklockiG01/MF 0.1


Building predictions: 100%|██████████| 2328/2328 [00:15<00:00, 154.94it/s]
Generating predictions with corrected thresholds:  14%|█▍        | 8/57 [13:22<1:08:48, 84.26s/it]

PlasmoDB-68_PblacklockiG01/MF 0.2


Building predictions: 100%|██████████| 2328/2328 [00:15<00:00, 151.01it/s]
Generating predictions with corrected thresholds:  14%|█▍        | 8/57 [13:38<1:08:48, 84.26s/it]

PlasmoDB-68_PblacklockiG01/MF 0.3


Building predictions: 100%|██████████| 2328/2328 [00:15<00:00, 149.50it/s]
Generating predictions with corrected thresholds:  16%|█▌        | 9/57 [13:53<1:05:25, 81.79s/it]

PlasmoDB-68_Pchabaudichabaudi/BP 0.01


Building predictions: 100%|██████████| 2432/2432 [00:33<00:00, 72.72it/s]
Generating predictions with corrected thresholds:  16%|█▌        | 9/57 [14:27<1:05:25, 81.79s/it]

PlasmoDB-68_Pchabaudichabaudi/BP 0.05


Building predictions: 100%|██████████| 2432/2432 [00:33<00:00, 73.46it/s]
Generating predictions with corrected thresholds:  16%|█▌        | 9/57 [15:00<1:05:25, 81.79s/it]

PlasmoDB-68_Pchabaudichabaudi/BP 0.1


Building predictions: 100%|██████████| 2432/2432 [00:32<00:00, 73.95it/s]
Generating predictions with corrected thresholds:  16%|█▌        | 9/57 [15:33<1:05:25, 81.79s/it]

PlasmoDB-68_Pchabaudichabaudi/BP 0.2


Building predictions: 100%|██████████| 2432/2432 [00:31<00:00, 78.20it/s]
Generating predictions with corrected thresholds:  16%|█▌        | 9/57 [16:04<1:05:25, 81.79s/it]

PlasmoDB-68_Pchabaudichabaudi/BP 0.3


Building predictions: 100%|██████████| 2432/2432 [00:31<00:00, 78.42it/s]
Generating predictions with corrected thresholds:  18%|█▊        | 10/57 [16:35<1:23:22, 106.43s/it]

PlasmoDB-68_Pchabaudichabaudi/CC 0.01


Building predictions: 100%|██████████| 1929/1929 [00:03<00:00, 506.05it/s]
Generating predictions with corrected thresholds:  18%|█▊        | 10/57 [16:39<1:23:22, 106.43s/it]

PlasmoDB-68_Pchabaudichabaudi/CC 0.05


Building predictions: 100%|██████████| 1929/1929 [00:03<00:00, 505.49it/s]
Generating predictions with corrected thresholds:  18%|█▊        | 10/57 [16:43<1:23:22, 106.43s/it]

PlasmoDB-68_Pchabaudichabaudi/CC 0.1


Building predictions: 100%|██████████| 1929/1929 [00:03<00:00, 505.06it/s]
Generating predictions with corrected thresholds:  18%|█▊        | 10/57 [16:46<1:23:22, 106.43s/it]

PlasmoDB-68_Pchabaudichabaudi/CC 0.2


Building predictions: 100%|██████████| 1929/1929 [00:03<00:00, 505.23it/s]
Generating predictions with corrected thresholds:  18%|█▊        | 10/57 [16:50<1:23:22, 106.43s/it]

PlasmoDB-68_Pchabaudichabaudi/CC 0.3


Building predictions: 100%|██████████| 1929/1929 [00:03<00:00, 503.42it/s]
Generating predictions with corrected thresholds:  19%|█▉        | 11/57 [16:54<1:01:06, 79.71s/it] 

PlasmoDB-68_Pchabaudichabaudi/MF 0.01


Building predictions: 100%|██████████| 2085/2085 [00:12<00:00, 166.96it/s]
Generating predictions with corrected thresholds:  19%|█▉        | 11/57 [17:07<1:01:06, 79.71s/it]

PlasmoDB-68_Pchabaudichabaudi/MF 0.05


Building predictions: 100%|██████████| 2085/2085 [00:12<00:00, 166.71it/s]
Generating predictions with corrected thresholds:  19%|█▉        | 11/57 [17:19<1:01:06, 79.71s/it]

PlasmoDB-68_Pchabaudichabaudi/MF 0.1


Building predictions: 100%|██████████| 2085/2085 [00:12<00:00, 165.22it/s]
Generating predictions with corrected thresholds:  19%|█▉        | 11/57 [17:32<1:01:06, 79.71s/it]

PlasmoDB-68_Pchabaudichabaudi/MF 0.2


Building predictions: 100%|██████████| 2085/2085 [00:12<00:00, 164.22it/s]
Generating predictions with corrected thresholds:  19%|█▉        | 11/57 [17:44<1:01:06, 79.71s/it]

PlasmoDB-68_Pchabaudichabaudi/MF 0.3


Building predictions: 100%|██████████| 2085/2085 [00:12<00:00, 166.73it/s]
Generating predictions with corrected thresholds:  21%|██        | 12/57 [17:57<55:56, 74.58s/it]  

PlasmoDB-68_PcoatneyiHackeri/BP 0.01


Building predictions: 100%|██████████| 3247/3247 [00:41<00:00, 78.67it/s]
Generating predictions with corrected thresholds:  21%|██        | 12/57 [18:38<55:56, 74.58s/it]

PlasmoDB-68_PcoatneyiHackeri/BP 0.05


Building predictions: 100%|██████████| 3247/3247 [00:41<00:00, 78.22it/s]
Generating predictions with corrected thresholds:  21%|██        | 12/57 [19:20<55:56, 74.58s/it]

PlasmoDB-68_PcoatneyiHackeri/BP 0.1


Building predictions: 100%|██████████| 3247/3247 [00:41<00:00, 78.44it/s]
Generating predictions with corrected thresholds:  21%|██        | 12/57 [20:01<55:56, 74.58s/it]

PlasmoDB-68_PcoatneyiHackeri/BP 0.2


Building predictions: 100%|██████████| 3247/3247 [00:41<00:00, 78.49it/s]
Generating predictions with corrected thresholds:  21%|██        | 12/57 [20:43<55:56, 74.58s/it]

PlasmoDB-68_PcoatneyiHackeri/BP 0.3


Building predictions: 100%|██████████| 3247/3247 [00:41<00:00, 78.27it/s]
Generating predictions with corrected thresholds:  23%|██▎       | 13/57 [21:24<1:24:07, 114.72s/it]

PlasmoDB-68_PcoatneyiHackeri/CC 0.01


Building predictions: 100%|██████████| 2845/2845 [00:05<00:00, 505.91it/s]
Generating predictions with corrected thresholds:  23%|██▎       | 13/57 [21:30<1:24:07, 114.72s/it]

PlasmoDB-68_PcoatneyiHackeri/CC 0.05


Building predictions: 100%|██████████| 2845/2845 [00:05<00:00, 506.88it/s]
Generating predictions with corrected thresholds:  23%|██▎       | 13/57 [21:35<1:24:07, 114.72s/it]

PlasmoDB-68_PcoatneyiHackeri/CC 0.1


Building predictions: 100%|██████████| 2845/2845 [00:05<00:00, 507.68it/s]
Generating predictions with corrected thresholds:  23%|██▎       | 13/57 [21:41<1:24:07, 114.72s/it]

PlasmoDB-68_PcoatneyiHackeri/CC 0.2


Building predictions: 100%|██████████| 2845/2845 [00:05<00:00, 508.37it/s]
Generating predictions with corrected thresholds:  23%|██▎       | 13/57 [21:47<1:24:07, 114.72s/it]

PlasmoDB-68_PcoatneyiHackeri/CC 0.3


Building predictions: 100%|██████████| 2845/2845 [00:05<00:00, 507.21it/s]
Generating predictions with corrected thresholds:  25%|██▍       | 14/57 [21:52<1:03:27, 88.55s/it] 

PlasmoDB-68_PcoatneyiHackeri/MF 0.01


Building predictions: 100%|██████████| 2835/2835 [00:17<00:00, 165.89it/s]
Generating predictions with corrected thresholds:  25%|██▍       | 14/57 [22:09<1:03:27, 88.55s/it]

PlasmoDB-68_PcoatneyiHackeri/MF 0.05


Building predictions: 100%|██████████| 2835/2835 [00:16<00:00, 167.14it/s]
Generating predictions with corrected thresholds:  25%|██▍       | 14/57 [22:26<1:03:27, 88.55s/it]

PlasmoDB-68_PcoatneyiHackeri/MF 0.1


Building predictions: 100%|██████████| 2835/2835 [00:17<00:00, 166.02it/s]
Generating predictions with corrected thresholds:  25%|██▍       | 14/57 [22:43<1:03:27, 88.55s/it]

PlasmoDB-68_PcoatneyiHackeri/MF 0.2


Building predictions: 100%|██████████| 2835/2835 [00:17<00:00, 166.42it/s]
Generating predictions with corrected thresholds:  25%|██▍       | 14/57 [23:00<1:03:27, 88.55s/it]

PlasmoDB-68_PcoatneyiHackeri/MF 0.3


Building predictions: 100%|██████████| 2835/2835 [00:17<00:00, 165.34it/s]
Generating predictions with corrected thresholds:  26%|██▋       | 15/57 [23:17<1:01:18, 87.58s/it]

PlasmoDB-68_PcynomolgiM/BP 0.01


Building predictions: 100%|██████████| 2956/2956 [00:37<00:00, 77.92it/s]
Generating predictions with corrected thresholds:  26%|██▋       | 15/57 [23:55<1:01:18, 87.58s/it]

PlasmoDB-68_PcynomolgiM/BP 0.05


Building predictions: 100%|██████████| 2956/2956 [00:41<00:00, 71.47it/s]
Generating predictions with corrected thresholds:  26%|██▋       | 15/57 [24:37<1:01:18, 87.58s/it]

PlasmoDB-68_PcynomolgiM/BP 0.1


Building predictions: 100%|██████████| 2956/2956 [00:40<00:00, 72.68it/s]
Generating predictions with corrected thresholds:  26%|██▋       | 15/57 [25:17<1:01:18, 87.58s/it]

PlasmoDB-68_PcynomolgiM/BP 0.2


Building predictions: 100%|██████████| 2956/2956 [00:40<00:00, 73.23it/s]
Generating predictions with corrected thresholds:  26%|██▋       | 15/57 [25:58<1:01:18, 87.58s/it]

PlasmoDB-68_PcynomolgiM/BP 0.3


Building predictions: 100%|██████████| 2956/2956 [00:40<00:00, 73.27it/s]
Generating predictions with corrected thresholds:  28%|██▊       | 16/57 [26:38<1:23:07, 121.64s/it]

PlasmoDB-68_PcynomolgiM/CC 0.01


Building predictions: 100%|██████████| 3575/3575 [00:07<00:00, 468.59it/s]
Generating predictions with corrected thresholds:  28%|██▊       | 16/57 [26:46<1:23:07, 121.64s/it]

PlasmoDB-68_PcynomolgiM/CC 0.05


Building predictions: 100%|██████████| 3575/3575 [00:07<00:00, 461.18it/s]
Generating predictions with corrected thresholds:  28%|██▊       | 16/57 [26:54<1:23:07, 121.64s/it]

PlasmoDB-68_PcynomolgiM/CC 0.1


Building predictions: 100%|██████████| 3575/3575 [00:08<00:00, 446.74it/s]
Generating predictions with corrected thresholds:  28%|██▊       | 16/57 [27:02<1:23:07, 121.64s/it]

PlasmoDB-68_PcynomolgiM/CC 0.2


Building predictions: 100%|██████████| 3575/3575 [00:07<00:00, 467.05it/s]
Generating predictions with corrected thresholds:  28%|██▊       | 16/57 [27:09<1:23:07, 121.64s/it]

PlasmoDB-68_PcynomolgiM/CC 0.3


Building predictions: 100%|██████████| 3575/3575 [00:07<00:00, 471.53it/s]
Generating predictions with corrected thresholds:  30%|██▉       | 17/57 [27:17<1:04:27, 96.69s/it] 

PlasmoDB-68_PcynomolgiM/MF 0.01


Building predictions: 100%|██████████| 2597/2597 [00:16<00:00, 154.17it/s]
Generating predictions with corrected thresholds:  30%|██▉       | 17/57 [27:34<1:04:27, 96.69s/it]

PlasmoDB-68_PcynomolgiM/MF 0.05


Building predictions: 100%|██████████| 2597/2597 [00:17<00:00, 152.16it/s]
Generating predictions with corrected thresholds:  30%|██▉       | 17/57 [27:51<1:04:27, 96.69s/it]

PlasmoDB-68_PcynomolgiM/MF 0.1


Building predictions: 100%|██████████| 2597/2597 [00:17<00:00, 151.18it/s]
Generating predictions with corrected thresholds:  30%|██▉       | 17/57 [28:08<1:04:27, 96.69s/it]

PlasmoDB-68_PcynomolgiM/MF 0.2


Building predictions: 100%|██████████| 2597/2597 [00:16<00:00, 153.35it/s]
Generating predictions with corrected thresholds:  30%|██▉       | 17/57 [28:25<1:04:27, 96.69s/it]

PlasmoDB-68_PcynomolgiM/MF 0.3


Building predictions: 100%|██████████| 2597/2597 [00:16<00:00, 153.07it/s]
Generating predictions with corrected thresholds:  32%|███▏      | 18/57 [28:42<1:00:34, 93.18s/it]

PlasmoDB-68_Pfalciparum3D7/BP 0.01


Building predictions: 100%|██████████| 1905/1905 [00:26<00:00, 71.94it/s]
Generating predictions with corrected thresholds:  32%|███▏      | 18/57 [29:08<1:00:34, 93.18s/it]

PlasmoDB-68_Pfalciparum3D7/BP 0.05


Building predictions: 100%|██████████| 1905/1905 [00:26<00:00, 73.16it/s]
Generating predictions with corrected thresholds:  32%|███▏      | 18/57 [29:34<1:00:34, 93.18s/it]

PlasmoDB-68_Pfalciparum3D7/BP 0.1


Building predictions: 100%|██████████| 1905/1905 [00:26<00:00, 72.56it/s]
Generating predictions with corrected thresholds:  32%|███▏      | 18/57 [30:01<1:00:34, 93.18s/it]

PlasmoDB-68_Pfalciparum3D7/BP 0.2


Building predictions: 100%|██████████| 1905/1905 [00:25<00:00, 74.08it/s]
Generating predictions with corrected thresholds:  32%|███▏      | 18/57 [30:26<1:00:34, 93.18s/it]

PlasmoDB-68_Pfalciparum3D7/BP 0.3


Building predictions: 100%|██████████| 1905/1905 [00:25<00:00, 73.43it/s]
Generating predictions with corrected thresholds:  33%|███▎      | 19/57 [30:52<1:06:06, 104.38s/it]

PlasmoDB-68_Pfalciparum3D7/CC 0.01


Building predictions: 100%|██████████| 933/933 [00:01<00:00, 476.75it/s]
Generating predictions with corrected thresholds:  33%|███▎      | 19/57 [30:54<1:06:06, 104.38s/it]

PlasmoDB-68_Pfalciparum3D7/CC 0.05


Building predictions: 100%|██████████| 933/933 [00:01<00:00, 469.64it/s]
Generating predictions with corrected thresholds:  33%|███▎      | 19/57 [30:56<1:06:06, 104.38s/it]

PlasmoDB-68_Pfalciparum3D7/CC 0.1


Building predictions: 100%|██████████| 933/933 [00:01<00:00, 470.78it/s]
Generating predictions with corrected thresholds:  33%|███▎      | 19/57 [30:58<1:06:06, 104.38s/it]

PlasmoDB-68_Pfalciparum3D7/CC 0.2


Building predictions: 100%|██████████| 933/933 [00:02<00:00, 415.01it/s]
Generating predictions with corrected thresholds:  33%|███▎      | 19/57 [31:01<1:06:06, 104.38s/it]

PlasmoDB-68_Pfalciparum3D7/CC 0.3


Building predictions: 100%|██████████| 933/933 [00:02<00:00, 464.89it/s]
Generating predictions with corrected thresholds:  35%|███▌      | 20/57 [31:03<46:56, 76.11s/it]   

PlasmoDB-68_Pfalciparum3D7/MF 0.01


Building predictions: 100%|██████████| 1743/1743 [00:11<00:00, 155.65it/s]
Generating predictions with corrected thresholds:  35%|███▌      | 20/57 [31:14<46:56, 76.11s/it]

PlasmoDB-68_Pfalciparum3D7/MF 0.05


Building predictions: 100%|██████████| 1743/1743 [00:11<00:00, 153.47it/s]
Generating predictions with corrected thresholds:  35%|███▌      | 20/57 [31:25<46:56, 76.11s/it]

PlasmoDB-68_Pfalciparum3D7/MF 0.1


Building predictions: 100%|██████████| 1743/1743 [00:11<00:00, 155.14it/s]
Generating predictions with corrected thresholds:  35%|███▌      | 20/57 [31:36<46:56, 76.11s/it]

PlasmoDB-68_Pfalciparum3D7/MF 0.2


Building predictions: 100%|██████████| 1743/1743 [00:11<00:00, 154.76it/s]
Generating predictions with corrected thresholds:  35%|███▌      | 20/57 [31:48<46:56, 76.11s/it]

PlasmoDB-68_Pfalciparum3D7/MF 0.3


Building predictions: 100%|██████████| 1743/1743 [00:11<00:00, 151.61it/s]
Generating predictions with corrected thresholds:  37%|███▋      | 21/57 [31:59<42:08, 70.25s/it]

PlasmoDB-68_PfragileNilgiri/BP 0.01


Building predictions: 100%|██████████| 3448/3448 [00:47<00:00, 72.81it/s]
Generating predictions with corrected thresholds:  37%|███▋      | 21/57 [32:47<42:08, 70.25s/it]

PlasmoDB-68_PfragileNilgiri/BP 0.05


Building predictions: 100%|██████████| 3448/3448 [00:48<00:00, 71.13it/s]
Generating predictions with corrected thresholds:  37%|███▋      | 21/57 [33:35<42:08, 70.25s/it]

PlasmoDB-68_PfragileNilgiri/BP 0.1


Building predictions: 100%|██████████| 3448/3448 [00:48<00:00, 71.16it/s]
Generating predictions with corrected thresholds:  37%|███▋      | 21/57 [34:23<42:08, 70.25s/it]

PlasmoDB-68_PfragileNilgiri/BP 0.2


Building predictions: 100%|██████████| 3448/3448 [00:48<00:00, 70.93it/s]
Generating predictions with corrected thresholds:  37%|███▋      | 21/57 [35:12<42:08, 70.25s/it]

PlasmoDB-68_PfragileNilgiri/BP 0.3


Building predictions: 100%|██████████| 3448/3448 [00:48<00:00, 70.76it/s]
Generating predictions with corrected thresholds:  39%|███▊      | 22/57 [36:01<1:10:59, 121.70s/it]

PlasmoDB-68_PfragileNilgiri/CC 0.01


Building predictions: 100%|██████████| 3014/3014 [00:06<00:00, 438.53it/s]
Generating predictions with corrected thresholds:  39%|███▊      | 22/57 [36:08<1:10:59, 121.70s/it]

PlasmoDB-68_PfragileNilgiri/CC 0.05


Building predictions: 100%|██████████| 3014/3014 [00:06<00:00, 453.84it/s]
Generating predictions with corrected thresholds:  39%|███▊      | 22/57 [36:14<1:10:59, 121.70s/it]

PlasmoDB-68_PfragileNilgiri/CC 0.1


Building predictions: 100%|██████████| 3014/3014 [00:06<00:00, 447.08it/s]
Generating predictions with corrected thresholds:  39%|███▊      | 22/57 [36:21<1:10:59, 121.70s/it]

PlasmoDB-68_PfragileNilgiri/CC 0.2


Building predictions: 100%|██████████| 3014/3014 [00:06<00:00, 458.29it/s]
Generating predictions with corrected thresholds:  39%|███▊      | 22/57 [36:28<1:10:59, 121.70s/it]

PlasmoDB-68_PfragileNilgiri/CC 0.3


Building predictions: 100%|██████████| 3014/3014 [00:06<00:00, 450.62it/s]
Generating predictions with corrected thresholds:  40%|████      | 23/57 [36:34<53:58, 95.25s/it]   

PlasmoDB-68_PfragileNilgiri/MF 0.01


Building predictions: 100%|██████████| 3048/3048 [00:20<00:00, 145.30it/s]
Generating predictions with corrected thresholds:  40%|████      | 23/57 [36:55<53:58, 95.25s/it]

PlasmoDB-68_PfragileNilgiri/MF 0.05


Building predictions: 100%|██████████| 3048/3048 [00:20<00:00, 146.02it/s]
Generating predictions with corrected thresholds:  40%|████      | 23/57 [37:16<53:58, 95.25s/it]

PlasmoDB-68_PfragileNilgiri/MF 0.1


Building predictions: 100%|██████████| 3048/3048 [00:20<00:00, 147.41it/s]
Generating predictions with corrected thresholds:  40%|████      | 23/57 [37:37<53:58, 95.25s/it]

PlasmoDB-68_PfragileNilgiri/MF 0.2


Building predictions: 100%|██████████| 3048/3048 [00:21<00:00, 144.30it/s]
Generating predictions with corrected thresholds:  40%|████      | 23/57 [37:58<53:58, 95.25s/it]

PlasmoDB-68_PfragileNilgiri/MF 0.3


Building predictions: 100%|██████████| 3048/3048 [00:21<00:00, 145.02it/s]
Generating predictions with corrected thresholds:  42%|████▏     | 24/57 [38:19<53:57, 98.09s/it]

PlasmoDB-68_PgaboniG01/BP 0.01


Building predictions: 100%|██████████| 2732/2732 [00:38<00:00, 70.38it/s]
Generating predictions with corrected thresholds:  42%|████▏     | 24/57 [38:58<53:57, 98.09s/it]

PlasmoDB-68_PgaboniG01/BP 0.05


Building predictions: 100%|██████████| 2732/2732 [00:38<00:00, 70.57it/s]
Generating predictions with corrected thresholds:  42%|████▏     | 24/57 [39:37<53:57, 98.09s/it]

PlasmoDB-68_PgaboniG01/BP 0.1


Building predictions: 100%|██████████| 2732/2732 [00:38<00:00, 70.47it/s]
Generating predictions with corrected thresholds:  42%|████▏     | 24/57 [40:15<53:57, 98.09s/it]

PlasmoDB-68_PgaboniG01/BP 0.2


Building predictions: 100%|██████████| 2732/2732 [00:38<00:00, 71.81it/s]
Generating predictions with corrected thresholds:  42%|████▏     | 24/57 [40:54<53:57, 98.09s/it]

PlasmoDB-68_PgaboniG01/BP 0.3


Building predictions: 100%|██████████| 2732/2732 [00:39<00:00, 70.00it/s]
Generating predictions with corrected thresholds:  44%|████▍     | 25/57 [41:33<1:07:34, 126.70s/it]

PlasmoDB-68_PgaboniG01/CC 0.01


Building predictions: 100%|██████████| 2476/2476 [00:05<00:00, 433.11it/s]
Generating predictions with corrected thresholds:  44%|████▍     | 25/57 [41:38<1:07:34, 126.70s/it]

PlasmoDB-68_PgaboniG01/CC 0.05


Building predictions: 100%|██████████| 2476/2476 [00:05<00:00, 429.03it/s]
Generating predictions with corrected thresholds:  44%|████▍     | 25/57 [41:44<1:07:34, 126.70s/it]

PlasmoDB-68_PgaboniG01/CC 0.1


Building predictions: 100%|██████████| 2476/2476 [00:05<00:00, 454.43it/s]
Generating predictions with corrected thresholds:  44%|████▍     | 25/57 [41:50<1:07:34, 126.70s/it]

PlasmoDB-68_PgaboniG01/CC 0.2


Building predictions: 100%|██████████| 2476/2476 [00:05<00:00, 459.29it/s]
Generating predictions with corrected thresholds:  44%|████▍     | 25/57 [41:55<1:07:34, 126.70s/it]

PlasmoDB-68_PgaboniG01/CC 0.3


Building predictions: 100%|██████████| 2476/2476 [00:05<00:00, 459.63it/s]
Generating predictions with corrected thresholds:  46%|████▌     | 26/57 [42:00<50:07, 97.01s/it]   

PlasmoDB-68_PgaboniG01/MF 0.01


Building predictions: 100%|██████████| 2304/2304 [00:16<00:00, 143.19it/s]
Generating predictions with corrected thresholds:  46%|████▌     | 26/57 [42:16<50:07, 97.01s/it]

PlasmoDB-68_PgaboniG01/MF 0.05


Building predictions: 100%|██████████| 2304/2304 [00:15<00:00, 147.72it/s]
Generating predictions with corrected thresholds:  46%|████▌     | 26/57 [42:32<50:07, 97.01s/it]

PlasmoDB-68_PgaboniG01/MF 0.1


Building predictions: 100%|██████████| 2304/2304 [00:15<00:00, 146.43it/s]
Generating predictions with corrected thresholds:  46%|████▌     | 26/57 [42:48<50:07, 97.01s/it]

PlasmoDB-68_PgaboniG01/MF 0.2


Building predictions: 100%|██████████| 2304/2304 [00:15<00:00, 144.27it/s]
Generating predictions with corrected thresholds:  46%|████▌     | 26/57 [43:04<50:07, 97.01s/it]

PlasmoDB-68_PgaboniG01/MF 0.3


Building predictions: 100%|██████████| 2304/2304 [00:15<00:00, 145.51it/s]
Generating predictions with corrected thresholds:  47%|████▋     | 27/57 [43:20<45:50, 91.69s/it]

PlasmoDB-68_Pgallinaceum8A/BP 0.01


Building predictions: 100%|██████████| 2607/2607 [00:37<00:00, 69.81it/s]
Generating predictions with corrected thresholds:  47%|████▋     | 27/57 [43:57<45:50, 91.69s/it]

PlasmoDB-68_Pgallinaceum8A/BP 0.05


Building predictions: 100%|██████████| 2607/2607 [00:37<00:00, 69.80it/s]
Generating predictions with corrected thresholds:  47%|████▋     | 27/57 [44:34<45:50, 91.69s/it]

PlasmoDB-68_Pgallinaceum8A/BP 0.1


Building predictions: 100%|██████████| 2607/2607 [00:36<00:00, 70.86it/s]
Generating predictions with corrected thresholds:  47%|████▋     | 27/57 [45:11<45:50, 91.69s/it]

PlasmoDB-68_Pgallinaceum8A/BP 0.2


Building predictions: 100%|██████████| 2607/2607 [00:37<00:00, 69.61it/s]
Generating predictions with corrected thresholds:  47%|████▋     | 27/57 [45:49<45:50, 91.69s/it]

PlasmoDB-68_Pgallinaceum8A/BP 0.3


Building predictions: 100%|██████████| 2607/2607 [00:37<00:00, 70.41it/s]
Generating predictions with corrected thresholds:  49%|████▉     | 28/57 [46:26<57:59, 119.99s/it]

PlasmoDB-68_Pgallinaceum8A/CC 0.01


Building predictions: 100%|██████████| 2073/2073 [00:04<00:00, 449.98it/s]
Generating predictions with corrected thresholds:  49%|████▉     | 28/57 [46:30<57:59, 119.99s/it]

PlasmoDB-68_Pgallinaceum8A/CC 0.05


Building predictions: 100%|██████████| 2073/2073 [00:04<00:00, 461.01it/s]
Generating predictions with corrected thresholds:  49%|████▉     | 28/57 [46:35<57:59, 119.99s/it]

PlasmoDB-68_Pgallinaceum8A/CC 0.1


Building predictions: 100%|██████████| 2073/2073 [00:04<00:00, 455.16it/s]
Generating predictions with corrected thresholds:  49%|████▉     | 28/57 [46:39<57:59, 119.99s/it]

PlasmoDB-68_Pgallinaceum8A/CC 0.2


Building predictions: 100%|██████████| 2073/2073 [00:04<00:00, 428.83it/s]
Generating predictions with corrected thresholds:  49%|████▉     | 28/57 [46:44<57:59, 119.99s/it]

PlasmoDB-68_Pgallinaceum8A/CC 0.3


Building predictions: 100%|██████████| 2073/2073 [00:04<00:00, 452.72it/s]
Generating predictions with corrected thresholds:  51%|█████     | 29/57 [46:49<42:25, 90.92s/it] 

PlasmoDB-68_Pgallinaceum8A/MF 0.01


Building predictions: 100%|██████████| 2172/2172 [00:14<00:00, 147.18it/s]
Generating predictions with corrected thresholds:  51%|█████     | 29/57 [47:03<42:25, 90.92s/it]

PlasmoDB-68_Pgallinaceum8A/MF 0.05


Building predictions: 100%|██████████| 2172/2172 [00:14<00:00, 146.59it/s]
Generating predictions with corrected thresholds:  51%|█████     | 29/57 [47:18<42:25, 90.92s/it]

PlasmoDB-68_Pgallinaceum8A/MF 0.1


Building predictions: 100%|██████████| 2172/2172 [00:14<00:00, 148.99it/s]
Generating predictions with corrected thresholds:  51%|█████     | 29/57 [47:33<42:25, 90.92s/it]

PlasmoDB-68_Pgallinaceum8A/MF 0.2


Building predictions: 100%|██████████| 2172/2172 [00:14<00:00, 147.05it/s]
Generating predictions with corrected thresholds:  51%|█████     | 29/57 [47:48<42:25, 90.92s/it]

PlasmoDB-68_Pgallinaceum8A/MF 0.3


Building predictions: 100%|██████████| 2172/2172 [00:14<00:00, 149.03it/s]
Generating predictions with corrected thresholds:  53%|█████▎    | 30/57 [48:02<38:34, 85.71s/it]

PlasmoDB-68_PinuiSanAntonio1/BP 0.01


Building predictions: 100%|██████████| 3682/3682 [00:51<00:00, 70.83it/s]
Generating predictions with corrected thresholds:  53%|█████▎    | 30/57 [48:54<38:34, 85.71s/it]

PlasmoDB-68_PinuiSanAntonio1/BP 0.05


Building predictions: 100%|██████████| 3682/3682 [00:51<00:00, 71.46it/s]
Generating predictions with corrected thresholds:  53%|█████▎    | 30/57 [49:46<38:34, 85.71s/it]

PlasmoDB-68_PinuiSanAntonio1/BP 0.1


Building predictions: 100%|██████████| 3682/3682 [00:51<00:00, 71.14it/s]
Generating predictions with corrected thresholds:  53%|█████▎    | 30/57 [50:38<38:34, 85.71s/it]

PlasmoDB-68_PinuiSanAntonio1/BP 0.2


Building predictions: 100%|██████████| 3682/3682 [00:52<00:00, 70.24it/s]
Generating predictions with corrected thresholds:  53%|█████▎    | 30/57 [51:30<38:34, 85.71s/it]

PlasmoDB-68_PinuiSanAntonio1/BP 0.3


Building predictions: 100%|██████████| 3682/3682 [00:52<00:00, 70.11it/s]
Generating predictions with corrected thresholds:  54%|█████▍    | 31/57 [52:23<59:49, 138.07s/it]

PlasmoDB-68_PinuiSanAntonio1/CC 0.01


Building predictions: 100%|██████████| 3312/3312 [00:07<00:00, 460.61it/s]
Generating predictions with corrected thresholds:  54%|█████▍    | 31/57 [52:30<59:49, 138.07s/it]

PlasmoDB-68_PinuiSanAntonio1/CC 0.05


Building predictions: 100%|██████████| 3312/3312 [00:07<00:00, 452.26it/s]
Generating predictions with corrected thresholds:  54%|█████▍    | 31/57 [52:37<59:49, 138.07s/it]

PlasmoDB-68_PinuiSanAntonio1/CC 0.1


Building predictions: 100%|██████████| 3312/3312 [00:07<00:00, 449.93it/s]
Generating predictions with corrected thresholds:  54%|█████▍    | 31/57 [52:44<59:49, 138.07s/it]

PlasmoDB-68_PinuiSanAntonio1/CC 0.2


Building predictions: 100%|██████████| 3312/3312 [00:07<00:00, 445.63it/s]
Generating predictions with corrected thresholds:  54%|█████▍    | 31/57 [52:52<59:49, 138.07s/it]

PlasmoDB-68_PinuiSanAntonio1/CC 0.3


Building predictions: 100%|██████████| 3312/3312 [00:07<00:00, 455.74it/s]
Generating predictions with corrected thresholds:  56%|█████▌    | 32/57 [52:59<44:50, 107.63s/it]

PlasmoDB-68_PinuiSanAntonio1/MF 0.01


Building predictions: 100%|██████████| 3290/3290 [00:22<00:00, 147.16it/s]
Generating predictions with corrected thresholds:  56%|█████▌    | 32/57 [53:21<44:50, 107.63s/it]

PlasmoDB-68_PinuiSanAntonio1/MF 0.05


Building predictions: 100%|██████████| 3290/3290 [00:22<00:00, 144.09it/s]
Generating predictions with corrected thresholds:  56%|█████▌    | 32/57 [53:44<44:50, 107.63s/it]

PlasmoDB-68_PinuiSanAntonio1/MF 0.1


Building predictions: 100%|██████████| 3290/3290 [00:22<00:00, 144.76it/s]
Generating predictions with corrected thresholds:  56%|█████▌    | 32/57 [54:07<44:50, 107.63s/it]

PlasmoDB-68_PinuiSanAntonio1/MF 0.2


Building predictions: 100%|██████████| 3290/3290 [00:22<00:00, 147.55it/s]
Generating predictions with corrected thresholds:  56%|█████▌    | 32/57 [54:29<44:50, 107.63s/it]

PlasmoDB-68_PinuiSanAntonio1/MF 0.3


Building predictions: 100%|██████████| 3290/3290 [00:22<00:00, 145.11it/s]
Generating predictions with corrected thresholds:  58%|█████▊    | 33/57 [54:52<43:41, 109.22s/it]

PlasmoDB-68_PknowlesiH/BP 0.01


Building predictions: 100%|██████████| 2592/2592 [00:36<00:00, 70.77it/s]
Generating predictions with corrected thresholds:  58%|█████▊    | 33/57 [55:29<43:41, 109.22s/it]

PlasmoDB-68_PknowlesiH/BP 0.05


Building predictions: 100%|██████████| 2592/2592 [00:36<00:00, 71.01it/s]
Generating predictions with corrected thresholds:  58%|█████▊    | 33/57 [56:05<43:41, 109.22s/it]

PlasmoDB-68_PknowlesiH/BP 0.1


Building predictions: 100%|██████████| 2592/2592 [00:36<00:00, 70.80it/s]
Generating predictions with corrected thresholds:  58%|█████▊    | 33/57 [56:42<43:41, 109.22s/it]

PlasmoDB-68_PknowlesiH/BP 0.2


Building predictions: 100%|██████████| 2592/2592 [00:36<00:00, 71.72it/s]
Generating predictions with corrected thresholds:  58%|█████▊    | 33/57 [57:18<43:41, 109.22s/it]

PlasmoDB-68_PknowlesiH/BP 0.3


Building predictions: 100%|██████████| 2592/2592 [00:36<00:00, 71.62it/s]
Generating predictions with corrected thresholds:  60%|█████▉    | 34/57 [57:54<50:15, 131.09s/it]

PlasmoDB-68_PknowlesiH/CC 0.01


Building predictions: 100%|██████████| 1950/1950 [00:04<00:00, 459.79it/s]
Generating predictions with corrected thresholds:  60%|█████▉    | 34/57 [57:58<50:15, 131.09s/it]

PlasmoDB-68_PknowlesiH/CC 0.05


Building predictions: 100%|██████████| 1950/1950 [00:04<00:00, 469.03it/s]
Generating predictions with corrected thresholds:  60%|█████▉    | 34/57 [58:03<50:15, 131.09s/it]

PlasmoDB-68_PknowlesiH/CC 0.1


Building predictions: 100%|██████████| 1950/1950 [00:04<00:00, 426.18it/s]
Generating predictions with corrected thresholds:  60%|█████▉    | 34/57 [58:07<50:15, 131.09s/it]

PlasmoDB-68_PknowlesiH/CC 0.2


Building predictions: 100%|██████████| 1950/1950 [00:04<00:00, 464.33it/s]
Generating predictions with corrected thresholds:  60%|█████▉    | 34/57 [58:11<50:15, 131.09s/it]

PlasmoDB-68_PknowlesiH/CC 0.3


Building predictions: 100%|██████████| 1950/1950 [00:04<00:00, 469.82it/s]
Generating predictions with corrected thresholds:  61%|██████▏   | 35/57 [58:16<35:59, 98.17s/it] 

PlasmoDB-68_PknowlesiH/MF 0.01


Building predictions: 100%|██████████| 2220/2220 [00:15<00:00, 147.48it/s]
Generating predictions with corrected thresholds:  61%|██████▏   | 35/57 [58:31<35:59, 98.17s/it]

PlasmoDB-68_PknowlesiH/MF 0.05


Building predictions: 100%|██████████| 2220/2220 [00:14<00:00, 152.75it/s]
Generating predictions with corrected thresholds:  61%|██████▏   | 35/57 [58:45<35:59, 98.17s/it]

PlasmoDB-68_PknowlesiH/MF 0.1


Building predictions: 100%|██████████| 2220/2220 [00:14<00:00, 150.37it/s]
Generating predictions with corrected thresholds:  61%|██████▏   | 35/57 [59:00<35:59, 98.17s/it]

PlasmoDB-68_PknowlesiH/MF 0.2


Building predictions: 100%|██████████| 2220/2220 [00:15<00:00, 147.75it/s]
Generating predictions with corrected thresholds:  61%|██████▏   | 35/57 [59:15<35:59, 98.17s/it]

PlasmoDB-68_PknowlesiH/MF 0.3


Building predictions: 100%|██████████| 2220/2220 [00:14<00:00, 151.55it/s]
Generating predictions with corrected thresholds:  63%|██████▎   | 36/57 [59:30<31:49, 90.94s/it]

PlasmoDB-68_PmalariaeUG01/BP 0.01


Building predictions: 100%|██████████| 2868/2868 [00:39<00:00, 72.57it/s]
Generating predictions with corrected thresholds:  63%|██████▎   | 36/57 [1:00:09<31:49, 90.94s/it]

PlasmoDB-68_PmalariaeUG01/BP 0.05


Building predictions: 100%|██████████| 2868/2868 [00:39<00:00, 72.70it/s]
Generating predictions with corrected thresholds:  63%|██████▎   | 36/57 [1:00:49<31:49, 90.94s/it]

PlasmoDB-68_PmalariaeUG01/BP 0.1


Building predictions: 100%|██████████| 2868/2868 [00:39<00:00, 72.89it/s]
Generating predictions with corrected thresholds:  63%|██████▎   | 36/57 [1:01:28<31:49, 90.94s/it]

PlasmoDB-68_PmalariaeUG01/BP 0.2


Building predictions: 100%|██████████| 2868/2868 [00:39<00:00, 72.58it/s]
Generating predictions with corrected thresholds:  63%|██████▎   | 36/57 [1:02:07<31:49, 90.94s/it]

PlasmoDB-68_PmalariaeUG01/BP 0.3


Building predictions: 100%|██████████| 2868/2868 [00:39<00:00, 72.62it/s]
Generating predictions with corrected thresholds:  65%|██████▍   | 37/57 [1:02:47<40:57, 122.87s/it]

PlasmoDB-68_PmalariaeUG01/CC 0.01


Building predictions: 100%|██████████| 2035/2035 [00:04<00:00, 472.65it/s]
Generating predictions with corrected thresholds:  65%|██████▍   | 37/57 [1:02:51<40:57, 122.87s/it]

PlasmoDB-68_PmalariaeUG01/CC 0.05


Building predictions: 100%|██████████| 2035/2035 [00:04<00:00, 475.75it/s]
Generating predictions with corrected thresholds:  65%|██████▍   | 37/57 [1:02:56<40:57, 122.87s/it]

PlasmoDB-68_PmalariaeUG01/CC 0.1


Building predictions: 100%|██████████| 2035/2035 [00:04<00:00, 469.92it/s]
Generating predictions with corrected thresholds:  65%|██████▍   | 37/57 [1:03:00<40:57, 122.87s/it]

PlasmoDB-68_PmalariaeUG01/CC 0.2


Building predictions: 100%|██████████| 2035/2035 [00:04<00:00, 464.42it/s]
Generating predictions with corrected thresholds:  65%|██████▍   | 37/57 [1:03:04<40:57, 122.87s/it]

PlasmoDB-68_PmalariaeUG01/CC 0.3


Building predictions: 100%|██████████| 2035/2035 [00:04<00:00, 438.66it/s]
Generating predictions with corrected thresholds:  67%|██████▋   | 38/57 [1:03:09<29:19, 92.60s/it] 

PlasmoDB-68_PmalariaeUG01/MF 0.01


Building predictions: 100%|██████████| 2478/2478 [00:16<00:00, 153.26it/s]
Generating predictions with corrected thresholds:  67%|██████▋   | 38/57 [1:03:25<29:19, 92.60s/it]

PlasmoDB-68_PmalariaeUG01/MF 0.05


Building predictions: 100%|██████████| 2478/2478 [00:16<00:00, 148.18it/s]
Generating predictions with corrected thresholds:  67%|██████▋   | 38/57 [1:03:42<29:19, 92.60s/it]

PlasmoDB-68_PmalariaeUG01/MF 0.1


Building predictions: 100%|██████████| 2478/2478 [00:16<00:00, 154.04it/s]
Generating predictions with corrected thresholds:  67%|██████▋   | 38/57 [1:03:58<29:19, 92.60s/it]

PlasmoDB-68_PmalariaeUG01/MF 0.2


Building predictions: 100%|██████████| 2478/2478 [00:16<00:00, 152.34it/s]
Generating predictions with corrected thresholds:  67%|██████▋   | 38/57 [1:04:14<29:19, 92.60s/it]

PlasmoDB-68_PmalariaeUG01/MF 0.3


Building predictions: 100%|██████████| 2478/2478 [00:16<00:00, 153.96it/s]
Generating predictions with corrected thresholds:  68%|██████▊   | 39/57 [1:04:30<26:46, 89.23s/it]

PlasmoDB-68_PovalecurtisiGH01/BP 0.01


Building predictions: 100%|██████████| 2824/2824 [00:38<00:00, 72.84it/s]
Generating predictions with corrected thresholds:  68%|██████▊   | 39/57 [1:05:09<26:46, 89.23s/it]

PlasmoDB-68_PovalecurtisiGH01/BP 0.05


Building predictions: 100%|██████████| 2824/2824 [00:38<00:00, 73.18it/s]
Generating predictions with corrected thresholds:  68%|██████▊   | 39/57 [1:05:48<26:46, 89.23s/it]

PlasmoDB-68_PovalecurtisiGH01/BP 0.1


Building predictions: 100%|██████████| 2824/2824 [00:38<00:00, 73.25it/s]
Generating predictions with corrected thresholds:  68%|██████▊   | 39/57 [1:06:26<26:46, 89.23s/it]

PlasmoDB-68_PovalecurtisiGH01/BP 0.2


Building predictions: 100%|██████████| 2824/2824 [00:38<00:00, 72.96it/s]
Generating predictions with corrected thresholds:  68%|██████▊   | 39/57 [1:07:05<26:46, 89.23s/it]

PlasmoDB-68_PovalecurtisiGH01/BP 0.3


Building predictions: 100%|██████████| 2824/2824 [00:38<00:00, 72.79it/s]
Generating predictions with corrected thresholds:  70%|███████   | 40/57 [1:07:44<34:08, 120.50s/it]

PlasmoDB-68_PovalecurtisiGH01/CC 0.01


Building predictions: 100%|██████████| 2718/2718 [00:05<00:00, 475.01it/s]
Generating predictions with corrected thresholds:  70%|███████   | 40/57 [1:07:50<34:08, 120.50s/it]

PlasmoDB-68_PovalecurtisiGH01/CC 0.05


Building predictions: 100%|██████████| 2718/2718 [00:05<00:00, 471.56it/s]
Generating predictions with corrected thresholds:  70%|███████   | 40/57 [1:07:55<34:08, 120.50s/it]

PlasmoDB-68_PovalecurtisiGH01/CC 0.1


Building predictions: 100%|██████████| 2718/2718 [00:05<00:00, 472.34it/s]
Generating predictions with corrected thresholds:  70%|███████   | 40/57 [1:08:01<34:08, 120.50s/it]

PlasmoDB-68_PovalecurtisiGH01/CC 0.2


Building predictions: 100%|██████████| 2718/2718 [00:05<00:00, 468.42it/s]
Generating predictions with corrected thresholds:  70%|███████   | 40/57 [1:08:07<34:08, 120.50s/it]

PlasmoDB-68_PovalecurtisiGH01/CC 0.3


Building predictions: 100%|██████████| 2718/2718 [00:06<00:00, 452.76it/s]
Generating predictions with corrected thresholds:  72%|███████▏  | 41/57 [1:08:13<24:49, 93.09s/it] 

PlasmoDB-68_PovalecurtisiGH01/MF 0.01


Building predictions: 100%|██████████| 2488/2488 [00:16<00:00, 154.76it/s]
Generating predictions with corrected thresholds:  72%|███████▏  | 41/57 [1:08:29<24:49, 93.09s/it]

PlasmoDB-68_PovalecurtisiGH01/MF 0.05


Building predictions: 100%|██████████| 2488/2488 [00:15<00:00, 158.69it/s]
Generating predictions with corrected thresholds:  72%|███████▏  | 41/57 [1:08:45<24:49, 93.09s/it]

PlasmoDB-68_PovalecurtisiGH01/MF 0.1


Building predictions: 100%|██████████| 2488/2488 [00:15<00:00, 165.27it/s]
Generating predictions with corrected thresholds:  72%|███████▏  | 41/57 [1:09:00<24:49, 93.09s/it]

PlasmoDB-68_PovalecurtisiGH01/MF 0.2


Building predictions: 100%|██████████| 2488/2488 [00:15<00:00, 165.51it/s]
Generating predictions with corrected thresholds:  72%|███████▏  | 41/57 [1:09:15<24:49, 93.09s/it]

PlasmoDB-68_PovalecurtisiGH01/MF 0.3


Building predictions: 100%|██████████| 2488/2488 [00:15<00:00, 165.63it/s]
Generating predictions with corrected thresholds:  74%|███████▎  | 42/57 [1:09:30<22:03, 88.23s/it]

PlasmoDB-68_PovalewallikeriPowCR01/BP 0.01


Building predictions: 100%|██████████| 2976/2976 [00:38<00:00, 78.28it/s]
Generating predictions with corrected thresholds:  74%|███████▎  | 42/57 [1:10:08<22:03, 88.23s/it]

PlasmoDB-68_PovalewallikeriPowCR01/BP 0.05


Building predictions: 100%|██████████| 2976/2976 [00:37<00:00, 78.79it/s]
Generating predictions with corrected thresholds:  74%|███████▎  | 42/57 [1:10:46<22:03, 88.23s/it]

PlasmoDB-68_PovalewallikeriPowCR01/BP 0.1


Building predictions: 100%|██████████| 2976/2976 [00:37<00:00, 78.79it/s]
Generating predictions with corrected thresholds:  74%|███████▎  | 42/57 [1:11:23<22:03, 88.23s/it]

PlasmoDB-68_PovalewallikeriPowCR01/BP 0.2


Building predictions: 100%|██████████| 2976/2976 [00:37<00:00, 78.53it/s]
Generating predictions with corrected thresholds:  74%|███████▎  | 42/57 [1:12:01<22:03, 88.23s/it]

PlasmoDB-68_PovalewallikeriPowCR01/BP 0.3


Building predictions: 100%|██████████| 2976/2976 [00:37<00:00, 78.48it/s]
Generating predictions with corrected thresholds:  75%|███████▌  | 43/57 [1:12:39<27:40, 118.59s/it]

PlasmoDB-68_PovalewallikeriPowCR01/CC 0.01


Building predictions: 100%|██████████| 2556/2556 [00:05<00:00, 507.58it/s]
Generating predictions with corrected thresholds:  75%|███████▌  | 43/57 [1:12:44<27:40, 118.59s/it]

PlasmoDB-68_PovalewallikeriPowCR01/CC 0.05


Building predictions: 100%|██████████| 2556/2556 [00:05<00:00, 505.10it/s]
Generating predictions with corrected thresholds:  75%|███████▌  | 43/57 [1:12:49<27:40, 118.59s/it]

PlasmoDB-68_PovalewallikeriPowCR01/CC 0.1


Building predictions: 100%|██████████| 2556/2556 [00:05<00:00, 496.59it/s]
Generating predictions with corrected thresholds:  75%|███████▌  | 43/57 [1:12:54<27:40, 118.59s/it]

PlasmoDB-68_PovalewallikeriPowCR01/CC 0.2


Building predictions: 100%|██████████| 2556/2556 [00:05<00:00, 503.99it/s]
Generating predictions with corrected thresholds:  75%|███████▌  | 43/57 [1:13:00<27:40, 118.59s/it]

PlasmoDB-68_PovalewallikeriPowCR01/CC 0.3


Building predictions: 100%|██████████| 2556/2556 [00:05<00:00, 505.83it/s]
Generating predictions with corrected thresholds:  77%|███████▋  | 44/57 [1:13:06<19:38, 90.63s/it] 

PlasmoDB-68_PovalewallikeriPowCR01/MF 0.01


Building predictions: 100%|██████████| 2584/2584 [00:15<00:00, 166.66it/s]
Generating predictions with corrected thresholds:  77%|███████▋  | 44/57 [1:13:21<19:38, 90.63s/it]

PlasmoDB-68_PovalewallikeriPowCR01/MF 0.05


Building predictions: 100%|██████████| 2584/2584 [00:15<00:00, 161.60it/s]
Generating predictions with corrected thresholds:  77%|███████▋  | 44/57 [1:13:37<19:38, 90.63s/it]

PlasmoDB-68_PovalewallikeriPowCR01/MF 0.1


Building predictions: 100%|██████████| 2584/2584 [00:15<00:00, 165.24it/s]
Generating predictions with corrected thresholds:  77%|███████▋  | 44/57 [1:13:53<19:38, 90.63s/it]

PlasmoDB-68_PovalewallikeriPowCR01/MF 0.2


Building predictions: 100%|██████████| 2584/2584 [00:15<00:00, 162.54it/s]
Generating predictions with corrected thresholds:  77%|███████▋  | 44/57 [1:14:09<19:38, 90.63s/it]

PlasmoDB-68_PovalewallikeriPowCR01/MF 0.3


Building predictions: 100%|██████████| 2584/2584 [00:16<00:00, 161.33it/s]
Generating predictions with corrected thresholds:  79%|███████▉  | 45/57 [1:14:25<17:29, 87.48s/it]

PlasmoDB-68_PreichenowiCDC/BP 0.01


Building predictions: 100%|██████████| 2485/2485 [00:31<00:00, 78.53it/s]
Generating predictions with corrected thresholds:  79%|███████▉  | 45/57 [1:14:56<17:29, 87.48s/it]

PlasmoDB-68_PreichenowiCDC/BP 0.05


Building predictions: 100%|██████████| 2485/2485 [00:31<00:00, 78.59it/s]
Generating predictions with corrected thresholds:  79%|███████▉  | 45/57 [1:15:28<17:29, 87.48s/it]

PlasmoDB-68_PreichenowiCDC/BP 0.1


Building predictions: 100%|██████████| 2485/2485 [00:31<00:00, 78.55it/s]
Generating predictions with corrected thresholds:  79%|███████▉  | 45/57 [1:16:00<17:29, 87.48s/it]

PlasmoDB-68_PreichenowiCDC/BP 0.2


Building predictions: 100%|██████████| 2485/2485 [00:31<00:00, 78.43it/s]
Generating predictions with corrected thresholds:  79%|███████▉  | 45/57 [1:16:31<17:29, 87.48s/it]

PlasmoDB-68_PreichenowiCDC/BP 0.3


Building predictions: 100%|██████████| 2485/2485 [00:32<00:00, 75.38it/s]
Generating predictions with corrected thresholds:  81%|████████  | 46/57 [1:17:04<20:00, 109.11s/it]

PlasmoDB-68_PreichenowiCDC/CC 0.01


Building predictions: 100%|██████████| 1772/1772 [00:03<00:00, 464.50it/s]
Generating predictions with corrected thresholds:  81%|████████  | 46/57 [1:17:08<20:00, 109.11s/it]

PlasmoDB-68_PreichenowiCDC/CC 0.05


Building predictions: 100%|██████████| 1772/1772 [00:03<00:00, 473.00it/s]
Generating predictions with corrected thresholds:  81%|████████  | 46/57 [1:17:12<20:00, 109.11s/it]

PlasmoDB-68_PreichenowiCDC/CC 0.1


Building predictions: 100%|██████████| 1772/1772 [00:03<00:00, 466.15it/s]
Generating predictions with corrected thresholds:  81%|████████  | 46/57 [1:17:16<20:00, 109.11s/it]

PlasmoDB-68_PreichenowiCDC/CC 0.2


Building predictions: 100%|██████████| 1772/1772 [00:03<00:00, 465.44it/s]
Generating predictions with corrected thresholds:  81%|████████  | 46/57 [1:17:19<20:00, 109.11s/it]

PlasmoDB-68_PreichenowiCDC/CC 0.3


Building predictions: 100%|██████████| 1772/1772 [00:04<00:00, 442.47it/s]
Generating predictions with corrected thresholds:  82%|████████▏ | 47/57 [1:17:24<13:41, 82.14s/it] 

PlasmoDB-68_PreichenowiCDC/MF 0.01


Building predictions: 100%|██████████| 2121/2121 [00:14<00:00, 149.66it/s]
Generating predictions with corrected thresholds:  82%|████████▏ | 47/57 [1:17:38<13:41, 82.14s/it]

PlasmoDB-68_PreichenowiCDC/MF 0.05


Building predictions: 100%|██████████| 2121/2121 [00:13<00:00, 154.15it/s]
Generating predictions with corrected thresholds:  82%|████████▏ | 47/57 [1:17:51<13:41, 82.14s/it]

PlasmoDB-68_PreichenowiCDC/MF 0.1


Building predictions: 100%|██████████| 2121/2121 [00:14<00:00, 150.07it/s]
Generating predictions with corrected thresholds:  82%|████████▏ | 47/57 [1:18:06<13:41, 82.14s/it]

PlasmoDB-68_PreichenowiCDC/MF 0.2


Building predictions: 100%|██████████| 2121/2121 [00:13<00:00, 152.92it/s]
Generating predictions with corrected thresholds:  82%|████████▏ | 47/57 [1:18:19<13:41, 82.14s/it]

PlasmoDB-68_PreichenowiCDC/MF 0.3


Building predictions: 100%|██████████| 2121/2121 [00:14<00:00, 150.63it/s]
Generating predictions with corrected thresholds:  84%|████████▍ | 48/57 [1:18:34<11:46, 78.51s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/BP 0.01


Building predictions: 100%|██████████| 2680/2680 [00:36<00:00, 72.48it/s]
Generating predictions with corrected thresholds:  84%|████████▍ | 48/57 [1:19:11<11:46, 78.51s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/BP 0.05


Building predictions: 100%|██████████| 2680/2680 [00:36<00:00, 72.46it/s]
Generating predictions with corrected thresholds:  84%|████████▍ | 48/57 [1:19:48<11:46, 78.51s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/BP 0.1


Building predictions: 100%|██████████| 2680/2680 [00:36<00:00, 72.59it/s]
Generating predictions with corrected thresholds:  84%|████████▍ | 48/57 [1:20:24<11:46, 78.51s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/BP 0.2


Building predictions: 100%|██████████| 2680/2680 [00:37<00:00, 72.36it/s]
Generating predictions with corrected thresholds:  84%|████████▍ | 48/57 [1:21:02<11:46, 78.51s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/BP 0.3


Building predictions: 100%|██████████| 2680/2680 [00:36<00:00, 72.67it/s]
Generating predictions with corrected thresholds:  86%|████████▌ | 49/57 [1:21:38<14:43, 110.41s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/CC 0.01


Building predictions: 100%|██████████| 2526/2526 [00:05<00:00, 454.40it/s]
Generating predictions with corrected thresholds:  86%|████████▌ | 49/57 [1:21:44<14:43, 110.41s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/CC 0.05


Building predictions: 100%|██████████| 2526/2526 [00:05<00:00, 471.14it/s]
Generating predictions with corrected thresholds:  86%|████████▌ | 49/57 [1:21:49<14:43, 110.41s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/CC 0.1


Building predictions: 100%|██████████| 2526/2526 [00:05<00:00, 449.35it/s]
Generating predictions with corrected thresholds:  86%|████████▌ | 49/57 [1:21:55<14:43, 110.41s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/CC 0.2


Building predictions: 100%|██████████| 2526/2526 [00:05<00:00, 470.66it/s]
Generating predictions with corrected thresholds:  86%|████████▌ | 49/57 [1:22:00<14:43, 110.41s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/CC 0.3


Building predictions: 100%|██████████| 2526/2526 [00:05<00:00, 484.81it/s]
Generating predictions with corrected thresholds:  88%|████████▊ | 50/57 [1:22:06<09:58, 85.44s/it] 

PlasmoDB-68_PvinckeibrucechwattiDA/MF 0.01


Building predictions: 100%|██████████| 2259/2259 [00:14<00:00, 157.42it/s]
Generating predictions with corrected thresholds:  88%|████████▊ | 50/57 [1:22:20<09:58, 85.44s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/MF 0.05


Building predictions: 100%|██████████| 2259/2259 [00:14<00:00, 158.70it/s]
Generating predictions with corrected thresholds:  88%|████████▊ | 50/57 [1:22:34<09:58, 85.44s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/MF 0.1


Building predictions: 100%|██████████| 2259/2259 [00:14<00:00, 158.23it/s]
Generating predictions with corrected thresholds:  88%|████████▊ | 50/57 [1:22:48<09:58, 85.44s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/MF 0.2


Building predictions: 100%|██████████| 2259/2259 [00:14<00:00, 155.57it/s]
Generating predictions with corrected thresholds:  88%|████████▊ | 50/57 [1:23:03<09:58, 85.44s/it]

PlasmoDB-68_PvinckeibrucechwattiDA/MF 0.3


Building predictions: 100%|██████████| 2259/2259 [00:14<00:00, 159.29it/s]
Generating predictions with corrected thresholds:  89%|████████▉ | 51/57 [1:23:17<08:07, 81.28s/it]

PlasmoDB-68_PvivaxSal1/BP 0.01


Building predictions: 100%|██████████| 2606/2606 [00:34<00:00, 74.90it/s]
Generating predictions with corrected thresholds:  89%|████████▉ | 51/57 [1:23:52<08:07, 81.28s/it]

PlasmoDB-68_PvivaxSal1/BP 0.05


Building predictions: 100%|██████████| 2606/2606 [00:34<00:00, 75.43it/s]
Generating predictions with corrected thresholds:  89%|████████▉ | 51/57 [1:24:27<08:07, 81.28s/it]

PlasmoDB-68_PvivaxSal1/BP 0.1


Building predictions: 100%|██████████| 2606/2606 [00:34<00:00, 75.17it/s]
Generating predictions with corrected thresholds:  89%|████████▉ | 51/57 [1:25:01<08:07, 81.28s/it]

PlasmoDB-68_PvivaxSal1/BP 0.2


Building predictions: 100%|██████████| 2606/2606 [00:34<00:00, 75.79it/s]
Generating predictions with corrected thresholds:  89%|████████▉ | 51/57 [1:25:36<08:07, 81.28s/it]

PlasmoDB-68_PvivaxSal1/BP 0.3


Building predictions: 100%|██████████| 2606/2606 [00:34<00:00, 74.65it/s]
Generating predictions with corrected thresholds:  91%|█████████ | 52/57 [1:26:11<09:04, 108.90s/it]

PlasmoDB-68_PvivaxSal1/CC 0.01


Building predictions: 100%|██████████| 2263/2263 [00:04<00:00, 487.58it/s]
Generating predictions with corrected thresholds:  91%|█████████ | 52/57 [1:26:15<09:04, 108.90s/it]

PlasmoDB-68_PvivaxSal1/CC 0.05


Building predictions: 100%|██████████| 2263/2263 [00:04<00:00, 490.25it/s]
Generating predictions with corrected thresholds:  91%|█████████ | 52/57 [1:26:20<09:04, 108.90s/it]

PlasmoDB-68_PvivaxSal1/CC 0.1


Building predictions: 100%|██████████| 2263/2263 [00:04<00:00, 486.64it/s]
Generating predictions with corrected thresholds:  91%|█████████ | 52/57 [1:26:24<09:04, 108.90s/it]

PlasmoDB-68_PvivaxSal1/CC 0.2


Building predictions: 100%|██████████| 2263/2263 [00:04<00:00, 487.03it/s]
Generating predictions with corrected thresholds:  91%|█████████ | 52/57 [1:26:29<09:04, 108.90s/it]

PlasmoDB-68_PvivaxSal1/CC 0.3


Building predictions: 100%|██████████| 2263/2263 [00:04<00:00, 485.87it/s]
Generating predictions with corrected thresholds:  93%|█████████▎| 53/57 [1:26:34<05:32, 83.20s/it] 

PlasmoDB-68_PvivaxSal1/MF 0.01


Building predictions: 100%|██████████| 2341/2341 [00:14<00:00, 159.10it/s]
Generating predictions with corrected thresholds:  93%|█████████▎| 53/57 [1:26:48<05:32, 83.20s/it]

PlasmoDB-68_PvivaxSal1/MF 0.05


Building predictions: 100%|██████████| 2341/2341 [00:15<00:00, 156.00it/s]
Generating predictions with corrected thresholds:  93%|█████████▎| 53/57 [1:27:03<05:32, 83.20s/it]

PlasmoDB-68_PvivaxSal1/MF 0.1


Building predictions: 100%|██████████| 2341/2341 [00:14<00:00, 158.48it/s]
Generating predictions with corrected thresholds:  93%|█████████▎| 53/57 [1:27:18<05:32, 83.20s/it]

PlasmoDB-68_PvivaxSal1/MF 0.2


Building predictions: 100%|██████████| 2341/2341 [00:14<00:00, 158.77it/s]
Generating predictions with corrected thresholds:  93%|█████████▎| 53/57 [1:27:33<05:32, 83.20s/it]

PlasmoDB-68_PvivaxSal1/MF 0.3


Building predictions: 100%|██████████| 2341/2341 [00:14<00:00, 164.20it/s]
Generating predictions with corrected thresholds:  95%|█████████▍| 54/57 [1:27:47<04:00, 80.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/BP 0.01


Building predictions: 100%|██████████| 2564/2564 [00:32<00:00, 78.63it/s]
Generating predictions with corrected thresholds:  95%|█████████▍| 54/57 [1:28:20<04:00, 80.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/BP 0.05


Building predictions: 100%|██████████| 2564/2564 [00:32<00:00, 78.91it/s]
Generating predictions with corrected thresholds:  95%|█████████▍| 54/57 [1:28:52<04:00, 80.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/BP 0.1


Building predictions: 100%|██████████| 2564/2564 [00:33<00:00, 76.56it/s]
Generating predictions with corrected thresholds:  95%|█████████▍| 54/57 [1:29:26<04:00, 80.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/BP 0.2


Building predictions: 100%|██████████| 2564/2564 [00:32<00:00, 78.46it/s]
Generating predictions with corrected thresholds:  95%|█████████▍| 54/57 [1:29:59<04:00, 80.30s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/BP 0.3


Building predictions: 100%|██████████| 2564/2564 [00:32<00:00, 78.41it/s]
Generating predictions with corrected thresholds:  96%|█████████▋| 55/57 [1:30:31<03:30, 105.41s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/CC 0.01


Building predictions: 100%|██████████| 2961/2961 [00:05<00:00, 506.19it/s]
Generating predictions with corrected thresholds:  96%|█████████▋| 55/57 [1:30:37<03:30, 105.41s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/CC 0.05


Building predictions: 100%|██████████| 2961/2961 [00:05<00:00, 506.72it/s]
Generating predictions with corrected thresholds:  96%|█████████▋| 55/57 [1:30:43<03:30, 105.41s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/CC 0.1


Building predictions: 100%|██████████| 2961/2961 [00:05<00:00, 505.65it/s]
Generating predictions with corrected thresholds:  96%|█████████▋| 55/57 [1:30:49<03:30, 105.41s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/CC 0.2


Building predictions: 100%|██████████| 2961/2961 [00:05<00:00, 506.45it/s]
Generating predictions with corrected thresholds:  96%|█████████▋| 55/57 [1:30:55<03:30, 105.41s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/CC 0.3


Building predictions: 100%|██████████| 2961/2961 [00:05<00:00, 505.21it/s]
Generating predictions with corrected thresholds:  98%|█████████▊| 56/57 [1:31:01<01:22, 82.58s/it] 

PlasmoDB-68_Pyoeliiyoelii17XNL2023/MF 0.01


Building predictions: 100%|██████████| 2169/2169 [00:13<00:00, 166.31it/s]
Generating predictions with corrected thresholds:  98%|█████████▊| 56/57 [1:31:14<01:22, 82.58s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/MF 0.05


Building predictions: 100%|██████████| 2169/2169 [00:13<00:00, 166.08it/s]
Generating predictions with corrected thresholds:  98%|█████████▊| 56/57 [1:31:27<01:22, 82.58s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/MF 0.1


Building predictions: 100%|██████████| 2169/2169 [00:13<00:00, 166.26it/s]
Generating predictions with corrected thresholds:  98%|█████████▊| 56/57 [1:31:40<01:22, 82.58s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/MF 0.2


Building predictions: 100%|██████████| 2169/2169 [00:13<00:00, 166.71it/s]
Generating predictions with corrected thresholds:  98%|█████████▊| 56/57 [1:31:53<01:22, 82.58s/it]

PlasmoDB-68_Pyoeliiyoelii17XNL2023/MF 0.3


Building predictions: 100%|██████████| 2169/2169 [00:13<00:00, 166.06it/s]
                                                                                                   

Select deepest terms after prediction

In [None]:
with open("raw_data_from_uniprot/function_ALL_data_ancestors_dict.json") as f:
    function_ancestors_dict = json.load(f)
with open("raw_data_from_uniprot/function_ALL_data_json_dict.json") as f:
    function_json_dict = json.load(f)
with open("raw_data_from_uniprot/process_ALL_data_ancestors_dict.json") as f:
    process_ancestors_dict = json.load(f)
with open("raw_data_from_uniprot/process_ALL_data_json_dict.json") as f:
    process_json_dict = json.load(f)
with open("raw_data_from_uniprot/component_ALL_data_ancestors_dict.json") as f:
    component_ancestors_dict = json.load(f)
with open("raw_data_from_uniprot/component_ALL_data_json_dict.json") as f:
    component_json_dict = json.load(f)

In [49]:
def select_deepest_terms(pred_dict, ancestors_dict, filtered_json_dict, mlb):
    """
    For each protein in pred_dict (mapping protein to {GO term: effective score}),
    select only the deepest GO terms that have all their ancestors present.
    
    Returns:
      dict: Mapping from protein entry -> dict of {GO term: effective score} for the deepest predictions.
    """
    deepest_predictions = {}
    
    for entry, go_dict in pred_dict.items():
        # Start with the set of predicted terms that are also in the MLb classes.
        predicted_terms = {term: score for term, score in go_dict.items() if term in mlb.classes_}
        
        # Step 1: Ensure ancestor consistency: only keep terms where, if defined,
        # all their ancestors are also predicted.
        valid_terms = {}
        for term, score in predicted_terms.items():
            if term in ancestors_dict:
                if set(ancestors_dict[term]).issubset(predicted_terms.keys()):
                    valid_terms[term] = score
            else:
                valid_terms[term] = score
        
        # Step 2: Select the deepest terms.
        # Remove any term that has a child in the valid set.
        deepest_terms = dict(valid_terms)  
        for term in valid_terms:
            if term in filtered_json_dict:
                children = set(filtered_json_dict[term])
                # If any child of term is also in valid_terms, remove the parent term.
                if children & valid_terms.keys():
                    deepest_terms.pop(term, None)
        
        deepest_predictions[entry] = deepest_terms  # mapping term -> effective score
    
    return deepest_predictions

def annotate_go_terms(predictions_dict, json_dict):
    """
    Convert a predictions dictionary mapping each protein to a mapping of
    GO term to effective score into a dictionary mapping each protein to a set
    of (GO term, effective score, GO term name) tuples.
    """
    annotated_dict = {}
    for protein, term_dict in predictions_dict.items():
        annotated_dict[protein] = {
            (term, score, json_dict.get(term, {}).get('name', term))
            for term, score in term_dict.items()
        }
    return annotated_dict

In [50]:
anc_dicts = {
    "MF": function_ancestors_dict,
    "BP": process_ancestors_dict,
    "CC": component_ancestors_dict,
}

json_dicts = {
    "MF": function_json_dict,
    "BP": process_json_dict,
    "CC": component_json_dict,
}

filtered_json_dict = {}
ancestors_dict_filtered = {}

for asp in ["MF", "BP", "CC"]:
    mlb   = mlb_by_asp[asp]
    anc   = anc_dicts[asp]
    js    = json_dicts[asp]

    # 1) Filter JSON children to only those in mlb.classes_
    fj = {}
    for term, info in js.items():
        children = info.get("children", [])
        fj[term] = [c["id"] for c in children if c["id"] in mlb.classes_]
    filtered_json_dict[asp] = fj

    # 2) Filter ancestor lists to only include mlb.classes_
    fa = {}
    for term, parents in anc.items():
        if parents:
            valid = [p for p in parents if p in mlb.classes_]
            if valid:
                fa[term] = valid
    ancestors_dict_filtered[asp] = fa

In [51]:
target_fdrs = [0.01, 0.05, 0.10, 0.20, 0.30]

deepest_annotations = {}

for species, asp_dict in all_predictions.items():
    deepest_annotations[species] = {}
    for asp, preds_for_aspect in asp_dict.items():
        anc_dict = ancestors_dict_filtered[asp]
        js_dict  = json_dicts[asp]
        child_map = filtered_json_dict[asp]

        deepest_annotations[species][asp] = {}

        for fdr in tqdm(target_fdrs, desc=f"{species}/{asp}", leave=False):
            if fdr not in preds_for_aspect:
                continue  # skip if this FDR wasn't generated

            pred_dict = preds_for_aspect[fdr]

            # select only the deepest terms
            deepest = select_deepest_terms(
                pred_dict,
                anc_dict,
                child_map,
                mlb_by_asp[asp]
            )
            # annotate them
            annots = annotate_go_terms(deepest, js_dict)
            deepest_annotations[species][asp][fdr] = annots

                                                                                    

Get the depths for the deepest terms 

In [52]:
from goatools.obo_parser import GODag
import numpy as np

obodag = GODag("go-basic.obo")

# 1) Build deepest_levels from deepest_annotations
deepest_levels = {}

for species, asp_dict in deepest_annotations.items():
    deepest_levels[species] = {}
    for asp, fdr_dict in asp_dict.items():
        deepest_levels[species][asp] = {}
        for fdr, annots in fdr_dict.items():
            # annots: {protein: {(go_id, score, name), …}, …}
            level_map = {}
            for prot, terms in annots.items():
                lvl_set = set()
                for go_id, score, name in terms:
                    depth = obodag[go_id].depth if go_id in obodag else np.nan
                    lvl_set.add((go_id, name, depth))
                level_map[prot] = lvl_set
            deepest_levels[species][asp][fdr] = level_map


go-basic.obo: fmt(1.2) rel(2024-09-08) 44,296 Terms


In [53]:
import pandas as pd
from collections import Counter
from pathlib import Path

out_dir = Path("genomes_to_annotate_with_PlasmoFP/depth_counts_for_deepest_terms_2")
out_dir.mkdir(exist_ok=True)

target_fdrs = [0.01, 0.05, 0.10, 0.20, 0.30]

for species, asp_dict in deepest_levels.items():
    for asp, fdr_dict in asp_dict.items():
        depth_counters = {}
        for fdr in target_fdrs:
            lvl_map = fdr_dict.get(fdr, {})
            depths = [
                depth
                for terms in lvl_map.values()
                for (_, _, depth) in terms
                if isinstance(depth, int)
            ]
            depth_counters[fdr] = Counter(depths)

        all_depths = sorted({d for cnt in depth_counters.values() for d in cnt})
        if not all_depths:
            print(f"Skipping {species}_{asp}, no depth data")
            continue

        df = pd.DataFrame(
            {f"{fdr:.2f}": [depth_counters[fdr].get(d, 0) for d in all_depths]
             for fdr in target_fdrs},
            index=all_depths
        )
        df.index.name = "Depth"

        fname = f"{species}_{asp}_depth_counts.csv"
        df.to_csv(out_dir / fname)
        print(f"Wrote {fname}")


Wrote PlasmoDB-68_PadleriG01_BP_depth_counts.csv
Wrote PlasmoDB-68_PadleriG01_CC_depth_counts.csv
Wrote PlasmoDB-68_PadleriG01_MF_depth_counts.csv
Wrote PlasmoDB-68_PbergheiANKA_BP_depth_counts.csv
Wrote PlasmoDB-68_PbergheiANKA_CC_depth_counts.csv
Wrote PlasmoDB-68_PbergheiANKA_MF_depth_counts.csv
Wrote PlasmoDB-68_PblacklockiG01_BP_depth_counts.csv
Wrote PlasmoDB-68_PblacklockiG01_CC_depth_counts.csv
Wrote PlasmoDB-68_PblacklockiG01_MF_depth_counts.csv
Wrote PlasmoDB-68_Pchabaudichabaudi_BP_depth_counts.csv
Wrote PlasmoDB-68_Pchabaudichabaudi_CC_depth_counts.csv
Wrote PlasmoDB-68_Pchabaudichabaudi_MF_depth_counts.csv
Wrote PlasmoDB-68_PcoatneyiHackeri_BP_depth_counts.csv
Wrote PlasmoDB-68_PcoatneyiHackeri_CC_depth_counts.csv
Wrote PlasmoDB-68_PcoatneyiHackeri_MF_depth_counts.csv
Wrote PlasmoDB-68_PcynomolgiM_BP_depth_counts.csv
Wrote PlasmoDB-68_PcynomolgiM_CC_depth_counts.csv
Wrote PlasmoDB-68_PcynomolgiM_MF_depth_counts.csv
Wrote PlasmoDB-68_Pfalciparum3D7_BP_depth_counts.csv
Wrote

In [55]:
#save the deepest_annotations dict to a pickle file
with open("genomes_to_annotate_with_PlasmoFP/deepest_annotations.pkl", "wb") as f:
    pickle.dump(deepest_annotations, f)
#save the filtered_deepest dict to a pickle file
with open("genomes_to_annotate_with_PlasmoFP/filtered_deepest.pkl", "wb") as f:
    pickle.dump(filtered_deepest, f)

#load the filtered_deepest dict from the pickle file

In [56]:
# deepest_annotations[species][asp][fdr] = { protein: {(go_id, score, name), …}, … }
import obonet

# 1) load the DO-NOT-ANNOTATE terms
graph = obonet.read_obo("gocheck_do_not_annotate.obo")
DO_NOT_ANNOTATE = set(graph.nodes)

# 2) build a filtered version
filtered_deepest = {}

for species, asp_dict in deepest_annotations.items():
    filtered_deepest[species] = {}
    for asp, fdr_dict in asp_dict.items():
        filtered_deepest[species][asp] = {}
        for fdr, prot_dict in fdr_dict.items():
            new_prot_dict = {}
            for prot, termset in prot_dict.items():
                # drop any tuple whose go_id is in DO_NOT_ANNOTATE
                filtered_terms = {
                    (gid, score, name)
                    for (gid, score, name) in termset
                    if gid not in DO_NOT_ANNOTATE
                }
                if filtered_terms:
                    new_prot_dict[prot] = filtered_terms
            # store only if there's anything left
            filtered_deepest[species][asp][fdr] = new_prot_dict

# Now `filtered_deepest` mirrors `deepest_annotations` but with DO_NOT_ANNOTATE terms removed.


In [57]:
# define the three GO root IDs
ROOT_TERMS = {
    "MF": "GO:0003674",  # molecular_function root
    "BP": "GO:0008150",  # biological_process root
    "CC": "GO:0005575",  # cellular_component root
}

zeroed = {}

for species, asp_dict in filtered_deepest.items():
    zeroed[species] = {}
    for asp, fdr_dict in asp_dict.items():
        root_id = ROOT_TERMS[asp]
        zeroed[species][asp] = {}

        for fdr, prot_dict in fdr_dict.items():
            new_prot_dict = {}
            for prot, termset in prot_dict.items():
                # remove the root term if present
                filtered_terms = {
                    (gid, score, name)
                    for (gid, score, name) in termset
                    if gid != root_id
                }
                # assign even if empty
                new_prot_dict[prot] = filtered_terms

            zeroed[species][asp][fdr] = new_prot_dict

# Now `zeroed` has the exact same structure as `filtered_deepest`,
# except any protein whose only annotation was the root now has an empty set.


In [58]:
import pickle
from pathlib import Path

orig_dir   = Path("genomes_to_annotate_with_PlasmoFP/gene_dicts_out_complete_and_filtered_2")
out_dir    = Path("genomes_to_annotate_with_PlasmoFP/with_PFP_predictions_complete_2")
out_dir.mkdir(exist_ok=True)

pfp_preds = zeroed  # or deepest_annotations

for gene_fp in orig_dir.glob("PlasmoDB-68_*_gene_dict_complete.pkl"):
    species  = gene_fp.stem.rsplit("_gene_dict", 1)[0]
    orig     = pickle.load(open(gene_fp, "rb"))
    augmented = {}

    # 1) pre‐compute which proteins got predictions in each aspect
    proteins_with_preds = {"MF": set(), "BP": set(), "CC": set()}
    for asp, asp_dict in pfp_preds.get(species, {}).items():
        for prot_dict in asp_dict.values():
            proteins_with_preds[asp].update(prot_dict.keys())

    # 2) union across aspects → proteins with any PFP entry
    proteins_with_any = set().union(*proteins_with_preds.values())

    # 3) augment each record, only injecting PFP fields for those in proteins_with_any
    for prot, rec in orig.items():
        new_rec = rec.copy()
        if prot in proteins_with_any:
            for asp, label in [("MF","PFP MF"), ("BP","PFP BP"), ("CC","PFP CC")]:
                aspect_preds = {}
                for fdr, prot_dict in pfp_preds.get(species, {})\
                                                   .get(asp, {})\
                                                   .items():
                    aspect_preds[fdr] = prot_dict.get(prot, set())
                new_rec[label] = aspect_preds
        augmented[prot] = new_rec

    # 4) save out
    out_fp = out_dir / f"{species}_gene_dict_with_PFP.pkl"
    with open(out_fp, "wb") as f:
        pickle.dump(augmented, f)

    # 5) compute and print stats
    total        = len(orig)
    mf_cnt       = len(proteins_with_preds["MF"])
    bp_cnt       = len(proteins_with_preds["BP"])
    cc_cnt       = len(proteins_with_preds["CC"])
    any_cnt      = len(proteins_with_any)
    none_cnt     = total - any_cnt

    print(
        f"{species}: {total} total proteins  →  "
        f"PFP MF: {mf_cnt}, BP: {bp_cnt}, CC: {cc_cnt};  "
        f"{any_cnt} proteins with ≥1 PFP entry, {none_cnt} without"
    )


PlasmoDB-68_PknowlesiH: 5328 total proteins  →  PFP MF: 2220, BP: 2592, CC: 1950;  3261 proteins with ≥1 PFP entry, 2067 without
PlasmoDB-68_PadleriG01: 5325 total proteins  →  PFP MF: 2422, BP: 2863, CC: 2453;  3609 proteins with ≥1 PFP entry, 1716 without
PlasmoDB-68_PvinckeibrucechwattiDA: 5225 total proteins  →  PFP MF: 2259, BP: 2680, CC: 2526;  3456 proteins with ≥1 PFP entry, 1769 without
PlasmoDB-68_PbergheiANKA: 4958 total proteins  →  PFP MF: 1992, BP: 2272, CC: 1712;  2908 proteins with ≥1 PFP entry, 2050 without
PlasmoDB-68_Pgallinaceum8A: 5286 total proteins  →  PFP MF: 2172, BP: 2607, CC: 2073;  3297 proteins with ≥1 PFP entry, 1989 without
PlasmoDB-68_PcoatneyiHackeri: 5516 total proteins  →  PFP MF: 2835, BP: 3247, CC: 2845;  4013 proteins with ≥1 PFP entry, 1503 without
PlasmoDB-68_PmalariaeUG01: 5942 total proteins  →  PFP MF: 2478, BP: 2868, CC: 2035;  3511 proteins with ≥1 PFP entry, 2431 without
PlasmoDB-68_Pyoeliiyoelii17XNL2023: 7047 total proteins  →  PFP MF: 21

In [59]:
import pickle
import pandas as pd
from pathlib import Path

# CONFIG
dict_dir  = Path("genomes_to_annotate_with_PlasmoFP/with_PFP_predictions_complete_2")
out_dir   = Path("genomes_to_annotate_with_PlasmoFP/gene_dicts_out_complete_and_filtered_2/annotation_summaries_2")
out_dir.mkdir(exist_ok=True)

FDRS     = [0.01, 0.05, 0.10, 0.20, 0.30]
GO_KEYS  = [
    ("GO Function", "GO IEA Function"),
    ("GO Process",  "GO IEA Process"),
    ("GO Component","GO IEA Component"),
]
PFP_KEYS = {"MF":"PFP MF","BP":"PFP BP","CC":"PFP CC"}

def triage(triples):
    nonempty = sum(1 for x in triples if len(x) > 0)
    return ("existing" if nonempty == 3
            else "none"    if nonempty == 0
            else "partial")

for pkl_fp in sorted(dict_dir.glob("PlasmoDB-68_*_gene_dict_with_PFP.pkl")):
    species   = pkl_fp.stem.rsplit("_gene_dict",1)[0]
    gene_dict = pickle.load(open(pkl_fp, "rb"))

    # initialize counters
    summary = {
        "Species":       species,
        "Total_genes":   len(gene_dict),
        "GO_existing":   0,
        "GO_partial":    0,
        "GO_none":       0,
    }
    for f in FDRS:
        summary[f"PFP_{f:.2f}_existing"] = 0
        summary[f"PFP_{f:.2f}_partial"]  = 0
        summary[f"PFP_{f:.2f}_none"]     = 0

    # iterate **only** over keys in gene_dict
    for prot, rec in gene_dict.items():
        # A) original GO coverage
        go_sets = [ set(rec[a])|set(rec[b]) for (a,b) in GO_KEYS ]
        cat_go  = triage(go_sets)
        summary[f"GO_{cat_go}"] += 1

        # B) augmented PFP coverage, at each FDR
        for f in FDRS:
            comb = []
            for (a,b), asp in zip(GO_KEYS, ["MF","BP","CC"]):
                orig = set(rec[a])|set(rec[b])
                pfp  = rec.get(PFP_KEYS[asp], {}).get(f, set())
                comb.append(orig | pfp)
            cat_pfp = triage(comb)
            summary[f"PFP_{f:.2f}_{cat_pfp}"] += 1

    # write out one‐row CSV
    df     = pd.DataFrame([summary])
    out_fp = out_dir / f"{species}_annotation_summary.csv"
    df.to_csv(out_fp, index=False)
    print(f"Wrote {out_fp.name}")


Wrote PlasmoDB-68_PadleriG01_annotation_summary.csv
Wrote PlasmoDB-68_PbergheiANKA_annotation_summary.csv
Wrote PlasmoDB-68_PblacklockiG01_annotation_summary.csv
Wrote PlasmoDB-68_Pchabaudichabaudi_annotation_summary.csv
Wrote PlasmoDB-68_PcoatneyiHackeri_annotation_summary.csv
Wrote PlasmoDB-68_PcynomolgiM_annotation_summary.csv
Wrote PlasmoDB-68_Pfalciparum3D7_annotation_summary.csv
Wrote PlasmoDB-68_PfragileNilgiri_annotation_summary.csv
Wrote PlasmoDB-68_PgaboniG01_annotation_summary.csv
Wrote PlasmoDB-68_Pgallinaceum8A_annotation_summary.csv
Wrote PlasmoDB-68_PinuiSanAntonio1_annotation_summary.csv
Wrote PlasmoDB-68_PknowlesiH_annotation_summary.csv
Wrote PlasmoDB-68_PmalariaeUG01_annotation_summary.csv
Wrote PlasmoDB-68_PovalecurtisiGH01_annotation_summary.csv
Wrote PlasmoDB-68_PovalewallikeriPowCR01_annotation_summary.csv
Wrote PlasmoDB-68_PreichenowiCDC_annotation_summary.csv
Wrote PlasmoDB-68_PvinckeibrucechwattiDA_annotation_summary.csv
Wrote PlasmoDB-68_PvivaxSal1_annotation

In [60]:
import pandas as pd
from pathlib import Path

# Path to your per‐species summaries
summaries_dir = Path("genomes_to_annotate_with_PlasmoFP/gene_dicts_out_complete_and_filtered_2/annotation_summaries_2")

# Glob all CSVs
csv_files = sorted(summaries_dir.glob("*_annotation_summary.csv"))

# Read and concatenate
dfs = []
for fp in csv_files:
    df = pd.read_csv(fp)
    dfs.append(df)
master_df = pd.concat(dfs, ignore_index=True)

# Optional: sort by Species
master_df = master_df.sort_values("Species").reset_index(drop=True)
#drop duplicate rows 
master_df = master_df.drop_duplicates(subset=["Species"])
# Save master CSV
out_fp = summaries_dir / "master_annotation_summary.csv"
master_df.to_csv(out_fp, index=False)

print(f"Wrote master summary with {len(master_df)} species to {out_fp}")
print(master_df)


Wrote master summary with 19 species to genomes_to_annotate_with_PlasmoFP/gene_dicts_out_complete_and_filtered_2/annotation_summaries_2/master_annotation_summary.csv
                               Species  Total_genes  GO_existing  GO_partial  \
0               PlasmoDB-68_PadleriG01         5325          872        2748   
2             PlasmoDB-68_PbergheiANKA         4958         1201        2487   
4           PlasmoDB-68_PblacklockiG01         5100          828        2679   
6        PlasmoDB-68_Pchabaudichabaudi         5199         1076        2646   
8         PlasmoDB-68_PcoatneyiHackeri         5516          726        2709   
10             PlasmoDB-68_PcynomolgiM         6068          439        1999   
12          PlasmoDB-68_Pfalciparum3D7         5389         2278        2265   
14         PlasmoDB-68_PfragileNilgiri         5672          732        2722   
16              PlasmoDB-68_PgaboniG01         5134          847        2561   
18          PlasmoDB-68_Pgallinace