In [None]:
# Separate each PDB into antigen and antibody files (Fv dataset)

import os

import incito_pipeline.util.io.pdb as pdb_p
import incito_pipeline.util.data_utility as du
import incito_pipeline.util.samples.date_filter as df
import incito_pipeline.util.cache as cache

# Source directory with the ground-truth Fv structures, and the root directory
# that receives one sub-directory per structure.
path_to_dataset = "/home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_Fv_GroundTruth/"
output_dir = "/spinning1/home/cp864/boltz_2_new/19_08_2025/dataset/seperated_pdbs/fv"

# The listing returned by get_pdb_dirs_by_date is pickled under this name so a
# re-run of the cell can reuse it instead of recomputing it.
c = cache.Cache(pickle_name="date_cutoff_fv")

if not c.is_pickle():
    # No cached listing yet: compute it once and persist it.
    pdb_dirs = df.get_pdb_dirs_by_date(path_to_dataset)
    c.save_pickle(pdb_dirs)
else:
    print('loading from dir')
    pdb_dirs = c.load_pickle()

for entry in pdb_dirs:
    source_pdb = os.path.join(path_to_dataset, entry)
    # The output sub-directory is named after the full PDB id of the entry.
    pdb_p.separate_pdb(
        source_pdb,
        os.path.join(output_dir, du.get_full_pdb_id(source_pdb)),
    )

In [None]:
# Separate each PDB into antigen and antibody files (VHH dataset)

import os

import incito_pipeline.util.io.pdb as pdb_p
import incito_pipeline.util.samples.date_filter as df
import incito_pipeline.util.data_utility as du
import incito_pipeline.util.cache as cache

# Source directory with the ground-truth VHH structures, and the root directory
# that receives one sub-directory per structure.
path_to_dataset = "/home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_VHHs_GroundTruth/"
output_dir = "/spinning1/home/cp864/boltz_2_new/19_08_2025/dataset/seperated_pdbs/vhh"

# Separate cache name from the Fv cell so the two listings never collide.
c = cache.Cache(pickle_name="date_cutoff_vhh")

if not c.is_pickle():
    # No cached listing yet: compute it once and persist it.
    pdb_dirs = df.get_pdb_dirs_by_date(path_to_dataset)
    c.save_pickle(pdb_dirs)
else:
    print('loading from dir')
    pdb_dirs = c.load_pickle()

for entry in pdb_dirs:
    source_pdb = os.path.join(path_to_dataset, entry)
    # The output sub-directory is named after the full PDB id of the entry.
    pdb_p.separate_pdb(
        source_pdb,
        os.path.join(output_dir, du.get_full_pdb_id(source_pdb)),
    )

In [None]:
# Create YAML inputs with retrieved-epitope pocket constraints for Fv

import os
import tqdm

import incito_pipeline.util.data_utility as du
import incito_pipeline.util.samples.epitope_specification as e

# Directory holding the original (unsplit) ground-truth Fv PDB files.
original_pdbs = "/home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_Fv_GroundTruth"

# Per-structure directories produced by the separation step; only their names
# are used here, to recover which original PDB each one corresponds to.
path_to_templates = "/spinning1/home/cp864/boltz_2_new/19_08_2025/dataset/seperated_pdbs/fv"
yaml_output_path = "/spinning1/home/cp864/boltz_2_new/19_08_2025/yaml/v2/pockets/fv"

dirs = du.get_dirs(path_to_templates)

# The loop variable is deliberately not called `dir`: that would shadow the
# builtin dir() for every cell executed afterwards in the same kernel.
for template_dir in dirs:
    # Strip an optional 'boltz_results_' prefix to get the bare structure name.
    dir_name = os.path.basename(template_dir).split('boltz_results_')[-1]
    path_to_original_pdb = os.path.join(original_pdbs, f"{dir_name}.pdb")

    e.create_yaml_with_retrieved_epitopes(
        pdb_file=path_to_original_pdb,
        save_dir=yaml_output_path,
        chain_map={"H": "H", "A": "A", "L": "L"},
        epitope_cutoff=4.5,
        msa=None,
        enforce_pocket=True,
        pocket_constraint_threshold=0.01,
        # Fv: heavy and light chains on the antibody side, chain A as antigen
        # (presumably -- confirm against create_yaml_with_retrieved_epitopes).
        chain_filter_native=["H", "L"],
        chain_filter_model=["A"],
    )

Unpacking PDB file /home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_Fv_GroundTruth/8g8d_A1-127_B2-110.pdb
Loading PDB from file /home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_Fv_GroundTruth/8g8d_A1-127_B2-110.pdb
([RemarkLine(original_line='REMARK   6 printed by renumber_biopython_Fv_structure based on 8g8d seqid=8g8d_A1-127_B2-110', record_name='REMARK', remark_number=6, text='printed by renumber_biopython_Fv_structure based on 8g8d seqid=8g8d_A1-127_B2-110'), RemarkLine(original_line="REMARK   6 original_to_current_chain_mapping={'A': 'H', 'B': 'L', 'C': 'A'} Fv residues have been renumbered", record_name='REMARK', remark_number=6, text="original_to_current_chain_mapping={'H': 'H', 'A': 'A', 'L': 'L'}"), TitleLine(original_line='TITLE     CRYSTAL STRUCTURE OF DH1346 FAB IN COMPLEX WITH HIV PROXIMAL MPER 2 PEPTIDE ', record_name='TITLE', continuation='', title_text='CRYSTAL STRUCTURE OF DH1346 FAB IN COMPLEX WITH HIV PROX

In [None]:
# Create YAML inputs with retrieved-epitope pocket constraints for VHHs
import tqdm
import os

import incito_pipeline.util.data_utility as du
import incito_pipeline.util.samples.epitope_specification as e

# Directory holding the original (unsplit) ground-truth VHH PDB files.
original_pdbs = "/home/cp864/repos/incito-pipeline/incito_pipeline/datasets/AF3_independent_test_VHHs_GroundTruth"

# Per-structure directories produced by the separation step; only their names
# are used here, to recover which original PDB each one corresponds to.
path_to_templates = "/spinning1/home/cp864/boltz_2_new/19_08_2025/dataset/seperated_pdbs/vhh"
yaml_output_path = "/spinning1/home/cp864/boltz_2_new/19_08_2025/yaml/v2/pockets/vhh"

dirs = du.get_dirs(path_to_templates)

# The loop variable is deliberately not called `dir`: that would shadow the
# builtin dir() for every cell executed afterwards in the same kernel.
for template_dir in dirs:
    # Strip an optional 'boltz_results_' prefix to get the bare structure name.
    dir_name = os.path.basename(template_dir).split('boltz_results_')[-1]
    path_to_original_pdb = os.path.join(original_pdbs, f"{dir_name}.pdb")

    e.create_yaml_with_retrieved_epitopes(
        pdb_file=path_to_original_pdb,
        save_dir=yaml_output_path,
        chain_map={"H": "H", "A": "A"},
        epitope_cutoff=4.5,
        msa=None,
        enforce_pocket=True,
        pocket_constraint_threshold=0.01,
        # VHH: a single heavy chain on the antibody side, chain A as antigen
        # (presumably -- confirm against create_yaml_with_retrieved_epitopes).
        chain_filter_native=["H"],
        chain_filter_model=["A"],
    )