# This script takes csv

In [None]:
import os
import requests
from tqdm import tqdm
from Bio import PDB
from Bio.PDB.Polypeptide import PPBuilder

def extract_sequence(chain):
    """Return the amino-acid sequence of ``chain`` as a plain string.

    The chain is split into polypeptides with ``PPBuilder``; the sequence of
    each polypeptide is converted to ``str`` and the pieces are joined in the
    order ``build_peptides`` returns them. A chain that yields no
    polypeptides gives an empty string.
    """
    builder = PPBuilder()
    fragments = [
        str(polypeptide.get_sequence())
        for polypeptide in builder.build_peptides(chain)
    ]
    return "".join(fragments)

def query_abnum(sequence, scheme="imgt", timeout=30):
    """Number an antibody sequence with the AbNum web service.

    Args:
        sequence: Amino-acid sequence sent to the server as ``aaseq``.
        scheme: Numbering scheme. Accepts the names ``"chothia"``,
            ``"kabat"`` and ``"martin"`` or the raw AbNum flags ``"-c"``,
            ``"-k"`` and ``"-m"`` (case-insensitive). ``"imgt"`` is kept as a
            legacy alias and is sent as ``"-c"``, which is what this function
            has always sent regardless of the argument.
            NOTE(review): ``-c`` is AbNum's Chothia flag, so the numbering
            returned for ``"imgt"`` is presumably Chothia, not IMGT - confirm
            against the AbNum documentation before relying on IMGT positions.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so one stalled request cannot hang a whole batch run.

    Returns:
        One list per non-blank response line, produced by ``str.split``.
        Callers expect each entry to be ``[position, residue]``.

    Raises:
        ValueError: If ``scheme`` is not one of the accepted values.
        Exception: If the server answers with a status code other than 200.
    """
    scheme_flags = {
        "chothia": "-c",
        "kabat": "-k",
        "martin": "-m",
        "imgt": "-c",  # legacy alias, see docstring
        "-c": "-c",
        "-k": "-k",
        "-m": "-m",
    }
    # Validate before any network traffic so a typo fails fast and locally.
    key = str(scheme).strip().lower()
    if key not in scheme_flags:
        raise ValueError(
            f"Unknown numbering scheme {scheme!r}; "
            f"expected one of {sorted(scheme_flags)}"
        )

    url = "http://www.bioinf.org.uk/abs/abnum/abnum.cgi"
    params = {
        "plain": 1,
        "scheme": scheme_flags[key],
        "aaseq": sequence,
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Failed to query AbNum server")

    # Skip blank lines: they would become empty rows and break the
    # (position, residue) unpacking done by callers.
    return [
        line.split()
        for line in response.text.strip().splitlines()
        if line.strip()
    ]

def build_mapping(original_chain, abnum_result):
    """Pair each standard amino-acid residue with its AbNum position label.

    Residues of ``original_chain`` that pass ``PDB.is_aa(..., standard=True)``
    are matched, in order, with the rows of ``abnum_result``; each row must
    unpack into ``(position, residue_letter)``. Only the position is stored.

    Returns:
        dict mapping residue object -> AbNum position label.

    Raises:
        ValueError: If the number of standard residues differs from the
            number of AbNum rows.
    """
    residues = []
    for candidate in original_chain:
        if PDB.is_aa(candidate, standard=True):
            residues.append(candidate)

    if len(residues) != len(abnum_result):
        raise ValueError(f"Length mismatch: {len(residues)} residues vs {len(abnum_result)} AbNum results")

    # The residue letter reported by AbNum is unpacked but not used.
    return {
        residue: label
        for residue, (label, _letter) in zip(residues, abnum_result)
    }

def _abnum_label_to_residue_id(label):
    """Turn an AbNum label such as ``"H52A"`` into a Bio.PDB id ``(' ', 52, 'A')``."""
    body = label[1:]  # drop the leading chain-type letter ('H' or 'L')
    split_at = next(
        (index for index, char in enumerate(body) if not char.isdigit()),
        len(body),
    )
    digits = body[:split_at]
    if not digits:
        raise ValueError(f"Unrecognised AbNum position label: {label!r}")
    # The first non-digit character, if any, is the insertion code.
    insertion = body[split_at] if split_at < len(body) else " "
    return (" ", int(digits), insertion)


def _check_renumbering_plan(chain, plan):
    """Raise ValueError if ``plan`` would give two residues of ``chain`` the same id."""
    targets = [new_id for _residue, new_id in plan]
    moving = {residue.id for residue, _new_id in plan}
    staying = {residue.id for residue in chain} - moving
    if len(set(targets)) != len(targets) or staying.intersection(targets):
        raise ValueError(
            f"Chain {chain.id}: AbNum numbering collides with existing residue ids"
        )


def renumber_structure(structure):
    """Renumber the residues of ``structure`` in place using AbNum positions.

    For every chain whose extracted sequence has at least 70 residues the
    sequence is sent to AbNum and the result is mapped onto the residues of
    ``structure[0][chain_id]``. Chains that are too short, or for which the
    query or the mapping fails, are reported with a printed warning and left
    untouched (deliberate best-effort behaviour).

    NOTE(review): mappings are always built from model 0, so residues of any
    further models are never renumbered.

    Raises:
        ValueError: If an AbNum label cannot be parsed, or if the new
            numbering would give two residues of one chain the same id. Both
            checks run before any residue is modified.
    """
    chains = list(structure.get_chains())
    chain_seq_dict = {chain.id: extract_sequence(chain) for chain in chains}

    mappings = {}
    for chain_id, seq in chain_seq_dict.items():
        if len(seq) < 70:  # Quick check to skip obviously short chains
            print(f"Warning: Chain {chain_id} too short ({len(seq)} residues), skipping.")
            continue
        try:
            abnum_result = query_abnum(seq, scheme="imgt")
            mappings[chain_id] = build_mapping(structure[0][chain_id], abnum_result)
        except Exception as e:
            print(f"Warning: AbNum query failed for chain {chain_id}: {e}")

    # Resolve every new id before touching the structure. The mapping is
    # keyed by residue objects, so all lookups must happen while the residues
    # still carry their original ids.
    plans = []
    for chain in chains:
        chain_map = mappings.get(chain.id)
        if not chain_map:
            continue
        plan = [
            (residue, _abnum_label_to_residue_id(chain_map[residue]))
            for residue in chain
            if residue in chain_map
        ]
        _check_renumbering_plan(chain, plan)
        plans.append(plan)

    # Apply renumbering in two passes. Bio.PDB refuses to set an id that a
    # sibling residue still holds, so assigning final ids one by one fails
    # whenever the new numbering overlaps the old one. Parking every residue
    # on a unique temporary id first removes those transient clashes.
    for plan in plans:
        for index, (residue, _new_id) in enumerate(plan):
            residue.id = ("__renumber_tmp__", index, " ")
        for residue, new_id in plan:
            residue.id = new_id

def renumber_pdb(input_pdb_path, output_pdb_path):
    """Read a PDB file, renumber it with AbNum and write the result.

    The input is parsed with ``PDBParser(QUIET=True)``. Any exception raised
    while renumbering or saving is printed and swallowed, so in that case no
    output file is produced for this input. Exceptions raised while parsing
    the input are not caught here and propagate to the caller.
    """
    structure = PDB.PDBParser(QUIET=True).get_structure("structure", input_pdb_path)
    writer = PDB.PDBIO()

    try:
        renumber_structure(structure)
        writer.set_structure(structure)
        writer.save(output_pdb_path)
    except Exception as e:
        print(f"Error renumbering {input_pdb_path}: {e}")

# --- Define models and repeats ---
models = ["igfold", "immunebuilder"]
repeats = ["1", "2", "3"]

# Every (model, repeat) combination, in the same order as nested loops
# over models and then repeats.
run_pairs = [(model, rep) for model in models for rep in repeats]

# --- Setup output folders ---
# All output folders are created up front, even when the matching input
# folder turns out to be missing below.
for model, rep in run_pairs:
    os.makedirs(f"renumbered_output/{model}_{rep}", exist_ok=True)

# --- Main renumbering loop ---
for model, rep in run_pairs:
    pdb_folder = f"predictions_output/{model}_{rep}"
    output_folder = f"renumbered_output/{model}_{rep}"

    if not os.path.exists(pdb_folder):
        print(f"Warning: Folder {pdb_folder} does not exist, skipping.")
        continue

    # Only files whose name ends in ".pdb" are processed.
    pdb_files = []
    for entry in os.listdir(pdb_folder):
        if entry.endswith(".pdb"):
            pdb_files.append(entry)

    for pdb_file in tqdm(pdb_files, desc=f"Processing {model}_{rep}"):
        name, _extension = os.path.splitext(pdb_file)
        renumber_pdb(
            os.path.join(pdb_folder, pdb_file),
            os.path.join(output_folder, f"{name}_renumbered.pdb"),
        )

print("All PDB structures renumbered to IMGT format and saved.")


ImportError: cannot import name 'SCOPData' from 'Bio.Data' (/home/eva/miniconda3/envs/antibody_prediction_safe/lib/python3.10/site-packages/Bio/Data/__init__.py)