In [1]:
from pdbfixer import PDBFixer
from openmm.app import PDBFile
from io import StringIO

input_pdb = '/home/eva/0_point_mutation/playground_mAb_DMS/1MLC.pdb'
output_pdb = '/home/eva/0_point_mutation/playground_mAb_DMS/1MLC_.pdb'

# Chain IDs to rewrite in the fixed structure (old ID -> new ID).
chain_renames = {'C': 'H', 'D': 'L'}
# Record types whose chain ID sits in column 22 (string index 21). TER is
# included so chain terminators stay consistent with the renamed atoms.
chain_records = ('ATOM', 'HETATM', 'TER')

# Step 1: Run PDBFixer (locate missing residues/atoms, add heavy atoms, then hydrogens)
fixer = PDBFixer(filename=input_pdb)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(pH=7.4)  # Add hydrogens at physiological pH

# Step 2: Write to an in-memory buffer so the text can be post-processed
# NOTE(review): writeFile is called without keepIds, so the chain IDs in the
# buffer are the ones OpenMM emits, which may differ from those in the input
# file -- confirm that 'C' and 'D' are the chains meant to become 'H' and 'L'.
buffer = StringIO()
PDBFile.writeFile(fixer.topology, fixer.positions, buffer)
buffer.seek(0)

# Step 3: Rename chain C → H, D → L
with open(output_pdb, 'w') as out:
    for line in buffer:
        # Lines shorter than 22 characters have no chain-ID column to rewrite.
        if line.startswith(chain_records) and len(line) >= 22:
            new_chain_id = chain_renames.get(line[21])
            if new_chain_id is not None:
                line = line[:21] + new_chain_id + line[22:]
        out.write(line)

print(f"Fixed PDB with hydrogens and renamed chains written to: {output_pdb}")


Fixed PDB with hydrogens and renamed chains written to: /home/eva/0_point_mutation/playground_mAb_DMS/1MLC_.pdb


In [1]:
# Heavy-chain (vh_seq) and light-chain (vl_seq) amino-acid sequences in
# one-letter code; they are passed to run_abodybuilder2 in this cell.
vh_seq = "QVQLQESGAEVMKPGASVKISCKATGYTFSTYWIEWVKQRPGHGLEWIGEILPGSGSTYYNEKFKGKATFTADTSSNTAYMQLSSLTSEDSAVYYCARGDGNYGYWGQGTTLTVSSASTTPPSVFPLAPGSAAQTNSMVTLGCLVKGYFPEPVTVTWNSGSLSSGVHTFPAVLQSDLYTLSSSVTVPSSPRPSETVTCNVAHPASSTKVDKKIVPRDC"
vl_seq = "DIELTQSPATLSVTPGDSVSLSCRASQSISNNLHWYQQKSHESPRLLIKYVSQSSSGIPSRFSGSGSGTDFTLSINSVETEDFGMYFCQQSNSWPRTFGGGTKLEIKRADAAPTVSIFPPSSEQLTSGGASVVCFLNNFYPKDINVKWKIDGSERQNGVLNSWTDQDSKDSTYSMSSTLTLTKDEYERHNSYTCEATHKTSTSPIVKSFNRNEC"
# File the predicted structure is saved to.
output_path = '/home/eva/0_point_mutation/playground_mAb_DMS/1MLC_abodybuilder2.pdb'
def run_abodybuilder2(vh_seq, vl_seq, output_path):
    """Predict a structure with ImmuneBuilder and save it to ``output_path``.

    Args:
        vh_seq: Heavy-chain sequence, submitted under the key ``'H'``.
        vl_seq: Light-chain sequence, submitted under the key ``'L'``. When it
            is falsy (e.g. ``''`` or ``None``) only the heavy chain is
            submitted and ``NanoBodyBuilder2`` is used instead of
            ``ABodyBuilder2``.
        output_path: Destination handed to the model's ``save`` method.

    Returns:
        None. The model is saved and a confirmation line is printed.
    """
    from ImmuneBuilder import ABodyBuilder2, NanoBodyBuilder2

    # Pick the predictor and the matching input dictionary in one place.
    if vl_seq:
        builder = ABodyBuilder2()
        chains = {'H': vh_seq, 'L': vl_seq}
    else:
        builder = NanoBodyBuilder2()
        chains = {'H': vh_seq}

    structure = builder.predict(chains)
    structure.save(output_path)
    print(f"Saved structure to {output_path}")

# Build the model from the sequences defined in this cell and save it to output_path.
run_abodybuilder2(vh_seq, vl_seq, output_path)

Downloading weights for antibody_model_1...
Downloading weights for antibody_model_2...
Downloading weights for antibody_model_3...
Downloading weights for antibody_model_4...
Saved structure to /home/eva/0_point_mutation/playground_mAb_DMS/1MLC_abodybuilder2.pdb


In [19]:
from anarci import anarci

# Heavy- and light-chain input sequences for ANARCI numbering.
# NOTE: both names are rebound further down in this cell to the sequences
# read back from the ANARCI numbering, so later code sees those values.
vh_seq = "QVQLQESGAEVMKPGASVKISCKATGYTFSTYWIEWVKQRPGHGLEWIGEILPGSGSTYYNEKFKGKATFTADTSSNTAYMQLSSLTSEDSAVYYCARGDGNYGYWGQGTTLTVSSASTTPPSVFPLAPGSAAQTNSMVTLGCLVKGYFPEPVTVTWNSGSLSSGVHTFPAVLQSDLYTLSSSVTVPSSPRPSETVTCNVAHPASSTKVDKKIVPRDC"
vl_seq = "DIELTQSPATLSVTPGDSVSLSCRASQSISNNLHWYQQKSHESPRLLIKYVSQSSSGIPSRFSGSGSGTDFTLSINSVETEDFGMYFCQQSNSWPRTFGGGTKLEIKRADAAPTVSIFPPSSEQLTSGGASVVCFLNNFYPKDINVKWKIDGSERQNGVLNSWTDQDSKDSTYSMSSTLTLTKDEYERHNSYTCEATHKTSTSPIVKSFNRNEC"

def extract_numbered_seq(seq, chain_type):
    """Number one antibody chain with ANARCI (IMGT scheme).

    Args:
        seq: Amino-acid sequence of the chain.
        chain_type: ``'H'`` or ``'L'``. Used to label the sequence sent to
            ANARCI and to word the error message.

    Returns:
        The first domain entry ANARCI reports for ``seq``. Element ``[0]`` of
        that entry is the list of ``((number, insertion_code), amino_acid)``
        pairs.

    Raises:
        ValueError: If ``chain_type`` is not ``'H'`` or ``'L'``, if ``seq`` is
            empty, or if ANARCI finds no domain in the sequence.
    """
    if chain_type not in ('H', 'L'):
        raise ValueError("Invalid chain type. Please use 'H' or 'L'.")
    if not seq:
        raise ValueError(f"Empty sequence supplied for the {chain_type}-chain.")

    # Only the sequence of interest is submitted, so its result is always at
    # index 0 and no empty placeholder sequence has to be sent to ANARCI.
    label = 'vh' if chain_type == 'H' else 'vl'
    results = anarci([(label, seq)], scheme="imgt", output=False)
    numbering = results[0]

    # The per-sequence entry itself is None when no domain was found, so it
    # must be checked before it is indexed.
    domains = numbering[0]
    if not domains:
        raise ValueError(f"ANARCI failed to number the {chain_type}-chain.")

    domain = domains[0]
    if domain is None:
        raise ValueError(f"ANARCI failed to number the {chain_type}-chain.")

    return domain

# Run ANARCI
vh_numbered = extract_numbered_seq(vh_seq, 'H')
vl_numbered = extract_numbered_seq(vl_seq, 'L')

# Element [0] of each result is the list of ((number, insertion_code), amino_acid) pairs.
# NOTE: vh_seq / vl_seq are rebound here to the sequences read back from the
# numbering, replacing the input strings defined at the top of the cell.
vh_seq = ''.join(aa for (_, aa) in vh_numbered[0])
vl_seq = ''.join(aa for (_, aa) in vl_numbered[0])

# Build one space-separated label per position. The insertion code is stripped
# and the labels are joined explicitly so that positions carrying a non-blank
# insertion code do not run into the following label.
vh_num = ' '.join(f"{num}{ins.strip()}" for ((num, ins), _) in vh_numbered[0])
vl_num = ' '.join(f"{num}{ins.strip()}" for ((num, ins), _) in vl_numbered[0])

# Print the full position labels and numbered sequences for both chains
print("VH Numbered Sequence:", vh_num)
print("VL Numbered Sequence:", vl_num)
print("VH Sequence:", vh_seq)
print("VL Sequence:", vl_seq)


VH Numbered Sequence: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 
VL Numbered Sequence: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 
VH Sequence: QVQLQESGA-EVMKPGASVKISCKATGYTF----STYWIEWVKQRPGHGLEWIGEILPG--SGSTYYNEKFK-GKATFTADTSSNTAYMQLSSLTSEDSAVYYCARGDG----NYGYWGQGTTLTVSS
VL Seque