In [2]:
%pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-macosx_10_9_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-macosx_10_9_x86_64.whl (2.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.8/2.8 MB 13.7 MB/s eta 0:00:00
Installing collected packages: biopython
Successfully installed biopython-1.84

[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: pip install --upgrade pip


In [15]:
from Bio.PDB import PDBParser

# Dictionary for mapping three-letter residue codes to one-letter codes.
# Besides the 20 standard amino acids it also maps HSE, MSE and SEC so that
# residues with those names are kept instead of being reported as unknown.
three_to_one_dict = {
    "ALA": "A", "CYS": "C", "ASP": "D", "GLU": "E", "PHE": "F",
    "GLY": "G", "HIS": "H", "ILE": "I", "LYS": "K", "LEU": "L",
    "MET": "M", "ASN": "N", "PRO": "P", "GLN": "Q", "ARG": "R",
    "SER": "S", "THR": "T", "VAL": "V", "TRP": "W", "TYR": "Y",
    "HSE": "H", "MSE": "M", "SEC": "U"  # Include non-standard residues if needed
}

# Input PDB file, config file to generate, and the output directory named in it
pdb_file = "ionized.pdb"
config_file = "config.cfg"
output_dir = "output_foldx"

# Chain whose residues are scanned, and the one-character code appended to
# every position entry.
target_chain = "U"
# NOTE(review): presumably a FoldX PositionScan mutation-set code - confirm
# its meaning against the FoldX documentation before changing it.
mutation_code = "h"


def collect_positions(structure, chain_id, code, residue_codes):
    """Build FoldX position entries for one chain of a parsed structure.

    Only the first model is inspected: iterating over every model of a
    multi-model file would emit each position once per model and produce
    duplicate entries in the config.

    Parameters
    ----------
    structure : Bio.PDB structure object, as returned by ``PDBParser.get_structure``.
    chain_id : str
        ID of the chain to scan; all other chains are skipped without logging.
    code : str
        Suffix appended to every entry.
    residue_codes : dict
        Maps upper-case three-letter residue names to one-letter codes.

    Returns
    -------
    list of str
        Entries of the form ``<one-letter><chain_id><residue number><code>``,
        in file order. Empty if the structure has no models, the chain is
        absent, or no residue name is in ``residue_codes``.
    """
    models = list(structure)
    if not models:
        return []
    if len(models) > 1:
        print(f"Note: structure has {len(models)} models; using only the first one.")

    found = []
    for chain in models[0]:
        # Filter by chain first so unrelated chains do not flood the output.
        if chain.id != chain_id:
            continue
        for residue in chain:
            if residue.get_id()[0] != " ":  # Skip heteroatoms and water
                continue
            res_name = residue.get_resname()
            res_id = residue.get_id()[1]
            print(f"Processing residue: {res_name} (ID: {res_id}) in chain {chain.id}")

            # Convert three-letter code to one-letter code using the dictionary
            res_name_upper = res_name.upper()
            if res_name_upper in residue_codes:
                one_letter_res_name = residue_codes[res_name_upper]
                found.append(f"{one_letter_res_name}{chain_id}{res_id}{code}")
            else:
                # Print a warning for residues not in the dictionary but continue
                print(f"Warning: {res_name_upper} is not a standard amino acid.")
    return found


# Parse the PDB file to extract residues
parser = PDBParser(QUIET=True)
structure = parser.get_structure("protein", pdb_file)

positions = collect_positions(structure, target_chain, mutation_code, three_to_one_dict)

# Check if any positions were added and print debug info
print(f"Total positions found: {len(positions)}")
if positions:
    positions_str = ",".join(positions)
    print(f"Positions string: {positions_str}")

    # Write the config file (any existing file of that name is overwritten)
    with open(config_file, "w") as cfg:
        cfg.write(f"""command=PositionScan
output-dir={output_dir}
pdb={pdb_file}
positions={positions_str}
""")
    print(f"Updated {config_file} with {len(positions)} positions.")
else:
    print("No positions were added. Please check for errors.")


Processing residue: LYS (ID: 1) in chain U
Processing residue: GLU (ID: 2) in chain U
Processing residue: THR (ID: 3) in chain U
Processing residue: ALA (ID: 4) in chain U
Processing residue: ALA (ID: 5) in chain U
Processing residue: ALA (ID: 6) in chain U
Processing residue: LYS (ID: 7) in chain U
Processing residue: PHE (ID: 8) in chain U
Processing residue: GLU (ID: 9) in chain U
Processing residue: ARG (ID: 10) in chain U
Processing residue: GLN (ID: 11) in chain U
Processing residue: HSE (ID: 12) in chain U
Processing residue: MET (ID: 13) in chain U
Processing residue: ASP (ID: 14) in chain U
Processing residue: SER (ID: 15) in chain U
Processing residue: SER (ID: 16) in chain U
Processing residue: THR (ID: 17) in chain U
Processing residue: SER (ID: 18) in chain U
Processing residue: ALA (ID: 19) in chain U
Processing residue: ALA (ID: 20) in chain U
Processing residue: SER (ID: 21) in chain U
Processing residue: SER (ID: 22) in chain U
Processing residue: SER (ID: 23) in chain