# Documentation for Benchmarking Process

## Autodock Vina

I downloaded 296 PDB structure files (AlphaFold-predicted models) for selected _E. coli_ proteins and collected 218 ligands in SMILES format. The next step was to dock each protein against each ligand.

In [1]:
# Script to automate PDB file download. For each UniProt ID it fetches the AlphaFold DB predicted structure (model v4), so all of the PDBs are downloaded at once into the benchmarking directory (the notebook's working directory). I then moved all of these into one subfolder.

import requests

# UniProt accession IDs of the proteins to fetch. The download loop below
# substitutes each ID into the AlphaFold DB file URL and saves the result as
# "<ID>.pdb".
uniprot_ids = [
    "P0ABD5", "P0ABD8", "P24182", "P0A9Q5", "P0A6A8", "P69441", "P00957", "P11875", "P0A8M0", "P21889",
    "P0A940", "P0ABG1", "P0A6I0", "P0A6I6", "P69913", "P23886", "P21888", "P0A6K3", "P0A6P5", "P03004",
    "P0ACB0", "P0AEF0", "P10443", "P0ABS5", "P0A988", "P0A8J2", "P45568", "P77488", "P0A6P9", "P06616",
    "P0A6Q3", "P0A953", "P0AAI9", "P0AEK2", "P0A6R0", "P0AEK4", "P0A6Q6", "P0AB71", "P0AGD7", "P61949",
    "P0ABQ4", "P0A805", "P0ABH0", "P0A6S5", "P0AD68", "P0AEN4", "P06136", "P0ABG4", "P10121", "P0A9A6",
    "P0A6M8", "P0A9B2", "P31120", "P04805", "P00960", "P00961", "P60546", "P0A6F5", "P04079", "P0AES4",
    "P0AES6", "P06983", "P60906", "P28630", "P28631", "P08200", "P00956", "P0A705", "P0A707", "P22939",
    "P0AD57", "Q46893", "P62615", "P62623", "P60472", "P0A715", "P00803", "P07813", "P60955", "P15042",
    "P23930", "P61316", "P61320", "P0ADC3", "P75957", "P75958", "P0ADV1", "P0A9V1", "P31554", "P0ADC1",
    "P0AF98", "P0ADC6", "P0A722", "P10441", "P0A725", "P21645", "P43341", "P27300", "P00804", "P0A8N3",
    "P0AE18", "P0A817", "P25745", "P0AD65", "P0ABG7", "P0A9X4", "P16926", "P0ABH4", "P60752", "P22523",
    "P60293", "P0A749", "P08373", "P17952", "P14900", "P22188", "P11880", "P17443", "P22634", "P0AF16",
    "P00452", "P0AFF6", "P0A780", "P0AFG0", "P0A784", "P0AFI2", "P20083", "P08312", "P07395", "P0A7A7",
    "P00582", "P0A7A9", "P07013", "P16659", "P0A717", "P0A8K1", "P23830", "P0AB89", "P0A7E5", "P0A7E9",
    "P0AG30", "P0A7I7", "P25539", "P61714", "P0AG40", "P0A7X6", "P0C0R7", "P21513", "P0A7Y8", "P0A7L0",
    "P60422", "P60438", "P60723", "P62399", "P0AG55", "P0A7J3", "P0A7J7", "P0A7K2", "P0AA10", "P0ADY3",
    "P02413", "P0ADY7", "P0AG44", "P0C018", "P0A7K6", "P0A7L3", "P61175", "P0ADZ0", "P60624", "P68919",
    "P0A7L8", "P0AG51", "P0A7P5", "P0A7Q6", "P0A7Z4", "P0A8V2", "P0A8T7", "P0AG67", "P0A7V0", "P0A7V8",
    "P0A7W1", "P02358", "P02359", "P0A7R5", "P0A7R9", "P0A7S3", "P0A7S9", "P0AG59", "P0ADZ4", "P0AG63",
    "P0A7T7", "P0A7U3", "P0A7U7", "P0AEH1", "P39286", "P10408", "P0AG96", "P62395", "P0AGA2", "P0A8L1",
    "P0AG24", "P0AGE0", "P0ADG4", "P0A8M3", "P0A884", "P0A720", "P06612", "P0A873", "P00954", "P05852",
    "P0AF67", "P0A6P1", "P45531", "P0AGJ9", "P0A6A0", "P07118", "P0AC75", "P0A898", "P0AD27", "P25714",
    "P0A8I1", "P77173", "P24224", "P0A9Q9", "P0AC02", "P06709", "P61517", "P06961", "P0A6I3", "P0A6I9",
    "P0ABJ9", "P0A6L2", "P0AEE3", "P06966", "P06710", "P06968", "P0ACC3", "P23882", "P08192", "P24186",
    "P0AAI3", "P17169", "P0ACC7", "P00962", "P0A6S7", "P0A6F9", "P0A6X1", "P0ACB2", "P09126", "P0ACB4",
    "P23871", "P23893", "P69222", "P62617", "P62620", "P04951", "P0A7C2", "P00959", "P0A6W3", "P22524",
    "P0A752", "P18843", "P0A7B3", "P69924", "P42641", "P0A799", "P0ABF8", "P26647", "P0A7I0", "P07012",
    "P0ACC1", "P0A7D1", "P76062", "P0AFU8", "P0AG48", "P0A7M2", "P0A7M6", "P00579", "P0AGB6", "P0A7V3",
    "P0A7W7", "P0A7T3", "P68679", "P0AG90", "P68398", "P0AGG0", "P52097", "P76256", "P45748", "P0AGK1",
    "P0AAB4", "P45800", "P0A6P7", "P0A9P0", "P0ABQ0", "P0A800"
]

# Download the AlphaFold DB model (fragment F1, model v4) for every UniProt ID
# and save it as "<uniprot_id>.pdb" in the current working directory.
REQUEST_TIMEOUT_S = 30  # without a timeout a stalled connection would hang the batch forever
failed_ids = []  # IDs that could not be fetched; summarized after the loop

# A single Session reuses the HTTPS connection across all requests.
with requests.Session() as session:
    for uniprot_id in uniprot_ids:
        url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.pdb"
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT_S)
        except requests.RequestException as exc:
            # A network problem with one entry must not abort the remaining downloads.
            failed_ids.append(uniprot_id)
            print(f"Failed to download {uniprot_id}.pdb ({exc})")
            continue

        if response.status_code == 200:
            # Write the raw bytes so the file is stored exactly as served,
            # independent of guessed text encoding or platform newline translation.
            with open(f"{uniprot_id}.pdb", "wb") as f:
                f.write(response.content)
            print(f"Downloaded {uniprot_id}.pdb")
        else:
            failed_ids.append(uniprot_id)
            print(f"Failed to download {uniprot_id}.pdb (HTTP {response.status_code})")

if failed_ids:
    print(f"{len(failed_ids)} download(s) failed: {', '.join(failed_ids)}")

Downloaded P0ABD5.pdb
Downloaded P0ABD8.pdb
Downloaded P24182.pdb
Downloaded P0A9Q5.pdb
Downloaded P0A6A8.pdb
Downloaded P69441.pdb
Downloaded P00957.pdb
Downloaded P11875.pdb
Downloaded P0A8M0.pdb
Downloaded P21889.pdb
Downloaded P0A940.pdb
Downloaded P0ABG1.pdb
Downloaded P0A6I0.pdb
Downloaded P0A6I6.pdb
Downloaded P69913.pdb
Downloaded P23886.pdb
Downloaded P21888.pdb
Downloaded P0A6K3.pdb
Downloaded P0A6P5.pdb
Downloaded P03004.pdb
Downloaded P0ACB0.pdb
Downloaded P0AEF0.pdb
Downloaded P10443.pdb
Downloaded P0ABS5.pdb
Downloaded P0A988.pdb
Downloaded P0A8J2.pdb
Downloaded P45568.pdb
Downloaded P77488.pdb
Downloaded P0A6P9.pdb
Downloaded P06616.pdb
Downloaded P0A6Q3.pdb
Downloaded P0A953.pdb
Downloaded P0AAI9.pdb
Downloaded P0AEK2.pdb
Downloaded P0A6R0.pdb
Downloaded P0AEK4.pdb
Downloaded P0A6Q6.pdb
Downloaded P0AB71.pdb
Downloaded P0AGD7.pdb
Downloaded P61949.pdb
Downloaded P0ABQ4.pdb
Downloaded P0A805.pdb
Downloaded P0ABH0.pdb
Downloaded P0A6S5.pdb
Downloaded P0AD68.pdb
Downloaded

In [None]:
# This script converts the ligand SMILES strings into 3D structures and writes them all to a single SDF file.

from rdkit import Chem
from rdkit.Chem import AllChem

# (mol_id, smiles) pairs to convert: the loop below unpacks each entry into a
# molecule name and its SMILES string. The list is empty in this copy of the
# notebook and must be filled in before running.
smiles_data = [

]

OUTPUT_SDF = "output.sdf"
EMBED_SEED = 42  # fixed seed so the 3D embedding is reproducible between runs

writer = Chem.SDWriter(OUTPUT_SDF)
try:
    for mol_id, smiles in smiles_data:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            print(f"failed to process SMILES: {smiles}")
            continue

        mol = Chem.AddHs(mol)

        # EmbedMolecule returns -1 when no conformer could be generated;
        # optimizing such a molecule would raise, so skip it instead.
        if AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED) == -1:
            print(f"failed to embed 3D coordinates for {mol_id}: {smiles}")
            continue

        # MMFFOptimizeMolecule returns -1 when the MMFF force field cannot be
        # set up for this molecule; the embedded geometry is still written.
        if AllChem.MMFFOptimizeMolecule(mol) == -1:
            print(f"MMFF setup failed for {mol_id}; writing unoptimized geometry")

        # SetProp only accepts strings, so coerce non-string identifiers.
        mol.SetProp("_Name", str(mol_id))
        writer.write(mol)
finally:
    # Always close the writer so buffered records are flushed to disk,
    # even if a molecule raised part-way through the loop.
    writer.close()

print(f"SDF file generated: {OUTPUT_SDF}")