In [2]:
import MDAnalysis as mda
import BioSimSpace as BSS
import numpy as np




Sending anonymous Sire usage statistics to http://siremol.org.
For more information, see http://siremol.org/analytics
To disable, set the environment variable 'SIRE_DONT_PHONEHOME' to 1
To see the information sent, set the environment variable 
SIRE_VERBOSE_PHONEHOME equal to 1. To silence this message, set
the environment variable SIRE_SILENT_PHONEHOME to 1.





# FEP workflow

## 1. Extract PDB file(s) from RCSB PDB
## 2. Remove everything except protein and also add missing atoms 
## 3. From protein pdb extract ligand (with `MDAnalysis`)
## 4. Save ligand file
## 5. Add hydrogen atoms with `openbabel`
## 6. Run `antechamber` to create `.mol2` file 
## 7. Go to `fep_setup.ipynb`

# 1. Extract pdb files

- **TODO:** write a shell script that downloads these files (or fetch them directly from RCSB PDB)

```
wget ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/pdbXXXX.ent.gz && gunzip pdbXXXX.ent.gz
```

# 2. Remove everything except protein and also add missing atoms
- this should also be in a shell script

```
pdb4amber <protein>.pdb > <protein>.amber.pdb -y -p --add-missing-atoms
```

# 3. From protein pdb extract ligand (with `MDAnalysis`)
- create copy of pdb file that can then be used to extract ligand with `MDAnalysis`

In [18]:
# Build an MDAnalysis Universe from the downloaded entry; the file format
# is passed explicitly as PDB.
universe = mda.Universe(
    "pdb6d18.ent",
    format="PDB",
)
# Echo the Universe so the cell output shows its summary repr.
universe

<Universe with 2413 atoms>

In [29]:
# Disabled exploratory check, kept for reference: it prints the universe's
# segments and then every non-empty alternate-location (altLoc) flag found
# on the protein atoms. Uncomment to run.
# segments = universe.segments
# print(segments)
# atoms = universe.select_atoms("protein")
# alt_locations = atoms.altLocs
# for alt in alt_locations:
#     if alt != "":
#         print(alt)

In [40]:
# Select every atom whose residue name is GTV (the ligand of interest).
ligand_residues = universe.select_atoms("resname GTV")

# Fail early with a readable message instead of an IndexError further down
# when the structure contains no such residue.
if len(ligand_residues) == 0:
    raise ValueError("No atoms with resname GTV were found in the universe")

# Collect the distinct residue ids as plain ints and sort them, so that the
# copy picked below is always the lowest resid rather than whatever element
# happens to come first when iterating a set.
unique_residue_ids = sorted({int(resid) for resid in ligand_residues.resids})

# Keep a single copy of the ligand: the atoms of the first (lowest) resid.
ligand = ligand_residues.select_atoms(f"resid {unique_residue_ids[0]}")

# Last expression of the cell, so the available residue ids are displayed.
unique_residue_ids

In [41]:
# Print one entry per ligand atom to check that the selection holds the
# expected atoms.
for atom in ligand:
    print(atom)

<Atom 2072: C01 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2073: C02 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2074: C03 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2075: C04 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2076: C05 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2077: C06 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2078: C07 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2079: C08 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2080: C13 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2081: C14 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2082: C17 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2083: C18 of type C of resname GTV, resid 301 and segid A and altLoc >
<Atom 2084: O10 of type O of resname GTV, resid 301 and segid A and altLoc >

# 4. Save ligand file 


In [45]:
# Use the lower-cased residue name of the first ligand atom as the file stem.
first_resname = ligand.resnames[0]
ligand_name = first_resname.lower()
# Write the ligand atoms to "<ligand_name>.pdb" (relative path).
ligand.write(f"{ligand_name}.pdb")

# 5. Add hydrogen atoms with `openbabel`

`obabel -i pdb gtv.pdb -o pdb -O gtv_h.pdb -h`

In [6]:
# Read the cleaned structure file and keep the first molecule it contains.
cleaned_system = BSS.IO.readMolecules("6d18_clean.pdb")
protein = cleaned_system[0]

In [7]:
# Parameterise the protein with the ff14SB force field and rebind `protein`
# to the molecule returned by the parameterisation.
# NOTE(review): this cell overwrites its own input, so re-running it feeds the
# already-parameterised molecule back in; re-run the read cell first.
parameterisation = BSS.Parameters.parameterise(protein, forcefield="ff14SB")
protein = parameterisation.getMolecule()