In [15]:
import numpy as np
import os
from perses.utils.openeye import createOEMolFromSDF
from openeye import oechem
from openforcefield.topology import Molecule

In [44]:
# Every SDF ligand file found directly inside the local 'data' directory.
ligand_files = [x for x in os.listdir('data') if x.endswith('.sdf')]


# Maps a target name (the part of the file name before the first '_') to the
# list of molecules read from that target's SDF file.
target = {}

for ligand_file in ligand_files:
    # allow_undefined_stereo=True is passed so that ligands with unspecified
    # stereocentres are still loaded instead of aborting the whole read.
    molecules = Molecule.from_file(os.path.join('data', ligand_file), allow_undefined_stereo=True)
    # from_file hands back a single Molecule (not a list) when the file holds
    # only one record; normalise so later len()/enumerate() calls always work.
    if not isinstance(molecules, list):
        molecules = [molecules]
    target[ligand_file.split('_')[0]] = molecules
    

Problematic atoms are:
Atom atomic num: 16, name: , idx: 44, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 10, aromatic: True, chiral: False
bond order: 2, chiral: False to atom atomic num: 8, name: , idx: 45, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 46, aromatic: False, chiral: False



In [46]:
def is_ani1(mol):
    """
    Report whether a molecule is built solely from C, N, O and H atoms,
    for use with NN potentials.

    Parameters
    ----------
    mol : object exposing an ``atoms`` iterable
        molecule to check; every atom must provide ``element.name``,
        which is compared against lower-case element names such as 'carbon'

    Returns
    -------
    bool :
        True if only CNOH (including a molecule with no atoms), False otherwise
    """
    allowed_elements = ('carbon', 'nitrogen', 'hydrogen', 'oxygen')
    # all() stops at the first atom outside the allowed set.
    return all(atom.element.name in allowed_elements for atom in mol.atoms)

def is_ani2(mol):
    """
    Report whether a molecule is built solely from C, N, O, H, S, Cl and F
    atoms, for use with NN potentials.

    Parameters
    ----------
    mol : object exposing an ``atoms`` iterable
        molecule to check; every atom must provide ``element.name``,
        which is compared against lower-case element names such as 'carbon'

    Returns
    -------
    bool :
        True if only CNOHSClF (including a molecule with no atoms), False otherwise
    """
    allowed_elements = (
        'carbon', 'nitrogen', 'hydrogen', 'oxygen',
        'sulfur', 'chlorine', 'fluorine',
    )
    # all() stops at the first atom outside the allowed set.
    return all(atom.element.name in allowed_elements for atom in mol.atoms)

In [54]:
# Tally, per target system, the ligands accepted by is_ani1 and report the
# share of all loaded ligands that sit in systems with enough usable ligands.
ani1_compatible = 0
all_molecules = 0

for name, molecules in target.items():
    print(name)
    total = len(molecules)
    # Positions (within this target's molecule list) of the ligands that pass.
    useable_molecules = [index for index, mol in enumerate(molecules) if is_ani1(mol)]
    print(f'{len(useable_molecules)} / {total}')
    # Only systems with at least three passing ligands are counted as testable.
    if len(useable_molecules) >= 3:
        print(f'System {name} has enough molecules to run tests on')
        print(f'Indices: {useable_molecules}')
        # NOTE(review): `_value` is a private attribute of the charge quantity;
        # a public unit-conversion accessor would be more robust - confirm.
        print(f'Charge on ligands: { {molecules[i].total_charge._value for i in useable_molecules} }')
        ani1_compatible += len(useable_molecules)
    all_molecules += total
    print()

# Percentage with two decimals; guarded so an empty `target` does not raise
# ZeroDivisionError.
if all_molecules:
    print(f'{100*ani1_compatible / all_molecules:.2f}')
else:
    print('No molecules were loaded')

Bace
17 / 36
System Bace has enough molecules to run tests on
Indices: [2, 4, 6, 12, 14, 16, 17, 18, 20, 21, 24, 26, 27, 29, 31, 34, 35]
Charge on ligands: {1.0}

Thrombin
4 / 11
System Thrombin has enough molecules to run tests on
Indices: [2, 5, 6, 7]
Charge on ligands: {1.0}

CDK2
6 / 16
System CDK2 has enough molecules to run tests on
Indices: [2, 3, 8, 9, 12, 15]
Charge on ligands: {0.0}

Tyk2
0 / 16

Jnk1
13 / 21
System Jnk1 has enough molecules to run tests on
Indices: [0, 1, 2, 4, 5, 6, 9, 10, 14, 15, 16, 17, 18]
Charge on ligands: {0.0}

PTP1B
0 / 23

MCL1
16 / 42
System MCL1 has enough molecules to run tests on
Indices: [0, 1, 3, 8, 9, 12, 14, 16, 21, 26, 30, 31, 32, 37, 38, 41]
Charge on ligands: {-1.0}

p38
1 / 34

28.140704


In [55]:
# Tally, per target system, the ligands accepted by is_ani2 and report the
# share of all loaded ligands that sit in systems with enough usable ligands.
ani2_compatible = 0
# Computed here so this cell does not depend on a counter left behind by an
# earlier cell.
all_molecules = sum(len(molecules) for molecules in target.values())

for name, molecules in target.items():
    print(name)
    total = len(molecules)
    # Positions (within this target's molecule list) of the ligands that pass.
    useable_molecules = [index for index, mol in enumerate(molecules) if is_ani2(mol)]
    print(f'{len(useable_molecules)} / {total}')
    # Only systems with at least three passing ligands are counted as testable.
    if len(useable_molecules) >= 3:
        print(f'System {name} has enough molecules to run tests on')
        print(f'Indices: {useable_molecules}')
        # NOTE(review): `_value` is a private attribute of the charge quantity;
        # a public unit-conversion accessor would be more robust - confirm.
        print(f'Charge on ligands: { {molecules[i].total_charge._value for i in useable_molecules} }')
        ani2_compatible += len(useable_molecules)
    print()

# Percentage with two decimals; guarded so an empty `target` does not raise
# ZeroDivisionError.
if all_molecules:
    print(f'{100*ani2_compatible / all_molecules:.2f}')
else:
    print('No molecules were loaded')

Bace
36 / 36
System Bace has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
Charge on ligands: {1.0}

Thrombin
9 / 11
System Thrombin has enough molecules to run tests on
Indices: [0, 2, 3, 4, 5, 6, 7, 8, 9]
Charge on ligands: {1.0}

CDK2
15 / 16
System CDK2 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15]
Charge on ligands: {0.0}

Tyk2
16 / 16
System Tyk2 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Charge on ligands: {0.0}

Jnk1
19 / 21
System Jnk1 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20]
Charge on ligands: {0.0}

PTP1B
3 / 23
System PTP1B has enough molecules to run tests on
Indices: [5, 16, 20]
Charge on ligands: {-2.0}

MCL1
42 / 42
System MCL1 has enough molecules to run tests o