In [1]:
import numpy as np
import os
from perses.utils.openeye import createOEMolFromSDF
from openeye import oechem

In [2]:
# Every SDF file found directly inside the local 'data' directory.
ligand_files = [x for x in os.listdir('data') if x.endswith('.sdf')]

# Maps a target name (the file name up to its first underscore,
# e.g. 'Bace' for 'Bace_ligands.sdf') to the list of molecules read from that file.
target = {}

for ligand_file in ligand_files:
    print(ligand_file)
    molecules = []
    # Read molecules one index at a time until the reader signals that the
    # index is past the end of the file. No fixed upper bound is used, so
    # files of any size are loaded completely instead of being cut off.
    i = 0
    while True:
        try:
            mol = createOEMolFromSDF(f'data/{ligand_file}', index=i)
        except IndexError:
            # Raised for an index with no molecule: end of file reached.
            break
        molecules.append(mol)
        i += 1
    target[ligand_file.split('_')[0]] = molecules

Bace_ligands.sdf
Thrombin_ligands.sdf
CDK2_ligands.sdf
Tyk2_ligands.sdf
Jnk1_ligands.sdf
PTP1B_ligands.sdf
MCL1_ligands.sdf
p38_ligands.sdf


In [9]:
def is_ani1(mol):
    """
    Tell whether a molecule is made up exclusively of C, N, O and H atoms,
    for use with NN potentials

    Parameters
    ----------
    mol : oechem.oemol
        molecule whose atoms are inspected

    Returns
    -------
    bool :
        True if every atom is C, N, O or H (also True for a molecule
        with no atoms), False as soon as any other element is found
    """
    return all(
        atom.IsCarbon() or atom.IsHydrogen() or atom.IsOxygen() or atom.IsNitrogen()
        for atom in mol.GetAtoms()
    )

def is_ani2(mol):
    """
    Checks that molecule only contains elements covered by ANI-2x
    (H, C, N, O, F, S, Cl), for use with NN potentials

    Parameters
    ----------
    mol : oechem.oemol
        molecule to check

    Returns
    -------
    bool :
        True if every atom is one of H, C, N, O, F, S or Cl (also True for
        a molecule with no atoms), False otherwise
    """
    # Atomic numbers of H, C, N, O, F, S, Cl. An explicit whitelist is used
    # rather than atom.IsHalogen(), because that predicate also accepts the
    # heavier halogens (Br, I), which ANI-2x is not parameterized for.
    ani2_atomic_numbers = {1, 6, 7, 8, 9, 16, 17}
    return all(
        atom.GetAtomicNum() in ani2_atomic_numbers
        for atom in mol.GetAtoms()
    )

In [10]:
# For each target, count the ligands accepted by is_ani1 and, when at least
# 10 pass, report their indices and the set of net charges among them.
for name, molecules in target.items():
    print(name)
    total = len(molecules)
    # Positions (within this target's list) of the molecules that pass the filter.
    useable_molecules = [index for index, mol in enumerate(molecules) if is_ani1(mol)]
    print(f'{len(useable_molecules)} / {total}')
    if len(useable_molecules) >= 10:
        print(f'System {name} has enough molecules to run tests on')
        print(f'Indices: {useable_molecules}')
        # Distinct net charges found across the accepted ligands.
        charges = {oechem.OENetCharge(molecules[i]) for i in useable_molecules}
        print(f'Charge on ligands: {charges}')
    print()

Bace
17 / 36
System Bace has enough molecules to run tests on
Indices: [2, 4, 6, 12, 14, 16, 17, 18, 20, 21, 24, 26, 27, 29, 31, 34, 35]
Charge on ligands: {1}

Thrombin
4 / 11

CDK2
6 / 16

Tyk2
0 / 16

Jnk1
13 / 21
System Jnk1 has enough molecules to run tests on
Indices: [0, 1, 2, 4, 5, 6, 9, 10, 14, 15, 16, 17, 18]
Charge on ligands: {0}

PTP1B
0 / 23

MCL1
16 / 42
System MCL1 has enough molecules to run tests on
Indices: [0, 1, 3, 8, 9, 12, 14, 16, 21, 26, 30, 31, 32, 37, 38, 41]
Charge on ligands: {-1}

p38
1 / 34



In [11]:
# Same screening report as for is_ani1, but using the is_ani2 element filter:
# per target, print how many ligands pass and, if at least 10 do, list their
# indices and the distinct net charges they carry.
for name, molecules in target.items():
    print(name)
    total = len(molecules)
    useable_molecules = [
        index
        for index in range(total)
        if is_ani2(molecules[index])
    ]
    n_useable = len(useable_molecules)
    print(f'{n_useable} / {total}')
    if n_useable >= 10:
        print(f'System {name} has enough molecules to run tests on')
        print(f'Indices: {useable_molecules}')
        # Collapse the per-ligand net charges into the set of distinct values.
        net_charges = set()
        for i in useable_molecules:
            net_charges.add(oechem.OENetCharge(molecules[i]))
        print(f'Charge on ligands: {net_charges}')
    print()

Bace
36 / 36
System Bace has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
Charge on ligands: {1}

Thrombin
11 / 11
System Thrombin has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Charge on ligands: {1}

CDK2
16 / 16
System CDK2 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Charge on ligands: {0}

Tyk2
16 / 16
System Tyk2 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Charge on ligands: {0}

Jnk1
21 / 21
System Jnk1 has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
Charge on ligands: {0}

PTP1B
23 / 23
System PTP1B has enough molecules to run tests on
Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Charge