In [1]:
import molsberry.core as core_
import molsberry.modules.generic as generic_
import molsberry.modules.cuby4 as cuby4_
import molsberry.modules.mopac as mopac_
from molsberry.core.data.collections import BatchedData

class MyPipe2(core_.Pipeline):
    name = "pipe2"
    display_name = "My Pipe 2"
    def build(self):
        conf_prot = cuby4_.Cuby4AMBERInterfaceConfig(
            home="/home/arian/mambaforge/envs/ASH"
        )
        conf_lig = cuby4_.Cuby4MOPACInterfaceConfig(
            mozyme=False
        )
        conf_qmmm = cuby4_.Cuby4QMMMInterfaceConfig(
            qm_config=conf_lig,
            mm_config=conf_prot
        )
        mopac_conf_lig_cuby = cuby4_.Cuby4MOPACInterfaceConfig(
            mozyme=False, keywords=[]
        )
        mopac_conf_lig = mopac_.configs.MOPACConfig()
        conf_qmmm.config["job_cleanup"] = "no"
        
        self.add_block(core_.InputBlock(["ligand", "protein"]), "input")

        self.add_block(generic_.RDKitLigandPocketLocator(radius=10), "poclocator")
        self.add_connection("input", "ligand", "poclocator", "ligand")

        self.add_block(generic_.RDKitPocketIsolator(), "isolator")
        self.add_connection("input", "protein", "isolator", "protein")
        self.add_connection("poclocator", "location", "isolator", "location")

        self.add_block(generic_.math.Adder(2), 'adder1')
        self.add_block(cuby4_.Cuby4AMBEREnergyCalculator(conf_prot), 'protenergy')
        self.add_connection('isolator', 'pocket', 'protenergy', 'molecules')
        self.add_connection('protenergy', 'energy', 'adder1', 'num1')

        self.add_block(cuby4_.Cuby4MOPACEnergyCalculator(mopac_conf_lig_cuby), 'ligandenergy')
        self.add_connection('input', 'ligand', 'ligandenergy', 'molecules')
        self.add_connection('ligandenergy', 'energy', 'adder1', 'num2')

        self.add_block(generic_.math.Subtractor(), 'subtract1')
        self.add_connection('adder1', 'num_out', 'subtract1', 'num2')

        self.add_block(cuby4_.Cuby4QMMMEnergyCalculator(conf_qmmm), "qmmm_energy") #optimize
        self.add_connection("input", "ligand", "qmmm_energy", "qm_region")
        self.add_connection("isolator", "pocket", "qmmm_energy", "nonqm_region")
        self.add_connection('qmmm_energy', 'energy', 'subtract1', 'num1')

        self.add_block(core_.OutputBlock(["e_interaction", "lig_e", "score"]), "output")
        self.add_block(generic_.math.Adder(2), "adder2")
        self.add_connection('subtract1', 'num_out', 'output', 'e_interaction')
        self.add_connection('subtract1', 'num_out', 'adder2', 'num1')

        self.add_block(generic_.math.Subtractor(), 'subtract2')
        self.add_block(cuby4_.Cuby4MOPACEnergyOptimizer(mopac_conf_lig_cuby, 'LBFGS'), 'ligopt')
        self.add_connection('input', 'ligand', 'ligopt', 'molecules')
        self.add_connection('ligopt', 'energy', 'subtract2', 'num2')
        self.add_connection('ligandenergy', 'energy', 'subtract2', 'num1')

        self.add_connection('subtract2', 'num_out', 'output', 'lig_e')
        self.add_connection('subtract2', 'num_out', 'adder2', 'num2')

        self.add_connection('adder2', 'num_out', 'output', 'score')





In [2]:
pipe = MyPipe2(base_dir='./example1_out')
result = pipe.execute(
    ligand=core_.MoleculeData(core_.SDFPathRep("../PL-REX/003-CK2/structures_pl-rex/1J91/ligand.sdf")),
    protein=core_.MoleculeData(core_.PDBPathRep("../PL-REX/003-CK2/structures_pl-rex/1J91/protein.pdb"))
)


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>                STARTED: My Pipe 2                 >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (poclocator) (RDKit) Ligand Pocket Locator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0fbd749390> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>

[Running Pipe Block: (isolator) (RDKit) Pocket Isolator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0fbd749a50> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitProtRep'>
<class 'molsberry.core.data.generic.LocationData'>
<molsberry.core.data.generic.LocationData object at 0x7f0fbd74bb90> <class 'molsberry.core.data.generic.LocationData'>
<class 'molsberry.modules.generic.pocket.locationrep.PocketL

In [3]:
print(list(result['e_interaction'].get_representation_content()),
list(result['score'].get_representation_content()))

[-40.80725200000006] [-40.13057200000007]


In [4]:
import glob
ligand_paths = glob.glob('/mnt/e/molsberry/PL-REX/003-CK2/structures_pl-rex/*/ligand.sdf')
ligs = [core_.MoleculeData(core_.SDFPathRep(ligand)) for ligand in ligand_paths]
ligand = BatchedData(ligs)
protein_paths = glob.glob('/mnt/e/molsberry/PL-REX/003-CK2/structures_pl-rex/*/protein.pdb')
protein = [core_.MoleculeData(core_.PDBPathRep(protein)) for protein in protein_paths]
protein = BatchedData(protein)

pipeline = MyPipe2(base_dir="./example2_out")
out1 = pipeline.execute(ligand = ligand, protein = protein)



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>                STARTED: My Pipe 2                 >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (poclocator) (RDKit) Ligand Pocket Locator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f4a882990> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f42c020d0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f42c02050> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'

In [5]:
array1 = core_.NpData(core_.NpArrayRep(list(out1['score'].get_representation_content())))
experimental_values = []
with open('/mnt/e/molsberry/PL-REX/003-CK2/experimental_dG.txt', 'r') as file:
    for line in file:
        parts = line.split()
        for part in parts:
            try:  
                experimental_values.append(float(part))  
            except ValueError:  
                continue  
array2 = core_.NpData(core_.NpArrayRep(experimental_values)) 

class Correlation(core_.Pipeline):
    name = "correlation coefficient"
    display_name = "Correlation Coefficient Pilpeline"
    def build(self):
        self.add_block(core_.InputBlock(['array1', 'array2']), "input")
        self.add_block(generic_.sklearn.CorrCoef(), 'corrcoef')
        self.add_block(core_.OutputBlock(["result"]), "output")
        self.add_connection('input', 'array1', 'corrcoef', 'npa1')
        self.add_connection('input', 'array2', 'corrcoef', 'npa2')
        self.add_connection('corrcoef', 'num_out', 'output', 'result')

pipeline = Correlation(base_dir="./example3_out")
out1_cor = pipeline.execute(array1 = array1, array2 = array2)


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>    STARTED: Correlation Coefficient Pilpeline     >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (corrcoef) CorrCoeff calculator]
<class 'molsberry.core.data.generic.NpData'>
<molsberry.core.data.generic.NpData object at 0x7f0f2c7dc690> <class 'molsberry.core.data.generic.NpData'>
<class 'molsberry.core.data.generic.NpArrayRep'>
<class 'molsberry.core.data.generic.NpData'>
<molsberry.core.data.generic.NpData object at 0x7f0f2c9ddd50> <class 'molsberry.core.data.generic.NpData'>
<class 'molsberry.core.data.generic.NpArrayRep'>

[Running Pipe Block: (output) Output Block]

<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
<<     ENDED: Correlation Coefficient Pilpeline      <<
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


In [6]:
out1_cor['result'].get_representation_content()[0]

-0.1510341353449064

In [7]:
list(out1['score'].get_representation_content())

[-35.3353250000001,
 -40.105961000000136,
 -47.414306999999994,
 -54.21065600000007,
 -52.57330400000002,
 -30.102224999999976,
 -30.496935000000008,
 -41.906554000000014,
 -31.630356000000024,
 -35.89559899999988,
 -27.569004000000007,
 -33.43056500000006,
 -213.6267760000001,
 -55.620577000000004,
 -35.317542,
 -60.933788000000035]

In [8]:
experimental_values

[-7.854,
 -8.783,
 -8.384,
 -8.369,
 -8.862,
 -10.085,
 -9.822,
 -9.609,
 -9.367,
 -9.196,
 -8.954,
 -10.047,
 -9.063,
 -8.995,
 -10.485,
 -8.062]

In [9]:
import glob
ligand_paths = glob.glob('/mnt/e/molsberry/PL-REX/005-Cath-D/structures_pl-rex/*/ligand.sdf')
ligs = [core_.MoleculeData(core_.SDFPathRep(ligand)) for ligand in ligand_paths]
ligand = BatchedData(ligs)
protein_paths = glob.glob('/mnt/e/molsberry/PL-REX/005-Cath-D/structures_pl-rex/*/protein.pdb')
protein = [core_.MoleculeData(core_.PDBPathRep(protein)) for protein in protein_paths]
protein = BatchedData(protein)

pipeline = MyPipe2(base_dir="./example2_out")
out2 = pipeline.execute(ligand = ligand, protein = protein)



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>                STARTED: My Pipe 2                 >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (poclocator) (RDKit) Ligand Pocket Locator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0efe59ec90> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f2faf2dd0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f2faf2a10> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'

KeyboardInterrupt: 

In [10]:
array1 = core_.NpData(core_.NpArrayRep(list(out['score'].get_representation_content())))
experimental_values = []
with open('/mnt/e/molsberry/PL-REX/005-Cath-D/experimental_dG.txt', 'r') as file:
    for line in file:
        parts = line.split()
        for part in parts:
            try:  
                experimental_values.append(float(part))  
            except ValueError:  
                continue  
array2 = core_.NpData(core_.NpArrayRep(experimental_values)) 

class Correlation(core_.Pipeline):
    name = "correlation coefficient"
    display_name = "Correlation Coefficient Pilpeline"
    def build(self):
        self.add_block(core_.InputBlock(['array1', 'array2']), "input")
        self.add_block(generic_.sklearn.CorrCoef(), 'corrcoef')
        self.add_block(core_.OutputBlock(["result"]), "output")
        self.add_connection('input', 'array1', 'corrcoef', 'npa1')
        self.add_connection('input', 'array2', 'corrcoef', 'npa2')
        self.add_connection('corrcoef', 'num_out', 'output', 'result')

pipeline = Correlation(base_dir="./example3_out")
out2_cor = pipeline.execute(array1 = array1, array2 = array2)

NameError: name 'out' is not defined

In [11]:
out2_cor['result'].get_representation_content()[0]

NameError: name 'out2_cor' is not defined

In [12]:
import glob
ligand_paths = glob.glob('/mnt/e/molsberry/PL-REX/001-CA2/structures_pl-rex/*/ligand.sdf')
ligs = [core_.MoleculeData(core_.SDFPathRep(ligand)) for ligand in ligand_paths]
ligand = BatchedData(ligs)
protein_paths = glob.glob('/mnt/e/molsberry/PL-REX/001-CA2/structures_pl-rex/*/protein.pdb')
protein = [core_.MoleculeData(core_.PDBPathRep(protein)) for protein in protein_paths]
protein = BatchedData(protein)

pipeline = MyPipe2(base_dir="./example2_out")
out3 = pipeline.execute(ligand = ligand, protein = protein)



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>                STARTED: My Pipe 2                 >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (poclocator) (RDKit) Ligand Pocket Locator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f41bb02d0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f248807d0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f4a928d50> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'

TypeError: 'ABCMeta' object is not iterable

In [13]:
array1 = core_.NpData(core_.NpArrayRep(list(out3['score'].get_representation_content())))
experimental_values = []
with open('/mnt/e/molsberry/PL-REX/001-CA2/experimental_dG.txt', 'r') as file:
    for line in file:
        parts = line.split()
        for part in parts:
            try:  
                experimental_values.append(float(part))  
            except ValueError:  
                continue  
array2 = core_.NpData(core_.NpArrayRep(experimental_values)) 

class Correlation(core_.Pipeline):
    name = "correlation coefficient"
    display_name = "Correlation Coefficient Pilpeline"
    def build(self):
        self.add_block(core_.InputBlock(['array1', 'array2']), "input")
        self.add_block(generic_.sklearn.CorrCoef(), 'corrcoef')
        self.add_block(core_.OutputBlock(["result"]), "output")
        self.add_connection('input', 'array1', 'corrcoef', 'npa1')
        self.add_connection('input', 'array2', 'corrcoef', 'npa2')
        self.add_connection('corrcoef', 'num_out', 'output', 'result')

pipeline = Correlation(base_dir="./example3_out")
out3_cor = pipeline.execute(array1 = array1, array2 = array2)

NameError: name 'out3' is not defined

In [14]:
out3_cor['result'].get_representation_content()[0]

NameError: name 'out3_cor' is not defined

In [15]:
import glob
ligand_paths = glob.glob('/mnt/e/molsberry/PL-REX/009-CDK2/structures_pl-rex/*/ligand.sdf')
ligs = [core_.MoleculeData(core_.SDFPathRep(ligand)) for ligand in ligand_paths]
ligand = BatchedData(ligs)
protein_paths = glob.glob('/mnt/e/molsberry/PL-REX/009-CDK2/structures_pl-rex/*/protein.pdb')
protein = [core_.MoleculeData(core_.PDBPathRep(protein)) for protein in protein_paths]
protein = BatchedData(protein)

pipeline = MyPipe2(base_dir="./example2_out")
out4 = pipeline.execute(ligand = ligand, protein = protein)



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>                STARTED: My Pipe 2                 >>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

[Running Pipe Block: (poclocator) (RDKit) Ligand Pocket Locator]
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0efe01d2d0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f02e48790> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'>
<molsberry.core.data.molecules.MoleculeData object at 0x7f0f02e480d0> <class 'molsberry.core.data.molecules.MoleculeData'>
<class 'molsberry.modules.rdkit.representations.RDKitMolRep'>
<class 'molsberry.core.data.molecules.MoleculeData'

ValueError: 

In [None]:
array1 = core_.NpData(core_.NpArrayRep(list(out3['score'].get_representation_content())))
experimental_values = []
with open('/mnt/e/molsberry/PL-REX/009-CDK2/experimental_dG.txt', 'r') as file:
    for line in file:
        parts = line.split()
        for part in parts:
            try:  
                experimental_values.append(float(part))  
            except ValueError:  
                continue  
array2 = core_.NpData(core_.NpArrayRep(experimental_values)) 

class Correlation(core_.Pipeline):
    name = "correlation coefficient"
    display_name = "Correlation Coefficient Pilpeline"
    def build(self):
        self.add_block(core_.InputBlock(['array1', 'array2']), "input")
        self.add_block(generic_.sklearn.CorrCoef(), 'corrcoef')
        self.add_block(core_.OutputBlock(["result"]), "output")
        self.add_connection('input', 'array1', 'corrcoef', 'npa1')
        self.add_connection('input', 'array2', 'corrcoef', 'npa2')
        self.add_connection('corrcoef', 'num_out', 'output', 'result')

pipeline = Correlation(base_dir="./example3_out")
out3_cor = pipeline.execute(array1 = array1, array2 = array2)

NameError: name 'out3' is not defined

In [9]:
out3_cor['result'].get_representation_content()[0]

NameError: name 'out3_cor' is not defined

In [None]:
import glob
ligand_paths = glob.glob('/mnt/e/molsberry/PL-REX/010-MMP12/structures_pl-rex/*/ligand.sdf')
ligs = [core_.MoleculeData(core_.SDFPathRep(ligand)) for ligand in ligand_paths]
ligand = BatchedData(ligs)
protein_paths = glob.glob('/mnt/e/molsberry/PL-REX/010-MMP12/structures_pl-rex/*/protein.pdb')
protein = [core_.MoleculeData(core_.PDBPathRep(protein)) for protein in protein_paths]
protein = BatchedData(protein)

pipeline = MyPipe2(base_dir="./example2_out")
out4 = pipeline.execute(ligand = ligand, protein = protein)


In [None]:
array1 = core_.NpData(core_.NpArrayRep(list(out3['score'].get_representation_content())))
experimental_values = []
with open('/mnt/e/molsberry/PL-REX/010-MMP12/experimental_dG.txt', 'r') as file:
    for line in file:
        parts = line.split()
        for part in parts:
            try:  
                experimental_values.append(float(part))  
            except ValueError:  
                continue  
array2 = core_.NpData(core_.NpArrayRep(experimental_values)) 

class Correlation(core_.Pipeline):
    name = "correlation coefficient"
    display_name = "Correlation Coefficient Pilpeline"
    def build(self):
        self.add_block(core_.InputBlock(['array1', 'array2']), "input")
        self.add_block(generic_.sklearn.CorrCoef(), 'corrcoef')
        self.add_block(core_.OutputBlock(["result"]), "output")
        self.add_connection('input', 'array1', 'corrcoef', 'npa1')
        self.add_connection('input', 'array2', 'corrcoef', 'npa2')
        self.add_connection('corrcoef', 'num_out', 'output', 'result')

pipeline = Correlation(base_dir="./example3_out")
out3_cor = pipeline.execute(array1 = array1, array2 = array2)