#Importing the libraries

In [None]:
import rdkit as rd
from rdkit import Chem
from rdkit.Chem import rdPartialCharges
from rdkit.Chem import GraphDescriptors
from rdkit.Chem import Descriptors
from rdkit.Chem import MolSurf
from rdkit.Chem import QED

#Declaring a class for single molecule exploration in rdkit. Defining SMILES for first molecule, furthermore, defining a string for that molecule which would return the various physicochemical properties of that molecule.

In [None]:
#Partial Charges

#Balaban J: The Balaban index is a graph index defined for a graph on nodes, edges, and connected components. Unless otherwise stated, hydrogen atoms are usually ignored in the computation of such indices.

#Bertz CT is a topological index meant to quantify “complexity” of molecules. It consists of a sum of two terms, one representing the complexity of the bonding, the other representing the complexity of the distribution of heteroatoms.

#MollogP calculates the logP of the molecule. The most commonly used measure of lipophilicity is LogP, the partition coefficient of a molecule between an aqueous and lipophilic phases, usually octanol and water.

#MolWt tells us the molecular weight of the molecule. It is the average mass of a molecule of a compound compared to ¹/₁₂ the mass of carbon 12, and is calculated as the sum of the atomic weights of the constituent atoms.

#ExactMolWt gives us the exact molecular weight of the molecule. The exact mass of a molecule is also called the monoisotopic mass. It is calculated by adding the exact masses of the most abundant isotopes of the constituent elements

#HeavyAtomCount gives us the total number of the heavy atoms

#HeavyAtomMolWt gives us the Molecular Weight of the heavy atoms

#NumHeteroatoms gives the total number of Heteroatoms present in the molecule

#NumHAcceptors gives the total number of Hydrogen bond acceptors present in the molecule

#NumHDonors gives the total number of Hydrogen bond donors present in the molecule

#NumRotatableBonds gives the total number of rotatable bonds present in the molecule

#NumSpiroAtoms gives the total number of Spiro Atoms present in the molecule

#NumAliphaticRings gives the total number of Aliphatic rings present in the molecule

#NumAromaticRings gives the total number of Aromatic rings present in the molecule

#NumSaturatedRings gives the total number of Saturated rings present in the molecule

#The total ring count of the molecule tells us the rings present in the molecule

#NumAliphaticCarbocycles gives the total number of aliphatic carbocycles present in the molecule

#NumAromaticCarbocycles gives the total number of aromatic carbocycles present in the molecule

#NHOHCount gives the total number of NH and OH groups present in the molecule

#NOCount gives the total number of nitrogen and oxygen atoms present in the molecule

#TPSA (Topological Polar Surface Area) is the sum of the polar surfaces like the surfaces of oxygens, nitrogens and attached hydrogens.

#LabuteASA is Labute's Accessible Surface Area and it refers to the water accessible surface (in Å2) area using a probe radius of 1.4 Å. 

#PEOE_VSA(1-10): The Partial Equalization of Orbital Electronegativities (PEOE) method of calculating atomic partial charges [Gasteiger 1980] is a method in which charge is transferred between bonded atoms until equilibrium. To guarantee convergence, the amount of charge transferred at each iteration is damped with an exponentially decreasing scale factor. Each numbered PEOE_VSA descriptor is the sum of the van der Waals surface area contributions of the atoms whose PEOE partial charge falls within that descriptor's range.

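#SMR_VSA(1-10): Molecular refractivity (including implicit hydrogens) is an atomic contribution model [Crippen 1999] that assumes the correct protonation state (washed structures). Each numbered SMR_VSA descriptor is the sum of the van der Waals surface area contributions of the atoms whose molecular refractivity contribution falls within that descriptor's range.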

#HallKierAlpha: The Kier and Hall molecular shape indices [Hall 1991] compare the molecular graph with minimal and maximal molecular graphs, and are intended to capture different aspects of molecular shape.

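#Kappa(1-3): the Hall-Kier kappa shape indices of first, second and third order. They describe molecular shape by comparing the number of paths of one, two and three bonds in the hydrogen-suppressed molecular graph with the counts for minimal and maximal reference graphs.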

#Chi0: the zero-order Kier-Hall molecular connectivity index, a topological descriptor computed from the connectivity (delta values) of the atoms in the hydrogen-suppressed molecular graph.

#Phi0: the Kier molecular flexibility index (Phi), computed by RDKit's CalcPhi from the Kappa1 and Kappa2 shape indices and the number of heavy atoms.

#BCUT2D returns a 2D BCUT (eigen value hi, eigenvalue low) given the molecule and the specified atom properties

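#SlogP_VSA(1-12): SlogP is the log of the octanol/water partition coefficient (including implicit hydrogens). This property is an atomic contribution model [Crippen 1999] that calculates logP from the given structure; i.e., the correct protonation state (washed structures). Results may vary from the logP(o/w) descriptor. Each numbered SlogP_VSA descriptor is the sum of the van der Waals surface area contributions of the atoms whose logP contribution falls within that descriptor's range.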

#AUTOCORR2D returns the 2D Autocorrelation descriptors vector

#Ipc returns the information content of the coefficients of the characteristic polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule. ‘avg = 1’ returns the information content divided by the total population.

#FractionCSP3 returns the fraction of carbons that are sp3 hybridized

In [None]:
class SingleMoleculeExplorationRdkit():
    """Explore a single molecule, given as a SMILES string, with RDKit.

    The SMILES string is parsed once in ``__init__`` and the resulting
    molecule is kept on ``self.mol1``. ``physiochemical_properties`` prints
    a report of descriptors computed from that molecule.
    """

    def __init__(self, smiles1):
        """Store ``smiles1`` and parse it into ``self.mol1``.

        Args:
            smiles1: SMILES string of the molecule to explore.
        """
        self.smiles1 = smiles1
        # NOTE: RDKit documents MolFromSmiles as returning None when the
        # SMILES cannot be parsed; physiochemical_properties checks for that.
        self.mol1 = Chem.MolFromSmiles(self.smiles1)
        print("The structure of molecule 1 is ")

    def smiles1_to_mol(self):
        """Re-parse ``self.smiles1``, store the result on ``self.mol1`` and return it."""
        self.mol1 = Chem.MolFromSmiles(self.smiles1)
        return self.mol1

    @staticmethod
    def _numbered_family(module, prefix, count, mol):
        """Return ``(label, value)`` pairs for ``<prefix>1`` .. ``<prefix><count>``.

        Each descriptor function is looked up by name on ``module`` and
        called with ``mol``; the label matches the report format
        ``"<name> is "``.
        """
        names = ["{}{}".format(prefix, index) for index in range(1, count + 1)]
        return [("{} is ".format(name), getattr(module, name)(mol)) for name in names]

    def physiochemical_properties(self):
        """Compute the descriptors of ``self.mol1`` and print them, one per line.

        All descriptors are computed first and printed afterwards, so a
        failure in any computation prints nothing.

        Raises:
            ValueError: if ``self.mol1`` is ``None`` (the SMILES string could
                not be parsed into a molecule).
        """
        mol = self.mol1
        if mol is None:
            raise ValueError(
                "Could not parse SMILES into a molecule: {!r}".format(self.smiles1)
            )

        graph = Chem.GraphDescriptors
        descriptors = Chem.Descriptors
        lipinski = Chem.Lipinski
        mol_descriptors = Chem.rdMolDescriptors
        surface = Chem.MolSurf

        # ComputeGasteigerCharges returns None; it stores the charge of each
        # atom in the "_GasteigerCharge" atom property, so read them back.
        Chem.rdPartialCharges.ComputeGasteigerCharges(mol)
        partial_charges = [
            atom.GetDoubleProp("_GasteigerCharge") for atom in mol.GetAtoms()
        ]

        report = [
            ("Partial Charge of the Molecule is ", partial_charges),
            ("Balaban J of the Molecule is", round(graph.BalabanJ(mol), 3)),
            ("BertzCT value is", round(graph.BertzCT(mol), 3)),
            ("MolLogP value of the Molecule is ", descriptors.MolLogP(mol)),
            ("MolWt of the Molecule is ", descriptors.MolWt(mol)),
            ("ExactMolWt of the Molecule is ", mol_descriptors.CalcExactMolWt(mol)),
            ("HeavyAtomCount of the Molecule is ", lipinski.HeavyAtomCount(mol)),
            ("HeavyAtomMolWt of the Molecule is ", descriptors.HeavyAtomMolWt(mol)),
            ("NumHeteroatoms of the Molecule is ", lipinski.NumHeteroatoms(mol)),
            ("Number of Hydrogen bond Acceptors of the Molecule is ",
             lipinski.NumHAcceptors(mol)),
            ("Number of Hydrogen Bond Donors of the molecule is ",
             lipinski.NumHDonors(mol)),
            ("NumRotatableBonds of the molecule is ", lipinski.NumRotatableBonds(mol)),
            ("NumSpiroAtoms of the molecule is", mol_descriptors.CalcNumSpiroAtoms(mol)),
            ("NumAliphaticRings of the molecule is ", lipinski.NumAliphaticRings(mol)),
            # The aromatic and saturated counts previously both called
            # NumAliphaticRings; each now uses its own descriptor.
            ("NumAromaticRings of the molecule is ", lipinski.NumAromaticRings(mol)),
            ("NumSaturatedRings of the molecule is ", lipinski.NumSaturatedRings(mol)),
            ("RingCount of the molecule is ", lipinski.RingCount(mol)),
            # These two lines were previously both labelled "NumAliphaticRings".
            ("NumAliphaticCarbocycles of the molecule is ",
             lipinski.NumAliphaticCarbocycles(mol)),
            ("NumAliphaticHeterocycles of the molecule is ",
             lipinski.NumAliphaticHeterocycles(mol)),
            ("The NHOH Count of the molecule is ", lipinski.NHOHCount(mol)),
            ("The NO Count of the molecule is ", lipinski.NOCount(mol)),
            ("The TPSA of the molecule is ", surface.TPSA(mol)),
            ("The LabuteASA is ", surface.LabuteASA(mol)),
        ]
        report.extend(self._numbered_family(surface, "PEOE_VSA", 10, mol))
        # SMR_VSA2..10 previously all reported the value of SMR_VSA1.
        report.extend(self._numbered_family(surface, "SMR_VSA", 10, mol))
        report.extend([
            ("HallKierAlpha= ", graph.HallKierAlpha(mol)),
            ("Kappa1= ", graph.Kappa1(mol)),
            ("Kappa2= ", graph.Kappa2(mol)),
            ("Kappa3= ", graph.Kappa3(mol)),
            ("Chi0= ", graph.Chi0(mol)),
            ("Phi0= ", mol_descriptors.CalcPhi(mol)),
            ("BCUT2D is", mol_descriptors.BCUT2D(mol)),
        ])
        report.extend(self._numbered_family(surface, "SlogP_VSA", 12, mol))
        report.extend([
            ("AUTOCORR2D is", mol_descriptors.CalcAUTOCORR2D(mol)),
            ("Ipc is", graph.Ipc(mol)),
            ("Ipc0 is", graph.Ipc(mol, avg=0)),
            ("Ipc1 is", graph.Ipc(mol, avg=1)),
            ("Ipcn is", graph.Ipc(mol, avg=0, dMat=None, forceDMat=0)),
            ("FractionCSP3 is ", lipinski.FractionCSP3(mol)),
        ])

        for label, value in report:
            print(label, value)

    def structuresimilarity(self, smiles2):
        """Parse a second SMILES string and return the resulting molecule.

        The string is stored on ``self.smiles2`` and the parsed molecule on
        ``self.mol2``. No similarity score is computed here.

        Args:
            smiles2: SMILES string of the second molecule.

        Returns:
            The result of ``Chem.MolFromSmiles(smiles2)``.
        """
        self.smiles2 = smiles2
        self.mol2 = Chem.MolFromSmiles(smiles2)
        print("The structure of molecule 2 is ")
        return self.mol2

#Displaying the physicochemical properties for the molecule

In [None]:
# Build an explorer for the example molecule and print its descriptor report.
SingleMoleculeExplorationRdkit(
    smiles1="COc1cccc2cc(C(=O)NCCCCN3CCN(c4cccc5nccnc54)CC3)oc21"
).physiochemical_properties()

In [None]:
# Same call as the previous cell: explore the same SMILES string and print
# its descriptor report again.
SingleMoleculeExplorationRdkit(
    smiles1="COc1cccc2cc(C(=O)NCCCCN3CCN(c4cccc5nccnc54)CC3)oc21"
).physiochemical_properties()