In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors

In [3]:
data = pd.read_csv('Dataset.csv')

In [4]:
data.head()

Unnamed: 0,compound_name,Smiles
0,Azadiradione,CC(=O)OC1CC2C(C(=O)C=CC2(C3C1(C4=CC(=O)C(C4(CC...
1,dione,CC(C1=CC2=C(O1)C(=O)C3=CC=CC=C3C2=O)O
2,Tanshinone IIA,CC1=COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C
3,Gancaonin I,CC(=CCC1=C(C=C2C(=C1OC)C=C(O2)C3=C(C=C(C=C3)O)...
4,Balsaminone A,COC1=C(C2=CC=CC=C2C3=C1C4=C(O3)C(=O)C5=CC=CC=C...


In [9]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Crippen

# Read your DataFrame from CSV (replace 'your_file.csv' with your actual file path)
df = pd.read_csv('Dataset.csv')
"""store column in a variable"""
smiles_column = 'Smiles'
"""Generate RDKit Mol objects from SMILES strings"""
molecules = [Chem.MolFromSmiles(smiles) for smiles in df[smiles_column]]
"""logP and solubility"""
logP_values = [Crippen.MolLogP(mol) if mol is not None else None for mol in molecules]
solubility_values = [Crippen.MolMR(mol) if mol is not None else None for mol in molecules]
"""Add the calculated values to csv file"""
df['LogP'] = logP_values
df['Solubility'] = solubility_values
"""Save or display new csv file"""
df.to_csv('Project_properties.csv', index=False)
df

Unnamed: 0,compound_name,Smiles,LogP,Solubility
0,Azadiradione,CC(=O)OC1CC2C(C(=O)C=CC2(C3C1(C4=CC(=O)C(C4(CC...,5.4179,123.173
1,dione,CC(C1=CC2=C(O1)C(=O)C3=CC=CC=C3C2=O)O,2.1083,62.6488
2,Tanshinone IIA,CC1=COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C,4.24792,83.506
3,Gancaonin I,CC(=CCC1=C(C=C2C(=C1OC)C=C(O2)C3=C(C=C(C=C3)O)...,5.0369,101.2186
4,Balsaminone A,COC1=C(C2=CC=CC=C2C3=C1C4=C(O3)C(=O)C5=CC=CC=C...,4.0756,95.2438


In [7]:
from rdkit import Chem
from rdkit.Chem import Crippen, Descriptors, Lipinski

# List of SMILES strings
smiles_list = [
    "CC(=O)OC1CC2C(C(=O)C=CC2(C3C1(C4=CC(=O)C(C4(CC3)C)C5=COC=C5)C)C)(C)C",
    "CC(C1=CC2=C(O1)C(=O)C3=CC=CC=C3C2=O)O",
    "CC1=COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C",
    "COC1=C(C2=CC=CC=C2C3=C1C4=C(O3)C(=O)C5=CC=CC=C5C4=O)"
]

# Function to check Lipinski's Rule of Five for a given SMILES string
def check_lipinski_rule(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False, None  # Invalid SMILES
    logp_value = Crippen.MolLogP(mol)
    return all(
        [
            Descriptors.MolWt(mol) < 500,
            logp_value < 5,
            Lipinski.NumHDonors(mol) < 5,
            Lipinski.NumHAcceptors(mol) < 10,
        ]
    ), logp_value

# Loop through each SMILES string
for smiles in smiles_list:
    passes_rule, logp = check_lipinski_rule(smiles)
    
    # Print the result for each SMILES
    print(f"SMILES: {smiles}")
    print(f"Passes Lipinski Rule of Five: {passes_rule}")
    print(f"LogP Value: {logp}")
    print()

SMILES: CC(=O)OC1CC2C(C(=O)C=CC2(C3C1(C4=CC(=O)C(C4(CC3)C)C5=COC=C5)C)C)(C)C
Passes Lipinski Rule of Five: False
LogP Value: 5.417900000000006

SMILES: CC(C1=CC2=C(O1)C(=O)C3=CC=CC=C3C2=O)O
Passes Lipinski Rule of Five: True
LogP Value: 2.1083000000000003

SMILES: CC1=COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C
Passes Lipinski Rule of Five: True
LogP Value: 4.247920000000003

SMILES: COC1=C(C2=CC=CC=C2C3=C1C4=C(O3)C(=O)C5=CC=CC=C5C4=O)
Passes Lipinski Rule of Five: True
LogP Value: 4.370000000000004

