In [31]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors

# Function to check if a compound passes Lipinski Rule of Five
def passes_lipinski_rule(smiles):
    """Return True if a compound satisfies all four Lipinski Rule of Five checks.

    The SMILES string is parsed with RDKit and the resulting molecule must meet
    every threshold at once (no violation is tolerated):

    * molecular weight (``Descriptors.MolWt``)          <= 500
    * logP (``Descriptors.MolLogP``)                    <= 5
    * H-bond donors (``Descriptors.NumHDonors``)        <= 5
    * H-bond acceptors (``Descriptors.NumHAcceptors``)  <= 10

    Parameters
    ----------
    smiles : str
        SMILES representation of the compound.

    Returns
    -------
    bool
        True when the molecule parses and passes all four checks. False for
        non-string input (e.g. None or a NaN cell), empty or whitespace-only
        strings, SMILES that RDKit cannot parse, molecules without atoms, and
        any threshold violation.
    """
    # Missing cells arrive as None/NaN (non-strings) and would make the RDKit
    # parser raise; a blank string carries no structure. Both count as a fail.
    if not isinstance(smiles, str) or not smiles.strip():
        return False

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles returns None for unparsable input. A molecule with zero
    # atoms would trivially satisfy every threshold, so reject it as well.
    if mol is None or mol.GetNumAtoms() == 0:
        return False

    return (
        Descriptors.MolWt(mol) <= 500
        and Descriptors.MolLogP(mol) <= 5
        and Descriptors.NumHDonors(mol) <= 5
        and Descriptors.NumHAcceptors(mol) <= 10
    )

# Read the QSAR dataset (it must provide a 'canonical_smiles' column) and
# discard every row that has a missing value in any column.
df = pd.read_csv('_Dataset_for_QSAR.csv').dropna()

# Flag each compound with the outcome of the Lipinski Rule of Five check.
df = df.assign(passes_lipinski=df['canonical_smiles'].apply(passes_lipinski_rule))

# Keep only the flagged compounds and write them to a new CSV file (index omitted).
lipinski_pass_df = df[df['passes_lipinski']]
lipinski_pass_df.to_csv('lipinski_passed.csv', index=False)

# Show the SMILES of the compounds that passed.
print(
    "Compounds that pass Lipinski Rule of Five:",
    lipinski_pass_df[['canonical_smiles']],
    sep="\n",
)



Compounds that pass Lipinski Rule of Five:
                                         canonical_smiles
8        CC(/C=C/C(=O)NO)=C\[C@@H](C)C(=O)c1ccc(N(C)C)cc1
11      CC(=O)[C@]1(N)Cc2c(O)c3c(c(O)c2[C@@H](O[C@H]2C...
13      CC(=O)[C@]1(O)Cc2c(O)c3c(c(O)c2[C@@H](O[C@H]2C...
15                           CC(C)Cn1cnc2c(N)nc3ccccc3c21
16      CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@...
...                                                   ...
132152  Cc1ccc2c(N3CCN(C(=O)N(c4nccc5ccccc45)[C@@H]4CC...
132153   Cc1nccnc1N(C(=O)N1CCC(c2ccccc2)CC1)[C@@H]1CCCNC1
132154  Cn1ccc2ccnc(N(C(=O)N3CCC(c4ccccc4)CC3)[C@@H]3C...
132155  Cc1cccnc1N(C(=O)N1CCC(c2ccccc2F)CC1)[C@@H]1CCCNC1
132156  Cc1cccnc1N(C(=O)N1CCC(c2cnn3cccnc23)CC1)[C@@H]...

[85296 rows x 1 columns]
