# Molecular Descriptors

In [25]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, Draw, MACCSkeys
from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect

## Molecules under study
We will use SMILES strings to represent the molecules in this example.

In [26]:
# Example molecules, keyed by common name and written as SMILES strings
example_molecules = {
    "Ethanol": "CCO",
    "Aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
    "Benzene": "C1=CC=CC=C1",
    "Triethylamine": "CCN(CC)CC",
}

# SMILES only, in the insertion order above (index 0 is ethanol)
smiles_list = list(example_molecules.values())



In [27]:
# Defining the descriptors
def calculate_descriptors(mol):
    """Compute a small set of RDKit descriptors for one molecule.

    Args:
        mol: Molecule object passed unchanged to each ``Descriptors``
            function (in this notebook, the result of
            ``Chem.MolFromSmiles``).

    Returns:
        dict: Descriptor name -> value, with the keys ``"MolWt"``,
        ``"LogP"``, ``"NumHDonors"``, ``"NumHAcceptors"`` and ``"TPSA"``,
        in that order.
    """
    # Output column name paired with the RDKit function that produces it.
    descriptor_functions = (
        ("MolWt", Descriptors.MolWt),
        ("LogP", Descriptors.MolLogP),
        ("NumHDonors", Descriptors.NumHDonors),
        ("NumHAcceptors", Descriptors.NumHAcceptors),
        ("TPSA", Descriptors.TPSA),
    )
    return {label: compute(mol) for label, compute in descriptor_functions}

# Molecular Descriptors calculation
# Parse each SMILES string and collect one descriptor record (a dict) per
# molecule that RDKit could parse.
data = []
fingerprints = []  # not filled in this cell; presumably used by other cells -- TODO confirm
skipped_smiles = []  # inputs RDKit could not parse, kept so they can be reported
for smi in smiles_list:
    mol = Chem.MolFromSmiles(smi)
    # MolFromSmiles returns None when parsing fails; test for that explicitly
    # instead of relying on the truthiness of the molecule object.
    if mol is None:
        skipped_smiles.append(smi)
        continue
    desc = calculate_descriptors(mol)
    desc["SMILES"] = smi
    data.append(desc)

# Make dropped inputs visible rather than silently shrinking the table.
if skipped_smiles:
    print(f"Skipped {len(skipped_smiles)} unparsable SMILES: {skipped_smiles}")

# Convert the output to a DataFrame
df = pd.DataFrame(data)
print("Molecular descriptors:")
print(df)

Molecular descriptors:
     MolWt    LogP  NumHDonors  NumHAcceptors   TPSA                    SMILES
0   46.069 -0.0014           1              1  20.23                       CCO
1  180.159  1.3101           1              3  63.60  CC(=O)OC1=CC=CC=C1C(=O)O
2   78.114  1.6866           0              0   0.00               C1=CC=CC=C1
3  101.193  1.3481           0              1   3.24                 CCN(CC)CC


In [28]:
# MACCS structural-key fingerprint for the first example molecule (ethanol).
# MACCSkeys is imported in the notebook's import cell at the top.
ethanol = Chem.MolFromSmiles(smiles_list[0])
maccs_fp = MACCSkeys.GenMACCSKeys(ethanol)
# Show the fingerprint as a string of '0'/'1' characters, one per bit
print(maccs_fp.ToBitString())

00000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000001000010000000000000000000000001000000000000010101001000100
