<a href="https://colab.research.google.com/github/drfperez/DeepPurpose/blob/main/DescriptorsfromSMILES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# RDKit Installation (in Google Colab)
!pip install rdkit

# Importing necessary libraries
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import CalcEEMcharges
from google.colab import files

# Definition of the function to calculate descriptors
def _eem_charge_summary(mol):
    """Return ``(total, mean)`` of the EEM partial charges of *mol*.

    The charges are computed on a hydrogen-complete copy of *mol* with one
    embedded 3D conformer, so the mean is taken over all atoms including
    hydrogens. Returns ``(nan, nan)`` when no conformer could be embedded.
    """
    # RDKit warns "Molecule does not have explicit Hs" when embedding a
    # hydrogen-less molecule; add them so the geometry (and therefore the
    # geometry-dependent EEM charges) is meaningful.
    mol_h = Chem.AddHs(mol)

    # CalcEEMcharges reads a single conformer, so one embedding is enough.
    # EmbedMolecule returns -1 when embedding fails.
    if AllChem.EmbedMolecule(mol_h, randomSeed=42) < 0:
        return float('nan'), float('nan')

    charges = CalcEEMcharges(mol_h)
    return sum(charges), np.mean(charges)


def calculate_descriptors(compound):
    """Compute a dictionary of RDKit descriptors for one compound.

    Args:
        compound: SMILES string of the compound. Leading and trailing
            whitespace is ignored.

    Returns:
        dict mapping descriptor name to value. The keys, in insertion
        order, are: 'Molecular weight', 'Surface area' (this is the
        topological polar surface area, TPSA), 'LogP', 'Number of atoms'
        (atoms of the parsed molecule, i.e. without added hydrogens),
        'Number of rotatable bonds', 'Fraction of sp3 carbons',
        'Number of hydrogen donors', 'Number of hydrogen acceptors',
        'Formal charge', 'Total charge', 'Average charge', 'Number of rings',
        'Number of cycles' and 'HOMO-LUMO gap (estimated)'.

        'Total charge' and 'Average charge' are NaN when no 3D conformer
        could be generated. 'HOMO-LUMO gap (estimated)' is always NaN (see
        the note in the body).

    Raises:
        ValueError: if *compound* is not a string, is empty/blank, or
            cannot be parsed as SMILES.
    """
    smiles = compound.strip() if isinstance(compound, str) else ""
    if not smiles:
        raise ValueError(f"Empty or non-string SMILES: {compound!r}")

    # MolFromSmiles signals a parse failure by returning None, not by raising.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Could not parse SMILES: {smiles!r}")

    descriptors = {}

    # Basic descriptors:
    descriptors['Molecular weight'] = Descriptors.MolWt(mol)
    descriptors['Surface area'] = Descriptors.TPSA(mol)
    descriptors['LogP'] = Descriptors.MolLogP(mol)
    descriptors['Number of atoms'] = mol.GetNumAtoms()
    descriptors['Number of rotatable bonds'] = Descriptors.NumRotatableBonds(mol)

    # Additional descriptors for docking:
    descriptors['Fraction of sp3 carbons'] = Descriptors.FractionCSP3(mol)
    descriptors['Number of hydrogen donors'] = Descriptors.NumHDonors(mol)
    descriptors['Number of hydrogen acceptors'] = Descriptors.NumHAcceptors(mol)

    # Formal topological charge
    descriptors['Formal charge'] = Chem.GetFormalCharge(mol)

    # Electrostatic descriptors (EEM partial charges, need a 3D structure):
    total_charge, average_charge = _eem_charge_summary(mol)
    descriptors['Total charge'] = total_charge
    descriptors['Average charge'] = average_charge

    # Connectivity / topological descriptors. Both keys are kept so the
    # output columns stay the same; for the sample output in this notebook
    # they report the same value.
    descriptors['Number of rings'] = mol.GetRingInfo().NumRings()
    descriptors['Number of cycles'] = Descriptors.RingCount(mol)

    # NOTE: no quantum-chemical calculation is performed here, so there is no
    # HOMO-LUMO gap to report. This entry used to be filled with
    # FractionCSP3, which is unrelated to orbital energies; the key is kept
    # for column compatibility but the value is NaN until a real estimate
    # is wired in.
    descriptors['HOMO-LUMO gap (estimated)'] = float('nan')

    return descriptors

# Read the chemical compounds as SMILES separated by commas.
chemical_compounds = input("Enter the SMILES structures of the chemical compounds separated by commas: ")

# Strip whitespace around each entry and drop empty ones, so that input such
# as "CCO, CCN" or a trailing comma does not hand a blank or space-padded
# string to the descriptor calculation.
chemical_compounds_list = [
    entry.strip() for entry in chemical_compounds.split(',') if entry.strip()
]
if not chemical_compounds_list:
    raise ValueError("No SMILES structures were entered.")

# Calculation of descriptors for each compound (one dict per compound, in
# the order the compounds were entered)
all_compound_descriptors = [
    calculate_descriptors(compound) for compound in chemical_compounds_list
]

# Convert descriptors to DataFrame (one row per compound)
df = pd.DataFrame(all_compound_descriptors)

# Save DataFrame to CSV
output_file_path = 'compound_descriptors.csv'
df.to_csv(output_file_path, index=False)

# Download CSV file (google.colab helper)
files.download(output_file_path)

# Show descriptors
print("\nDescriptors of the chemical compounds:")
print(df)

Enter the SMILES structures of the chemical compounds separated by commas: CC1C(=O)OC2C1(C34C(=O)OC5C3(C2O)C6(C(C5)C(C)(C)C)C(C(=O)OC6O4)O)O,CC1C(=O)OC2C1(C34C(=O)OC5C3(C2O)C6(C(C5)C(C)(C)C)C(C(=O)OC6O4)O)O


[17:54:14] Molecule does not have explicit Hs. Consider calling AddHs()
[17:54:14] Molecule does not have explicit Hs. Consider calling AddHs()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Descriptors of the chemical compounds:
   Molecular weight  Surface area    LogP  Number of atoms  \
0           424.402        148.82 -1.3695               30   
1           424.402        148.82 -1.3695               30   

   Number of rotatable bonds  Fraction of sp3 carbons  \
0                          0                     0.85   
1                          0                     0.85   

   Number of hydrogen donors  Number of hydrogen acceptors  Formal charge  \
0                          3                            10              0   
1                          3                            10              0   

   Total charge  Average charge  Number of rings  Number of cycles  \
0 -1.443290e-15   -4.810966e-17                6                 6   
1 -1.443290e-15   -4.810966e-17                6                 6   

   HOMO-LUMO gap (estimated)  
0                       0.85  
1                       0.85  
