In [6]:
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
import numpy as np
import json

In [2]:
def analyze_molecule(smiles):
    """
    Analyze a molecule's structural features using SMILES string.
    Returns information about rings, functional groups, and basic properties.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "Invalid SMILES string"
    
    analysis = {}
    
    # Ring analysis
    analysis['ring_info'] = {
        'total_rings': rdMolDescriptors.CalcNumRings(mol),
        'aromatic_rings': rdMolDescriptors.CalcNumAromaticRings(mol),
        'aliphatic_rings': rdMolDescriptors.CalcNumAliphaticRings(mol),
        'ring_sizes': [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    }
    
    # Functional group analysis
    analysis['functional_groups'] = {
        'alcohols': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[OH]'))),
        'carboxylic_acids': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[CX3](=O)[OH]'))),
        'amines': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[NX3;H2,H1;!$(NC=O)]'))),
        'amides': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[NX3;H2,H1;$(NC=O)]'))),
        'aldehydes': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[CH2]=O'))),
        'ketones': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6][CX3](=O)[#6]'))),
        'esters': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[#6]C(=O)O[#6]'))),
        'ethers': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[OD2]([#6])[#6]')))
    }
    
    # Basic properties
    analysis['properties'] = {
        'molecular_weight': rdMolDescriptors.CalcExactMolWt(mol),
        'rotatable_bonds': rdMolDescriptors.CalcNumRotatableBonds(mol),
        'h_bond_donors': rdMolDescriptors.CalcNumHBD(mol),
        'h_bond_acceptors': rdMolDescriptors.CalcNumHBA(mol),
        'sp3_carbons': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[CX4]')))
    }
    
    return analysis

# Example usage
test_molecules = {
    'aspirin': 'CC(=O)OC1=CC=CC=C1C(=O)O',
    'benzene': 'c1ccccc1',
    'glucose': 'C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O'
}

for name, smiles in test_molecules.items():
    print(f"\nAnalyzing {name}:")
    result = analyze_molecule(smiles)
    print(result)


Analyzing aspirin:
{'ring_info': {'total_rings': 1, 'aromatic_rings': 1, 'aliphatic_rings': 0, 'ring_sizes': [6]}, 'functional_groups': {'alcohols': 1, 'carboxylic_acids': 1, 'amines': 0, 'amides': 0, 'aldehydes': 0, 'ketones': 0, 'esters': 1, 'ethers': 1}, 'properties': {'molecular_weight': 180.042258736, 'rotatable_bonds': 2, 'h_bond_donors': 1, 'h_bond_acceptors': 3, 'sp3_carbons': 1}}

Analyzing benzene:
{'ring_info': {'total_rings': 1, 'aromatic_rings': 1, 'aliphatic_rings': 0, 'ring_sizes': [6]}, 'functional_groups': {'alcohols': 0, 'carboxylic_acids': 0, 'amines': 0, 'amides': 0, 'aldehydes': 0, 'ketones': 0, 'esters': 0, 'ethers': 0}, 'properties': {'molecular_weight': 78.046950192, 'rotatable_bonds': 0, 'h_bond_donors': 0, 'h_bond_acceptors': 0, 'sp3_carbons': 0}}

Analyzing glucose:
{'ring_info': {'total_rings': 1, 'aromatic_rings': 0, 'aliphatic_rings': 1, 'ring_sizes': [6]}, 'functional_groups': {'alcohols': 5, 'carboxylic_acids': 0, 'amines': 0, 'amides': 0, 'aldehydes': 

In [3]:
def analyze_specific_rings(smiles):
    """
    Identify specific types of ring systems
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "Invalid SMILES string"
    
    ring_types = {
        'benzene': len(mol.GetSubstructMatches(Chem.MolFromSmarts('c1ccccc1'))),
        'pyridine': len(mol.GetSubstructMatches(Chem.MolFromSmarts('n1ccccc1'))),
        'pyrrole': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[nH]1cccc1'))),
        'furan': len(mol.GetSubstructMatches(Chem.MolFromSmarts('o1cccc1'))),
        'thiophene': len(mol.GetSubstructMatches(Chem.MolFromSmarts('s1cccc1'))),
        'cyclohexane': len(mol.GetSubstructMatches(Chem.MolFromSmarts('C1CCCCC1'))),
        'cyclopentane': len(mol.GetSubstructMatches(Chem.MolFromSmarts('C1CCCC1')))
    }
    
    return ring_types

def analyze_substituents(smiles):
    """
    Analyze common substituents
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "Invalid SMILES string"
    
    substituents = {
        'methyl': len(mol.GetSubstructMatches(Chem.MolFromSmarts('C[#6]'))),
        'ethyl': len(mol.GetSubstructMatches(Chem.MolFromSmarts('CC[#6]'))),
        'phenyl': len(mol.GetSubstructMatches(Chem.MolFromSmarts('c1ccccc1[#6]'))),
        'chloro': len(mol.GetSubstructMatches(Chem.MolFromSmarts('Cl'))),
        'fluoro': len(mol.GetSubstructMatches(Chem.MolFromSmarts('F'))),
        'bromo': len(mol.GetSubstructMatches(Chem.MolFromSmarts('Br'))),
        'nitro': len(mol.GetSubstructMatches(Chem.MolFromSmarts('[N+](=O)[O-]'))),
        'sulfo': len(mol.GetSubstructMatches(Chem.MolFromSmarts('S(=O)(=O)[OH]')))
    }
    
    return substituents

In [4]:
def analyze_stereochemistry(smiles):
    """
    Analyze stereochemical features
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "Invalid SMILES string"
    
    stereo_info = {
        'chiral_centers': len(Chem.FindMolChiralCenters(mol)),
        'e_z_double_bonds': len([b for b in mol.GetBonds() if b.GetStereo() != Chem.BondStereo.STEREONONE]),
        'cis_trans_cycles': len([b for b in mol.GetBonds() if b.IsInRing() and b.GetStereo() != Chem.BondStereo.STEREONONE])
    }
    
    return stereo_info

In [13]:
def complete_molecule_analysis(smiles):
    """
    Perform a complete analysis of a molecule
    """
    return {
        'basic_analysis': analyze_molecule(smiles),
        'ring_types': analyze_specific_rings(smiles),
        'substituents': analyze_substituents(smiles),
        'stereochemistry': analyze_stereochemistry(smiles)
    }

# Example usage
molecule = 'CC1=C(C(=O)CC(C1=O)C)OC'  # Example molecule
test_molecules = {
    'aspirin': 'CC(=O)OC1=CC=CC=C1C(=O)O',
    'benzene': 'c1ccccc1',
    'glucose': 'C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O'
}

analysis = complete_molecule_analysis(test_molecules['aspirin'])
print(json.dumps(analysis, indent=2))
# for category, results in analysis.items():
#     print(f"\n{category.upper()}:")
#     print(json.dumps(results, indent=2))

{
  "basic_analysis": {
    "ring_info": {
      "total_rings": 1,
      "aromatic_rings": 1,
      "aliphatic_rings": 0,
      "ring_sizes": [
        6
      ]
    },
    "functional_groups": {
      "alcohols": 1,
      "carboxylic_acids": 1,
      "amines": 0,
      "amides": 0,
      "aldehydes": 0,
      "ketones": 0,
      "esters": 1,
      "ethers": 1
    },
    "properties": {
      "molecular_weight": 180.042258736,
      "rotatable_bonds": 2,
      "h_bond_donors": 1,
      "h_bond_acceptors": 3,
      "sp3_carbons": 1
    }
  },
  "ring_types": {
    "benzene": 1,
    "pyridine": 0,
    "pyrrole": 0,
    "furan": 0,
    "thiophene": 0,
    "cyclohexane": 0,
    "cyclopentane": 0
  },
  "substituents": {
    "methyl": 2,
    "ethyl": 0,
    "phenyl": 1,
    "chloro": 0,
    "fluoro": 0,
    "bromo": 0,
    "nitro": 0,
    "sulfo": 0
  },
  "stereochemistry": {
    "chiral_centers": 0,
    "e_z_double_bonds": 0,
    "cis_trans_cycles": 0
  }
}
