In [11]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors

### TODO: 1. Functions related to molecular complexity MC1 and MC2

In [1]:
# TODO: HAC calculation
def hac(smiles):
    """Return the heavy atom count (HAC) of the molecule described by ``smiles``.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    int
        Value reported by ``GetNumHeavyAtoms()`` for the parsed molecule.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None; fail loudly
    # here instead of crashing later with an opaque AttributeError.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")

    return mol.GetNumHeavyAtoms()

In [4]:
# TODO: Calculate the number of divalent nodes
def count_divalent_nodes(smiles):
    """Count the atoms of a molecule whose degree is exactly 2.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    int
        Number of atoms for which ``GetDegree()`` equals 2, or 0 when the
        SMILES string cannot be parsed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        # Unparsable SMILES is treated as having no divalent nodes.
        return 0

    degrees = [node.GetDegree() for node in molecule.GetAtoms()]
    return degrees.count(2)

In [2]:
# TODO: Calculate the FDV
def divalent_nodes_fraction(smiles):
    """Return the fraction of divalent nodes (FDV) of a molecule.

    The fraction is the number of atoms whose ``GetDegree()`` equals 2
    divided by the total number of atoms yielded by ``GetAtoms()``,
    rounded to two decimal places.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    float
        Divalent-node fraction, rounded to two decimals.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed, or if the parsed molecule
        contains no atoms (the fraction is undefined in that case).
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles returns None on a parse failure.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")

    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    # An empty molecule (e.g. from an empty SMILES string) would otherwise
    # trigger a ZeroDivisionError below.
    if not degrees:
        raise ValueError(
            f"Molecule has no atoms; divalent-node fraction is undefined: {smiles!r}"
        )

    return round(degrees.count(2) / len(degrees), 2)

In [3]:
# TODO: Count the number of C=O-X structures
def count_C_O_X(smiles):
    """Count C=O groups whose carbon also bears a nitrogen or oxygen neighbour.

    Every match of the SMARTS pattern ``[C,c]=O`` is inspected; a match is
    counted once if the carbon has at least one neighbour, other than the
    matched oxygen, with atomic number 7 (N) or 8 (O).

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    int
        Number of qualifying C=O matches.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        raise ValueError("Invalid SMILES string.")

    carbonyl_query = Chem.MolFromSmarts('[C,c]=O')
    hetero_atomic_numbers = (7, 8)  # nitrogen, oxygen

    total = 0
    for c_idx, o_idx in molecule.GetSubstructMatches(carbonyl_query):
        carbon = molecule.GetAtomWithIdx(c_idx)
        for nbr in carbon.GetNeighbors():
            # Skip the oxygen that is part of the matched C=O itself.
            if nbr.GetIdx() == o_idx:
                continue
            if nbr.GetAtomicNum() in hetero_atomic_numbers:
                # One qualifying neighbour is enough; count the match once.
                total += 1
                break

    return total

### TODO: 2. Functions of MC1 and MC2

In [5]:
# TODO: MC1 calculation
def mc1(smiles):
    """Return the MC1 molecular-complexity value of a molecule.

    MC1 is computed as ``1 - FDV``, where FDV is the value returned by
    ``divalent_nodes_fraction(smiles)``.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    float
        MC1, rounded to two decimal places.
    """
    fdv = divalent_nodes_fraction(smiles)

    # FDV arrives rounded to two decimals, but the subtraction can
    # reintroduce binary floating-point noise (1 - 0.33 evaluates to
    # 0.6699999999999999), so round the result again.
    return round(1 - fdv, 2)

In [13]:
# TODO: MC2 calculation
def mc2(smiles):
    """Return the MC2 molecular-complexity value of a molecule.

    MC2 is the heavy atom count minus the number of divalent nodes minus
    twice the number of C=O groups counted by ``count_C_O_X``.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    int
        ``hac(smiles) - count_divalent_nodes(smiles) - 2 * count_C_O_X(smiles)``.
    """
    heavy_atoms = hac(smiles)
    divalent = count_divalent_nodes(smiles)
    carbonyl_hetero = count_C_O_X(smiles)

    return heavy_atoms - divalent - 2 * carbonyl_hetero

### TODO: 3. Calculation of MC1 and MC2

In [31]:
# Example: evaluate MC1 for a sample SMILES string; the cell output is the returned value.
mc1("COC1=C(O)C=C(CC(=O)O)C=C1Br")

0.71

In [32]:
# Example: evaluate MC2 for the same sample SMILES string used for MC1.
mc2("COC1=C(O)C=C(CC(=O)O)C=C1Br")

8