In [7]:
from rdkit import Chem
from rdkit.Chem import AllChem, rdDistGeom
from morfeus import read_xyz, XTB

def mol_to_xyz(smiles_list):
    results = [] 
    
    # Iterate over each SMILES string in the list
    for smiles in smiles_list:
        # SMILES to RDKit
        mol = Chem.MolFromSmiles(smiles)
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol, rdDistGeom.ETKDG()) # 3D coordinates 
        num_atoms = mol.GetNumAtoms() # RDKit XYZ string
        xyz_lines = [str(num_atoms)]
        xyz_lines.append(smiles)
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            conf = mol.GetConformer()
            pos = conf.GetAtomPosition(atom.GetIdx())
            xyz_lines.append(f"{symbol} {pos.x:.4f} {pos.y:.4f} {pos.z:.4f}")
        
        # Append the XYZ to results list
        results.append("\n".join(xyz_lines))

    return results

def process_molecules(smiles_list):
    xyz_contents = mol_to_xyz(smiles_list)# SMILES list to XYZ 
    n_nucs = []
    n_eles = []
    for i, xyz_content in enumerate(xyz_contents):
        xyz_file = f"molecule_{i}.xyz"  # XYZ to a file
        with open(xyz_file, 'w') as file:
            file.write(xyz_content)
        
        elements, coordinates = read_xyz(xyz_file) # Morfeus to read XYZ file
        
        xtb = XTB(elements, coordinates) # Create  XTB object 
        
        electrophilicity = xtb.get_global_descriptor("electrophilicity", corrected=False)
        nucleophilicity = xtb.get_global_descriptor("nucleophilicity", corrected=False)
        fukui_electrophilicity = xtb.get_fukui("electrophilicity")
        fukui_nucleophilicity = xtb.get_fukui("nucleophilicity")
        
def get_molecules(smiles_list):
    xyz_contents = mol_to_xyz(smiles_list)# SMILES list to XYZ 
    n_nucs = {}
    n_eles = {}
    for i, xyz_content in enumerate(xyz_contents):
        xyz_file = f"molecule_{i}.xyz"  # XYZ to a file
        with open(xyz_file, 'w') as file:
            file.write(xyz_content)
        
        elements, coordinates = read_xyz(xyz_file) # Morfeus to read XYZ file
        
        xtb = XTB(elements, coordinates) # Create  XTB object 
        
        electrophilicity = xtb.get_global_descriptor("electrophilicity", corrected=False)
        nucleophilicity = xtb.get_global_descriptor("nucleophilicity", corrected=False)
        fukui_electrophilicity = xtb.get_fukui("electrophilicity")
        fukui_nucleophilicity = xtb.get_fukui("nucleophilicity")

        # Fukui electrophilicity
        max_value_el = max(fukui_electrophilicity.values())
        for idx, element in enumerate(elements):
            f_e = fukui_electrophilicity.get(idx+1, 0.0)  # Default to 0.0 if the key is missing
            if f_e == max_value_el:
                n_eles.update({smiles_list[i]: f_e})

                
        # Fukui nucleophilicity
        max_value_nucs = max(fukui_nucleophilicity.values())
        for idx, element in enumerate(elements):
            f_n = fukui_nucleophilicity.get(idx + 1, 0.0)
            if f_n == max_value_nucs:
                n_nucs.update({smiles_list[i] : f_n})

    n_eles = dict(sorted(n_eles.items(), key=lambda item: item[1]))
    n_nucs = dict(sorted(n_nucs.items(), key=lambda item: item[1]))

    if nucl_elec == "E":
        print(n_nucs) #nucleophilicity sorted
    else:
        print(n_eles) #electrophilicity sorted



user_input = input("Enter a list of SMILES strings separated by commas: ")
smiles_list_input = user_input.split(',')
smiles_list_input = [smiles.strip() for smiles in smiles_list_input]

nucl_elec = input("Should we be ranking according to electrophilicity(E) or nucleophilicity(N)?")

process_molecules(smiles_list_input)
get_molecules(smiles_list_input)

Enter a list of SMILES strings separated by commas:  CNO, CNC, CN, Nc1ccccc1
Should we be ranking according to electrophilicity(E) or nucleophilicity(N)? N


{'Nc1ccccc1': 0.10525711225698411, 'CNO': 0.23716284341898616, 'CN': 0.23835523288645338, 'CNC': 0.23926808641228736}


In [None]:
CC=O, CC(=O)C, O=COC, CN(C)C(C)=O

In [None]:
CNO, CNC, CN, Nc1ccccc1