In [48]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from openbabel import openbabel
import pubchempy as pcp
import numpy as np
import py3Dmol
import os
import re
from io import StringIO
from Bio.PDB import PDBParser, PDBList
import ipywidgets as widgets
from ipywidgets import interact, fixed, IntSlider, Text, Dropdown, ToggleButton, Button, FloatSlider, Checkbox, SelectMultiple
from IPython.display import display
obMol = openbabel.OBMol()
obConv = openbabel.OBConversion()
"""IMPORTANT: DO NOT USE ANY OTHER VARIABLES NAMED obMol OR obConv!!!"""

from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdCIPLabeler
from rdkit.Chem import rdAbbreviations
IPythonConsole.drawOptions.addAtomIndices = False
IPythonConsole.ipython_useSVG=True
IPythonConsole.molSize = 300,300



import pymsym

In [49]:
class DataCache:
    def __init__(self):
        self.cache = {}

    def add_data(self, key, value):
        if key not in self.cache:
            self.cache[key] = [value]
        else:
            if value not in self.cache[key]:
                self.cache[key].append(value)

    def get_data(self, key):
        return self.cache.get(key)

    def print_data(self, target):
        if target == 'smiles':
            print(f"Data type: SMILES; Data: {self.cache['SMILES']}")
        elif target == 'cid':
            print(f"Data type: SMILES; Data: {self.cache['CID']}")
        elif target == 'inchi':
            print(f"Data type: SMILES; Data: {self.cache['InChi']}")
        elif target == 'inchikey':
            print(f"Data type: SMILES; Data: {self.cache['InChiKey']}")
        elif target == 'name':
            print(f"Data type: SMILES; Data: {self.cache['Name']}")
        elif target == 'all':
            for key, value in self.cache.items():
                print(f"Data type: {key}; Data: {value}")

In [50]:
class converter:
    def __init__(self, data: str, data_type: str, data_cache: DataCache):
        """Initializes the converter with the input data and its type"""
        self.data = data
        self.data_type = data_type.lower()
        self.data_cache = data_cache

    def convert(self, target_format: str):
        """Converts the input data to the target format"""
        target_format = target_format.lower()
        if self.data_type == 'name':
            self.data_cache.add_data('Name', self.data)
            smiles = self.name_to_smiles()
            sdf = self.name_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.data
        elif self.data_type == 'smiles':
            self.data_cache.add_data('SMILES', self.data)
            smiles = self.data
            sdf = self.smiles_to_sdf()
            if target_format == 'smiles':
                return self.data
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name1()
        elif self.data_type == 'inchi':
            self.data_cache.add_data('InChi', self.data)
            smiles = self.inchi_to_smiles()
            sdf = self.inchi_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.data
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)
        elif self.data_type == 'inchikey':
            self.data_cache.add_data('InChiKey', self.data)
            smiles = self.inchikey_to_smiles()
            sdf = self.inchikey_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.data
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)
        elif self.data_type == 'cid':
            self.data_cache.add_data('CID', self.data)
            smiles = self.cid_to_smiles()
            sdf = self.cid_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.data
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)

    def name_to_smiles(self):
        """Converts molecule name to SMILES"""
        try:
            c = pcp.get_compounds(self.data, 'name')
            smiles = c[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def cid_to_smiles(self):
        """Converts CID to SMILES"""
        try:
            cid = pcp.get_compounds(self.data, 'cid')
            smiles = cid[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def smiles_to_name1(self):
        """Converts SMILES to molecule name"""
        try:
            smi = pcp.get_compounds(self.data, 'smiles')
            name = smi[0].iupac_name
            self.data_cache.add_data('Name', name)
            return name
        except IndexError:
            return None

    def smiles_to_name2(self, smiles):
        """Converts SMILES to molecule name"""
        try:
            smi = pcp.get_compounds(smiles, 'smiles')
            name = smi[0].iupac_name
            self.data_cache.add_data('Name', name)
            return name
        except IndexError:
            return None

    def inchi_to_smiles(self):
        """Converts InChi to SMILES"""
        try:
            ic = pcp.get_compounds(self.data, 'inchi')
            smiles = ic[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def inchikey_to_smiles(self):
        """Converts InChiKey to SMILES"""
        try:
            ick = pcp.get_compounds(self.data, 'inchikey')
            smiles = ick[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def smiles_to_cid(self, smiles):
        """Converts SMILES to CID"""
        try:
            c = pcp.get_cids(smiles, 'smiles', list_return='flat')
            cid = c[0]
            self.data_cache.add_data('CID', cid)
            return cid
        except IndexError:
            return None
        
    def smiles_to_sdf(self):
        """Converts SMILES to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'smiles', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def name_to_sdf(self):
        """Converts name to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'name', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def inchi_to_sdf(self):
        """Converts inchi to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'inchi', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def inchikey_to_sdf(self):
        """Converts inchikey to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'inchikey', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def cid_to_sdf(self):
        """Converts name to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'cid', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def smiles_to_inchi(self, smiles):
        """Converts SMILES to InChI"""
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("smiles", "inchi")
            obConv.ReadString(obMol, smiles)
            ic = obConv.WriteString(obMol)
            self.data_cache.add_data('InChi', ic)
            return ic
        except IndexError:
            return None

    def smiles_to_inchikey(self, smiles):
        """Converts SMILES to InChiKey"""
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("smiles", "inchikey")
            obConv.ReadString(obMol, smiles)
            ick = obConv.WriteString(obMol)
            self.data_cache.add_data('InChiKey', ick)
            return ick
        except IndexError:
            return None

    def sdf_to_xyz(self, sdf, smiles):
        """Converts SDF to XYZ"""
        name = self.smiles_to_name2(smiles)
        directory = './3Dfiles/'
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("sdf", "xyz")
            obConv.ReadString(obMol, sdf)
            xyz = obConv.WriteString(obMol)
            if not os.path.exists(directory):
                os.makedirs(directory)
            file_path = os.path.join('./3Dfiles/', f"{name}.xyz")
            with open(file_path, "w") as file:
                file.write(xyz)
            return xyz
        except IndexError:
            return None

    def sdf_to_zmat(self, sdf, smiles):
        """Converts SDF to zmat"""
        name = self.smiles_to_name2(smiles)
        directory = './3Dfiles/'
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("sdf", "gzmat")
            obConv.ReadString(obMol, sdf)
            zmat = obConv.WriteString(obMol)
            if not os.path.exists(directory):
                os.makedirs(directory)
            file_path = os.path.join('./3Dfiles/', f"{name}.txt")
            with open(file_path, "w") as file:
                file.write(zmat)
            return zmat
        except IndexError:
            return None

In [51]:
cache = DataCache()
sdf_cache = {}
protein_cache = {}

The `get_sdf()` function gets two arguments : `identifier` and `identifier_type`. <br>
`Identifier_type` is a selection of different chemical identifiers such as the name, SMILES, InChI, InCh, InChIKey or CID. <br>
`Identifier` is the name/notation of the molecule in the any identifier type. <br>

For example let's take benzene. <br>
We could write it in different manners: <br>

>`identifier = "C1=CC=CC=C1"` <br>
>`identifier_type = "SMILES"`

or

>`identifier = "241"` <br>
>`identifier_type = "CID"`

In [52]:
def get_sdf(identifier, identifier_type):
    directory = './3Dfiles/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Define the file path for the SDF file
    sdf_file = os.path.join(directory, f"{identifier}.sdf")

    # Check if the SDF file already exists
    if not os.path.exists(sdf_file):
        # If not, convert SMILES to SDF and save it
        molecule = converter(identifier, identifier_type, cache)
        sdf = molecule.convert('sdf')
        if not sdf:
            print("Failed to convert molecule to SDF.")
            return None
        
        with open(sdf_file, 'w') as f:
            f.write(sdf)
    else:
        # If the SDF file exists, read it
        with open(sdf_file, 'r') as f:
            sdf = f.read()

    return sdf

In [53]:
style_dropdown = Dropdown(
    options=['line', 'stick', 'sphere','All'],
    value='All',
    description='Style:')

linewidth_slider = FloatSlider(
    value=2,
    min=1,
    max=10,
    step=1,
    continuous_update=False,
    description='Line Width')

radius_slider = FloatSlider(
    value=0.2,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Atomic radius size') 

scale_slider = FloatSlider(
    value=1,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Sphere size')

In [54]:
def update_visibility(style):
    if style == 'line':
        linewidth_slider.layout.visibility = 'visible'
        radius_slider.layout.visibility = 'hidden'
        scale_slider.layout.visibility = 'hidden'
    elif style =='stick':
        linewidth_slider.layout.visibility = 'hidden'
        radius_slider.layout.visibility = 'visible'
        scale_slider.layout.visibility = 'hidden'
    elif style =='sphere':
        linewidth_slider.layout.visibility = 'hidden'
        radius_slider.layout.visibility = 'hidden'
        scale_slider.layout.visibility = 'visible'
    elif style == 'All':
        linewidth_slider.layout.visibility = 'visible'
        radius_slider.layout.visibility = 'visible'
        scale_slider.layout.visibility = 'visible'

In [55]:
def add_model(view, sdf, style, linewidth, radius, scale):
    """Add the SDF model to the 3Dmol view and apply the given style."""
    view.addModel(sdf, 'sdf')
    view.setBackgroundColor('#000000')
    if style == 'line':
        view.setStyle({'line': {'linewidth': linewidth}})
    elif style == 'stick':
        view.setStyle({'stick': {'radius': radius}})
    elif style == 'sphere':
        view.setStyle({'sphere': {'scale': scale}})
    elif style == 'All':
        view.setStyle({'line': {'linewidth': linewidth}}, viewer=(0,0))
        view.setStyle({'stick': {'radius': radius}}, viewer=(0,1))
        view.setStyle({'sphere': {'scale': scale}}, viewer=(0,2))

The `view3D()` is a 3D visualization tool that renders molecules and allows the user to interact with them, such as zooming or changing the viewing style. 

It takes as input : `data_cache` which is the caching function, `identifier` and `identifier_type` which are the same as described earlier. The remaining parameters are for the interface. `radius` is to change the radius of the sphere for the 'stick' and 'sphere' representations. `linewidth` *should* be changing the width of the 'line' representations, however this functionality doesn't seem to work and we did not find a way around it.
<br>
<br>
To test the function, write what the name or idientifier of the molecule you want next to `identifier` (here we took ethanol as an example). An interface will open below where you can directly change the name of the molecule without changing the code. You can also freely change how the molecule renders using three different visualization type : 'line', 'stick' and 'sphere'. You can also rotate the molecule if you click and hold on the molecule and move your mouse. You can also zoom by using the scrolling wheel or using two fingers on your trackpad.

In [56]:
def view3D(data_cache, identifier, identifier_type, style='All', linewidth='1', radius='0.2', scale='1'):
    """Visualize a molecule in 3D"""

    if identifier not in sdf_cache:  # Check if the SDF file is already cached
        sdf = get_sdf(identifier, identifier_type)  # Fetch the SDF file for the new molecule identifier
        if not sdf:
            print("Failed to fetch SDF file.")
            return None
        sdf_cache[identifier] = sdf # Cache the loaded SDF file
    else:
        sdf = sdf_cache[identifier]
    
    view = py3Dmol.view(width=1000, height=1000)    # 3D visualization
    add_model(view, sdf, style, linewidth, radius, scale) # Parameters
    view.zoomTo()   # Parameter so the user can zoom/dezoom on the molecule
    
    if style == 'All':
        view = py3Dmol.view(width=1500, height=800, viewergrid=(1,3), linked=True)
        add_model(view, sdf, 'All', linewidth, radius, scale)
    return view

interact(view3D, data_cache=fixed(cache), #Example with ethanol
         identifier='ethanol',
         identifier_type=['Name', 'SMILES', 'InChi', 'InChiKey', 'CID'],
         style=style_dropdown,
         linewidth=linewidth_slider,
         radius=radius_slider,
         scale=scale_slider)

style_dropdown.observe(lambda change: update_visibility(change['new']), names='value')

interactive(children=(Text(value='ethanol', description='identifier'), Dropdown(description='identifier_type',…

Explain what `get_pdb()` is.

In [57]:
def get_pdb(identifier):
    directory = './PDBfiles/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Define the file path for the PDB file
    pdb_file = os.path.join(directory, f"{identifier}.pdb")

    # Check if the PDB file already exists
    if not os.path.exists(pdb_file):
        # Fetch the PDB file from the RCSB PDB database
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(identifier, pdir=directory, file_format='pdb')
        print(f"PDB file '{pdb_file}' not found.")
        print(f"Current directory contents: {os.listdir(directory)}")
        
        # Rename the file to match the identifier
        fetched_file = os.path.join(directory, f"pdb{identifier.lower()}.ent")
        if os.path.exists(fetched_file):
            os.rename(fetched_file, pdb_file)
    
    try:
        with open(pdb_file, 'r') as f:
            pdb_content = f.read()
    except Exception as e:
        print(f"Error reading PDB file '{pdb_file}': {e}")
        return None
    
    return pdb_content

def get_chain_identifiers(pdb_content):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('protein', StringIO(pdb_content))
    chains = set()
    for model in structure:
        for chain in model:
            chains.add(chain.id)
    print()
    return sorted(list(chains))

In [58]:
def update_visibility_protein(style):
    if style == 'cartoon':
        radius_protein.layout.visibility = 'hidden'
        scale_protein.layout.visibility = 'hidden'
    elif style =='stick':
        radius_protein.layout.visibility = 'visible'
        scale_protein.layout.visibility = 'hidden'
    elif style =='sphere':
        radius_protein.layout.visibility = 'hidden'
        scale_protein.layout.visibility = 'visible'

def chain_styles_dict(selected_chains, chain_style, chain_color, chain_radius, chain_scale):
    """Create a dictionary of chain styles based on selected chains and their styles."""
    styles = {}
    for chain in selected_chains:
        styles[chain] = {
            'style': chain_style,
            'color': chain_color,
            'radius': chain_radius,
            'scale': chain_scale
        }
    return styles

In [59]:
identifier_widget = Text(
    value='1ZNI',  # Default value
    placeholder='Enter identifier...',
    description='Identifier:',
    disabled=False
)

style_protein = Dropdown(
    options=['cartoon', 'stick', 'sphere',],
    value='cartoon',
    description='Style:')

color_protein = Dropdown(
    options=['spectrum', 'chain', 'element'],
    value='spectrum',
    description='Color:'
)

radius_protein = FloatSlider(
    value=0.2,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Atomic radius size') 

scale_protein = FloatSlider(
    value=1,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Sphere size')

asa_checkbox = Checkbox(
    value = False,
    description = 'Add surface'
)

asa_slider = FloatSlider(
    value=1,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='ASA surface opacity')

chain_select = Dropdown(
    options=[],  # This will be updated dynamically
    value=None,
    description='Chains:'
)

chain_style = Dropdown(
    options=['cartoon', 'stick', 'sphere'],
    value='cartoon',
    description='Chain Style:'
)

chain_color = Dropdown(
    options=['spectrum', 'chain', 'element'],
    value='spectrum',
    description='Chain Color:'
)

chain_radius = FloatSlider(
    value=0.2,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Chain Atomic radius size'
) 

chain_scale = FloatSlider(
    value=1,
    min=0,
    max=1,
    step=0.1,
    continuous_update=False,
    description='Chain Sphere size'
)

In [60]:
def add_protein_model(view, pdb_content, global_style, global_color, global_radius, global_scale, chain_styles, chain_select, surface, opacity):
    """Add the PDB model to the 3Dmol view and apply the given style."""
    view.addModel(pdb_content, 'pdb')
    view.setBackgroundColor('#000000')
    
    # Apply global styles
    if global_style == 'cartoon':
        view.setStyle({'cartoon': {'color': global_color}})
    elif global_style == 'stick':
        view.setStyle({'stick': {'colorscheme': global_color, 'radius': global_radius}})
    elif global_style == 'sphere':
        view.setStyle({'sphere': {'colorscheme': global_color, 'scale': global_scale}})
    
    # Apply chain-specific styles
    for chain, style in chain_styles.items():
        if style['style'] == 'cartoon':
            view.setStyle({'chain': chain, 'cartoon': {'color': style['color']}})
        elif style['style'] == 'stick':
            view.setStyle({'chain': chain, 'stick': {'colorscheme': style['color'], 'radius': style['radius']}})
        elif style['style'] == 'sphere':
            view.setStyle({'chain': chain, 'sphere': {'colorscheme': style['color'], 'scale': style['scale']}})
    
    if surface:
        view.addSurface(py3Dmol.VDW, {'opacity': opacity, 'color': 'spectrum'})


In [61]:
def viewProtein(identifier, global_style='cartoon', global_color='spectrum', global_radius=0.2, global_scale=1, chain_styles={}, chain_select=None, surface=False, opacity=1):
    """Visualize a molecule in 3D"""
    
    if identifier not in protein_cache:
        # Fetch the PDB file for the new protein identifier
        pdb_content = get_pdb(identifier)
        if not pdb_content:
            print("Failed to fetch PDB file.")
            return None
        # Cache the loaded PDB file
        protein_cache[identifier] = pdb_content
    else:
        pdb_content = protein_cache[identifier]
    
    view = py3Dmol.view(width=800, height=800)
    add_protein_model(view, pdb_content, global_style, global_color, global_radius, global_scale, chain_styles, chain_select, surface, opacity)
    view.zoomTo()
    return view

def update_chain_select(identifier):
    pdb_content = get_pdb(identifier)
    if not pdb_content:
        return None
    chains = get_chain_identifiers(pdb_content)
    chain_select.options = chains
    chain_select.value = chains[0]

def update_protein_viewer(identifier, global_style, global_color, global_radius, global_scale, chain_styles, chain_select, surface, opacity):
    view = viewProtein(
        identifier=identifier,
        global_style=global_style,
        global_color=global_color,
        global_radius=global_radius,
        global_scale=global_scale,
        chain_styles=chain_styles,
        chain_select=chain_select,
        surface=surface,
        opacity=opacity
    )
    display(view)

interact(update_protein_viewer,
         identifier=identifier_widget,
         global_style=style_protein,
         global_color=color_protein,
         global_radius=radius_protein,
         global_scale=scale_protein,
         chain_styles=fixed({}),
         chain_select=chain_select,
         surface=asa_checkbox,
         opacity=asa_slider)

style_protein.observe(lambda change: update_visibility_protein(change['new']), names='value')
#update_chain_select('1ZNI')

interactive(children=(Text(value='1ZNI', description='Identifier:', placeholder='Enter identifier...'), Dropdo…

In [62]:
indexs_checkbox = Checkbox(
    value = False,
    description = 'Index atoms (small)'
)

indexl_checkbox = Checkbox(
    value = False,
    description = 'Index atoms (large)'
)

stereo_checkbox = Checkbox(
    value = False,
    description = 'Show stereochemistry' 
)

substructure_input = Text(
    value='',
    description='Enter SMILES substructure'
)

abbr_checkbox = Checkbox(
    value = False,
    description = 'Use abbreviations'
)

In [63]:
def indexs_atoms(molecule):
    IPythonConsole.drawOptions.addAtomIndices = True
    return molecule

def indexl_atoms(molecule):
    for atom in molecule.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx())
    return molecule

def stereo_atoms(molecule):
    IPythonConsole.drawOptions.addStereoAnnotation = True
    rdCIPLabeler.AssignCIPLabels(molecule)
    return molecule

def get_substructure(molecule, smarts):
    substructure = Chem.MolFromSmarts(smarts)
    return molecule.GetSubstructMatches(substructure)

def vis_abbr(molecule):
    abbrevs = rdAbbreviations.GetDefaultAbbreviations()
    return rdAbbreviations.CondenseMolAbbreviations(molecule, abbrevs)

Explain view2D

In [64]:
def view2D(smi, indexs = False, indexl = False, stereo = False, abbreviations =  False, substructure=''):
    molecule = Chem.MolFromSmiles(smi)    
    if molecule:
        if indexs:
            molecule = indexs_atoms(molecule)
        elif indexs == False:
            IPythonConsole.drawOptions.addAtomIndices = False
        if indexl:
            molecule = indexl_atoms(molecule)
        if stereo:
            molecule = stereo_atoms(molecule)
        elif stereo == False:
            IPythonConsole.drawOptions.addStereoAnnotation = False
        if abbreviations:
            molecule = vis_abbr(molecule)
        if substructure:
            matches = get_substructure(molecule, substructure)
        return molecule
    else:
        print(f"The molecule name '{molecule_name}' is not valid.")
        return None

def handle_checkbox(smi, indexs, indexl, stereo, abbreviations, substructure):
    return view2D(smi, indexs, indexl, stereo, abbreviations, substructure)

interact(handle_checkbox, smi='', 
         indexs=indexs_checkbox, 
         indexl=indexl_checkbox, 
         stereo=stereo_checkbox,
         abbreviations=abbr_checkbox,
         substructure=substructure_input)

interactive(children=(Text(value='', description='smi'), Checkbox(value=False, description='Index atoms (small…

<function __main__.handle_checkbox(smi, indexs, indexl, stereo, abbreviations, substructure)>

In [65]:
from rdkit import Chem
from rdkit.Chem import Draw
from ipywidgets import interact, Checkbox, Text
from IPython.display import display

# Function to add indices to atoms
def indexs_atoms(molecule):
    for atom in molecule.GetAtoms():
        atom.SetProp('atomLabel', str(atom.GetIdx()))
    return molecule

# Function to add labels to atoms
def indexl_atoms(molecule):
    for atom in molecule.GetAtoms():
        atom.SetProp('atomLabel', atom.GetSymbol() + str(atom.GetIdx()))
    return molecule

# Function to handle stereochemistry annotations
def stereo_atoms(molecule):
    for bond in molecule.GetBonds():
        if bond.GetStereo() != Chem.rdchem.BondStereo.STEREONONE:
            bond.SetProp('bondNote', str(bond.GetStereo()))
    return molecule

# Function to visualize abbreviations
def vis_abbr(molecule):
    # This is a placeholder for the actual abbreviation visualization logic
    # You will need to implement the logic based on how you want to handle abbreviations
    return molecule

# Function to get substructure matches
def get_substructure(molecule, substructure):
    substructure_mol = Chem.MolFromSmarts(substructure)
    if substructure_mol is not None:
        return molecule.GetSubstructMatches(substructure_mol)
    else:
        print(f"The substructure '{substructure}' is not valid.")
        return []

# Example usage in the view2D function:
# if substructure:
#     matches = get_substructure(molecule, substructure)
#     molecule = Draw.rdMolDraw2D.PrepareAndDrawMolecule(molecule, highlightAtoms=matches)


# Improved view2D function
def view2D(smi, indexs=True, indexl=True, stereo=True, abbreviations=True, substructure=''):
    molecule = Chem.MolFromSmiles(smi)
    if molecule:
        Draw.PrepareMolForDrawing(molecule)  
        if indexs:
            molecule = indexs_atoms(molecule)
        if indexl:
            molecule = indexl_atoms(molecule)
        if stereo:
            molecule = stereo_atoms(molecule)
        if abbreviations:
            molecule = vis_abbr(molecule)
        if substructure:
            matches = get_substructure(molecule, substructure)
            molecule = Draw.rdMolDraw2D.PrepareAndDrawMolecule(molecule, highlightAtoms=matches)
        else:
            molecule = Draw.MolToImage(molecule)  
        display(molecule)  
    else:
        print(f"The SMILES string '{smi}' is not valid.")
        return None

# Function to handle the interaction with checkboxes
def handle_checkbox(smi, indexs, indexl, stereo, abbreviations, substructure):
    view2D(smi, indexs, indexl, stereo, abbreviations, substructure)

# Widgets for user interaction
indexs_checkbox = Checkbox(description='Show Atom Indices')
indexl_checkbox = Checkbox(description='Show Label Indices')
stereo_checkbox = Checkbox(description='Show Stereochemistry')
abbr_checkbox = Checkbox(description='Show Abbreviations')
substructure_input = Text(description='Substructure')

# Interactive widget
interact(handle_checkbox, smi=Text(description='SMILES String'), 
         indexs=indexs_checkbox, 
         indexl=indexl_checkbox, 
         stereo=stereo_checkbox,
         abbreviations=abbr_checkbox,
         substructure=substructure_input)


interactive(children=(Text(value='', description='SMILES String'), Checkbox(value=False, description='Show Ato…

<function __main__.handle_checkbox(smi, indexs, indexl, stereo, abbreviations, substructure)>

In [22]:
m2 = converter('6137', 'cid', cache)

In [25]:
n = m2.convert('smiles')

In [26]:
print(n)

CSCC[C@@H](C(=O)O)N


In [None]:
m1 = converter('MEFKEPWMEQBLKI-AIRLBKTGSA-N', 'inchikey', cache)
m = m1.convert('smiles')
mn = m1.convert('name')
mi = m1.convert('inchi')

In [None]:
m = converter('Erythronolide B', 'name')
name = m.convert('name')
cid = m.convert('cid')
smi = m.convert('smiles')
print(smi)
#name_file = name + '.sdf'
#pcp.download('SDF', name_file, cid, 'cid', overwrite=True)

In [66]:
if isinstance(self, (float,int)):
    raise ValueError(  
        f"Invalid input: '{self}'."
        " Input valid SMILES"
    )

try:
    smi = str(self)
        
except ValueError:     
    raise ValueError(  
        f"Invalid input: '{self}'."
        " `SMILES` must be a string, or convertible to a string."
        f" Original error message: {e}"
    )

except IndexError:
    return None

NameError: name 'self' is not defined

In [67]:
"""SOME PROBLEMS TO FIX :
    - Benzene smiles C1=CC=CC=C1 gives D6h (correct)
    - But inputting "benzene" gives sometimes Cs sometimes Ci
    - Methane gives C1 instead of Td
    - Dones't work on SF6
"""

def get_molecule_name_from_smiles(smiles):
    compounds = pcp.get_compounds(smiles, 'smiles')
    if compounds:
        # Assuming the first compound is the one we want
        compound = compounds[0]
        return compound.iupac_name  # or compound.common_name for common name
    else:
        return "No compound found for the given SMILES."

def get_smiles_from_name_or_confirm_smiles(input_string):
    # This part checks if the input is already a valid SMILES.
    if Chem.MolFromSmiles(input_string) is not None:
        return input_string  # This will return the input SMILES.

    # If it's not, it will search to convert it to SMILES
    compounds = pcp.get_compounds(input_string, 'name')
    if compounds:
        compound = compounds[0]
        return compound.isomeric_smiles
    else:
        return None  # No valid compound was found for the input

def point_group_from_smiles(smiles):  

    # Convert SMILES to a molecule object
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES input. Please provide a valid SMILES string.")
    else:
        # Add hydrogens to the molecule
        mol_with_h = Chem.AddHs(mol)

        # Generate 3D coordinates
        AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDG())

        # Create a list to store coordinates
        coordinates_list = []
        atom_symbols = []
        
        # Iterate over atoms and get their coordinates
        for atom in mol_with_h.GetAtoms():
            pos = mol_with_h.GetConformer().GetAtomPosition(atom.GetIdx())
            coordinates_list.append([pos.x, pos.y, pos.z])
            atom_symbols.append(atom.GetSymbol())

    pg = PointGroup(positions= coordinates_list, symbols=atom_symbols)

    return pg.get_point_group()

if __name__ == "__main__":
    input_string = input("Input SMILES or molecule name: ")
    smiles_name = get_smiles_from_name_or_confirm_smiles(input_string)
    pg = point_group_from_smiles(smiles_name)
    mol_name = get_molecule_name_from_smiles(smiles_name)

    if pg:
        print(f"Point group of {mol_name} is {pg}")


ValueError: molecule has no atoms

Function `identify_chemical_identifier(input_string)` takes as input the identifier name and outputs the converted name of the molecule. It does this by analyzing the string and looking for patterns. For example, in CID there can only be numbers. This work for the following identifier_types : CID, SMILES, InChI, InChIKey and the name.

Let's take for example `input_string = "VNWKTOKETHGBQD-UHFFFAOYSA-N"`, which is the InChIKey for methane
If we parse this string into the function we obtain:

In [70]:
input_string = "VNWKTOKETHGBQD-UHFFFAOYSA-N"
mol_name = identify_chemical_identifier(input_string)
print(mol_name)

methane


If you got an error message and then the molecule name, this is normal. Because we are caching the files, they may not be yet generated, however you will still get the molecule name.

In [39]:
def identify_chemical_identifier(input_string):
    cid_pattern = r'^\d+$'
    smiles_pattern = r'^[CcNnOoPpSsFfClBrIi%0-9=\-\[\]\(\)\/\+\#\$:\.\,\\\/\@]+$'
    inchi_pattern = r'^InChI=1S?\/[0-9A-Za-z\.\/\-\(\),]+$'
    inchikey_pattern = r'^[A-Z]{14}-[A-Z]{10}-[A-Z]$'
    name_pattern = r'^[a-zA-Z0-9\s\-]+[a-zA-Z0-9\s\-]*$'

    if re.match(cid_pattern, input_string):
        mol_name_input = converter(input_string, "CID", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(smiles_pattern, input_string):
        mol_name_input = converter(input_string, "SMILES", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(inchi_pattern, input_string):
        mol_name_input = converter(input_string, "inchi", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(inchikey_pattern, input_string):
        mol_name_input = converter(input_string, "inchikey", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(name_pattern, input_string):
        if is_valid_molecule(input_string):
            return input_string
        else:
            return "Error"
    else:
        return "Unknown format"

The `pg_from_sdf(identifier, identifier_type)` function takes as input the `identifier` of the molecule as well as its `identifier_type` and outputs the point group of the molecule. <br>

It works by converting the input into a sdf file, and then getting the 3D coordinates of all the molecules. Then, using the `pymsym` package, we can use the `pymsym.get_point_group()` function which takes as input the coordinates and atomic numbers of all molecules. It then do some mathematics calculations to **estimate** the point group. <br>

I am putting an accent on the word "estimate" because this function is not very precise. I had lot of false results. I believe it does not support high symmetry point groups like Oh, Td, Ih etc. For example, if we try to get the point group of methane, we obtain "Kh" which is the point group of a sphere, instead of Td. Another example is SF6, we obtain D6h for it which is wrong and should be in reality Oh.
<br>
<br>
However it does work for lot of other molecules ! <br>
Let's try with benzene: we obtain D6h which is correct ! <br>
Or carbon monide: we obtain Cinfv which is also correct.


To conclude with this function, I had lot of problem coding it. I am not really satisfied with it due to the high number of errors in it. Nevertheless I still think it was worth it becuase it taught a difference approach to point groups: using coordinates. 

In [78]:
def pg_from_sdf(identifier, identifier_type):
    sdf_string = get_sdf(identifier, identifier_type)
    mol = Chem.MolFromMolBlock(sdf_string)
    if mol is not None:
        conf = mol.GetConformer()
        coordinates_list = []
        atomic_numbers = []
        for atom in mol.GetAtoms():
            aid = atom.GetIdx()
            pos = conf.GetAtomPosition(aid)
            coordinates_list.append([pos.x, pos.y, pos.z])
            atomic_numbers.append(atom.GetAtomicNum())
        pg = pymsym.get_point_group(atomic_numbers=atomic_numbers, positions=coordinates_list)
        return pg
    else:
        print("Invalid SDF input. Please provide a valid SDF string.")
        return None

if __name__ == "__main__":
    # Example identifier and type
    identifier = "281" # Replace with your identifier
    identifier_type=['Name', 'SMILES', 'InChi', 'InChiKey', 'CID']
    mol_name = identify_chemical_identifier(input_string=identifier)

    sdf_string = get_sdf(identifier, identifier_type)
    pg = pg_from_sdf(identifier, identifier_type)
    
    if pg:
        print(f"Point group of {mol_name} is: {pg}")
    else:
        print("Point group could not be determined.")

Point group of carbon monoxide is: Cinfv


In [81]:
from pyscf import gto, scf, geomopt



def optimize_geometry(sdf_content):
    # Create a molecule object from the SDF content
    mol = gto.Mole()
    mol.fromstring(sdf_content, format='sdf')
    mol.build()

    # Run the Hartree-Fock calculation
    mf = scf.RHF(mol)
    mf.kernel()

    # Perform the geometry optimization
    optimized_mol = geomopt.optimize(mf)

    return optimized_mol

def perform_optimization(identifier, identifier_type):
    # Retrieve the SDF file using the provided function
    sdf_data = get_sdf(identifier, identifier_type)
    
    # Perform geometry optimization
    optimized_molecule = optimize_geometry(sdf_data)
    
    # Return the optimized molecule
    return optimized_molecule

# Example usage:
optimized = perform_optimization('methane', 'name')
print(optimized.atom_coords())


converged SCF energy = -37.572819212192


geometric-optimize called with the following command line:
/Users/alexandrelauris/miniconda3/envs/molvizpy/lib/python3.12/site-packages/ipykernel_launcher.py --f=/Users/alexandrelauris/Library/Jupyter/runtime/kernel-v2-25664ofNrHDtrasny.json

                                        [91m())))))))))))))))/[0m                     
                                    [91m())))))))))))))))))))))))),[0m                
                                [91m*)))))))))))))))))))))))))))))))))[0m             
                        [94m#,[0m    [91m()))))))))/[0m                [91m.)))))))))),[0m          
                      [94m#%%%%,[0m  [91m())))))[0m                        [91m.))))))))*[0m        
                      [94m*%%%%%%,[0m  [91m))[0m              [93m..[0m              [91m,))))))).[0m      
                        [94m*%%%%%%,[0m         [93m***************/.[0m        [91m.)))))))[0m     
                [94m#%%/[0m      [94m(%%%%%%,[0m 


Geometry optimization cycle 1
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536900   0.000000   0.000000    0.000000  0.000000  0.000000
   H   3.073900   0.310000   0.000000   -0.000000 -0.000000  0.000000
   H   2.000000  -0.310000   0.000000    0.000000  0.000000  0.000000
   H   2.226900   0.536900   0.000000    0.000000  0.000000  0.000000
   H   2.846900  -0.536900   0.000000    0.000000  0.000000  0.000000
converged SCF energy = -37.5728192121921
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0006793320    -0.0005157965     0.0000000000
1 H    -1.4668396809    -0.8468271180     0.0000000000
2 H     1.4675499949     0.8473429222     0.0000000000
3 H     0.8473043937    -1.4675417297     0.0000000000
4 H    -0.8473353758     1.4675417220    -0.0000000000
----------------------------------------------
cycle 1: E = -37.5728192122  dE = -37.5728  norm(grad) = 3.3

Step    0 : Gradient = 1.515e+00/1.695e+00 (rms/max) Energy = -37.5728192122
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 1.11646e+00 1.19712e+00 1.23856e+00



Geometry optimization cycle 2
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536907   0.000006   0.000000    0.000007  0.000006  0.000000
   H   3.170714   0.365895   0.000000    0.096814  0.055895  0.000000
   H   1.903173  -0.365904   0.000000   -0.096827 -0.055904  0.000000
   H   2.170997   0.633730  -0.000000   -0.055903  0.096830 -0.000000
   H   2.902809  -0.633726   0.000000    0.055909 -0.096826  0.000000
converged SCF energy = -38.5967807801246
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0002308727    -0.0001716045     0.0000000000
1 H    -0.7146140458    -0.4125624590    -0.0000000000
2 H     0.7148412978     0.4127425699     0.0000000000
3 H     0.4127485975    -0.7148814639     0.0000000000
4 H    -0.4127449768     0.7148729575    -0.0000000000
----------------------------------------------
cycle 2: E = -38.5967807801  dE = -1.02396  norm(grad) = 1.6

Step    1 : Displace = [0m1.000e-01[0m/[0m1.118e-01[0m (rms/max) Trust = 1.000e-01 (=) Grad = [0m7.382e-01[0m/[0m8.255e-01[0m (rms/max) E (change) = -38.5967807801 ([0m-1.024e+00[0m) Quality = [0m0.904[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 1.19712e+00 1.23856e+00 1.37101e+00



Geometry optimization cycle 3
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536919   0.000011  -0.000000    0.000012  0.000005 -0.000000
   H   3.262681   0.419006  -0.000000    0.091967  0.053111 -0.000000
   H   1.811227  -0.419000   0.000000   -0.091946 -0.053096  0.000000
   H   2.117890   0.725694   0.000000   -0.053108  0.091965  0.000000
   H   2.955884  -0.725710  -0.000000    0.053075 -0.091984 -0.000000
converged SCF energy = -39.0676548148295
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0001503533    -0.0001107975     0.0000000000
1 H    -0.3391950237    -0.1958295374     0.0000000000
2 H     0.3393328633     0.1959271519    -0.0000000000
3 H     0.1959200866    -0.3393198249    -0.0000000000
4 H    -0.1959075728     0.3393330080     0.0000000000
----------------------------------------------
cycle 3: E = -39.0676548148  dE = -0.470874  norm(grad) = 0.

Step    2 : Displace = [0m9.498e-02[0m/[0m1.062e-01[0m (rms/max) Trust = 1.414e-01 ([92m+[0m) Grad = [0m3.504e-01[0m/[0m3.918e-01[0m (rms/max) E (change) = -39.0676548148 ([0m-4.709e-01[0m) Quality = [0m1.421[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 7.20233e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 4
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536940   0.000021  -0.000000    0.000022  0.000010  0.000000
   H   3.345776   0.466998  -0.000000    0.083095  0.047993 -0.000000
   H   1.728147  -0.466977   0.000000   -0.083080 -0.047977  0.000000
   H   2.069902   0.808787  -0.000000   -0.047987  0.083093 -0.000000
   H   3.003834  -0.808829  -0.000000    0.047950 -0.083119  0.000000
converged SCF energy = -39.2648414014962
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000680053    -0.0000500358     0.0000000000
1 H    -0.1484296061    -0.0856929360     0.0000000000
2 H     0.1484826019     0.0857294993    -0.0000000000
3 H     0.0857358018    -0.1484731644    -0.0000000000
4 H    -0.0857207923     0.1484866370    -0.0000000000
----------------------------------------------
cycle 4: E = -39.2648414015  dE = -0.197187  norm(grad) = 0.

Step    3 : Displace = [0m8.582e-02[0m/[0m9.596e-02[0m (rms/max) Trust = 2.000e-01 ([92m+[0m) Grad = [0m1.533e-01[0m/[0m1.715e-01[0m (rms/max) E (change) = -39.2648414015 ([0m-1.972e-01[0m) Quality = [0m1.388[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 5
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536963   0.000032  -0.000000    0.000023  0.000012 -0.000000
   H   3.410433   0.504334  -0.000000    0.064657  0.037335 -0.000000
   H   1.663509  -0.504297   0.000000   -0.064637 -0.037319  0.000000
   H   2.032554   0.873429  -0.000000   -0.037348  0.064642 -0.000000
   H   3.041140  -0.873498   0.000000    0.037306 -0.064669  0.000000
converged SCF energy = -39.3293818373074
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000312689    -0.0000236374     0.0000000000
1 H    -0.0558078864    -0.0322177828     0.0000000000
2 H     0.0558275624     0.0322312080     0.0000000000
3 H     0.0322397757    -0.0558202879    -0.0000000000
4 H    -0.0322281828     0.0558305000    -0.0000000000
----------------------------------------------
cycle 5: E = -39.3293818373  dE = -0.0645404  norm(grad) = 0

Step    4 : Displace = [0m6.677e-02[0m/[0m7.466e-02[0m (rms/max) Trust = 2.828e-01 ([92m+[0m) Grad = [0m5.765e-02[0m/[0m6.446e-02[0m (rms/max) E (change) = -39.3293818373 ([0m-6.454e-02[0m) Quality = [0m1.334[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 6
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536982   0.000043  -0.000000    0.000019  0.000010 -0.000000
   H   3.449396   0.526826  -0.000000    0.038963  0.022492 -0.000000
   H   1.624565  -0.526776   0.000000   -0.038945 -0.022479 -0.000000
   H   2.010041   0.912371   0.000000   -0.022514  0.038942  0.000000
   H   3.063617  -0.912464   0.000000    0.022477 -0.038965  0.000000
converged SCF energy = -39.343056194455
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000158729    -0.0000125488     0.0000000000
1 H    -0.0154358488    -0.0089100553     0.0000000000
2 H     0.0154432098     0.0089152212     0.0000000000
3 H     0.0089213791    -0.0154382139    -0.0000000000
4 H    -0.0089128672     0.0154455969    -0.0000000000
----------------------------------------------
cycle 6: E = -39.3430561945  dE = -0.0136744  norm(grad) = 0.

Step    5 : Displace = [0m4.023e-02[0m/[0m4.499e-02[0m (rms/max) Trust = 3.000e-01 ([92m+[0m) Grad = [0m1.595e-02[0m/[0m1.783e-02[0m (rms/max) E (change) = -39.3430561945 ([0m-1.367e-02[0m) Quality = [0m1.248[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 7
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.536994   0.000050  -0.000000    0.000012  0.000007 -0.000000
   H   3.464298   0.535425  -0.000000    0.014903  0.008600 -0.000000
   H   1.609675  -0.535368  -0.000000   -0.014890 -0.008592 -0.000000
   H   2.001424   0.927258   0.000000   -0.008617  0.014887  0.000000
   H   3.072208  -0.927365   0.000000    0.008592 -0.014902  0.000000
converged SCF energy = -39.3443779819026
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000104430    -0.0000084236     0.0000000000
1 H    -0.0023644679    -0.0013643441     0.0000000000
2 H     0.0023672431     0.0013666021     0.0000000000
3 H     0.0013709227    -0.0023639031    -0.0000000000
4 H    -0.0013632549     0.0023700688    -0.0000000000
----------------------------------------------
cycle 7: E = -39.3443779819  dE = -0.00132179  norm(grad) = 

Step    6 : Displace = [0m1.538e-02[0m/[0m1.721e-02[0m (rms/max) Trust = 3.000e-01 (=) Grad = [0m2.444e-03[0m/[0m2.734e-03[0m (rms/max) E (change) = -39.3443779819 ([0m-1.322e-03[0m) Quality = [0m1.140[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 8
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.537001   0.000054  -0.000000    0.000007  0.000004 -0.000000
   H   3.466999   0.536982  -0.000000    0.002701  0.001557 -0.000000
   H   1.606983  -0.536920  -0.000000   -0.002692 -0.001552 -0.000000
   H   1.999857   0.929949   0.000000   -0.001566  0.002691  0.000000
   H   3.073759  -0.930065   0.000000    0.001551 -0.002700  0.000000
converged SCF energy = -39.3444117353461
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000088252    -0.0000070092     0.0000000000
1 H    -0.0001233659    -0.0000709459     0.0000000000
2 H     0.0001239647     0.0000719403     0.0000000000
3 H     0.0000759902    -0.0001215602    -0.0000000000
4 H    -0.0000677638     0.0001275750    -0.0000000000
----------------------------------------------
cycle 8: E = -39.3444117353  dE = -3.37534e-05  norm(grad) =

Step    7 : Displace = [0m2.784e-03[0m/[0m3.118e-03[0m (rms/max) Trust = 3.000e-01 (=) Grad = [92m1.283e-04[0m/[92m1.445e-04[0m (rms/max) E (change) = -39.3444117353 ([0m-3.375e-05[0m) Quality = [0m1.050[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00



Geometry optimization cycle 9
Cartesian coordinates (Angstrom)
 Atom        New coordinates             dX        dY        dZ
   C   2.537007   0.000057  -0.000000    0.000006  0.000003  0.000000
   H   3.467152   0.537070  -0.000000    0.000153  0.000088 -0.000000
   H   1.606837  -0.537003  -0.000000   -0.000146 -0.000083 -0.000000
   H   1.999764   0.930094   0.000000   -0.000093  0.000145  0.000000
   H   3.073840  -0.930218   0.000000    0.000081 -0.000153  0.000000
converged SCF energy = -39.3444118297713
--------------- RHF_Scanner gradients ---------------
         x                y                z
0 C    -0.0000082222    -0.0000063684     0.0000000000
1 H    -0.0000010208    -0.0000005198     0.0000000002
2 H     0.0000002568     0.0000007477     0.0000000002
3 H     0.0000050724     0.0000016078    -0.0000000002
4 H     0.0000039137     0.0000045327    -0.0000000002
----------------------------------------------
cycle 9: E = -39.3444118298  dE = -9.44252e-08  norm(grad) =

Step    8 : Displace = [92m1.543e-04[0m/[92m1.763e-04[0m (rms/max) Trust = 3.000e-01 (=) Grad = [92m5.904e-06[0m/[92m1.040e-05[0m (rms/max) E (change) = -39.3444118298 ([92m-9.443e-08[0m) Quality = [0m1.010[0m
Hessian Eigenvalues: 3.09500e-02 4.28750e-02 5.00000e-02 ... 6.67744e-01 1.19712e+00 1.23856e+00
Converged! =D

    #| If this code has benefited your research, please support us by citing: |#
    #|                                                                        |#
    #| Wang, L.-P.; Song, C.C. (2016) "Geometry optimization made simple with |#
    #| translation and rotation coordinates", J. Chem, Phys. 144, 214108.     |#
    #| http://dx.doi.org/10.1063/1.4952956                                    |#
    Time elapsed since start of run_optimizer: 2.317 seconds


[[ 4.79424831e+00  1.08215073e-04 -3.41128100e-14]
 [ 6.55196844e+00  1.01491501e+00 -1.20367505e-10]
 [ 3.03648168e+00 -1.01478831e+00 -1.19641216e-10]
 [ 3.77900653e+00  1.75762255e+00  1.19881006e-10]
 [ 5.80871504e+00 -1.75785747e+00  1.19996612e-10]]
