In [2]:
import PySimpleGUI as sg
from rdkit import Chem
import pubchempy as pcp
from pubchempy import get_compounds
from rdkit.Chem import Draw
import pandas as pd
import re
import sys
import traceback
import xml.etree.ElementTree as ET
from typing import Optional
import pandas as pd
import requests

from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.ipython_useSVG=True

In [6]:

# Build an RDKit molecule from the SMILES of the first PubChem hit for a name.
def get_test(compound):
    """Return an RDKit molecule for the first PubChem compound matching a name.

    Args:
        compound: Text passed to ``pcp.get_compounds`` with the ``'name'``
            namespace.

    Returns:
        The value of ``Chem.MolFromSmiles`` applied to the ``isomeric_smiles``
        attribute of the first hit, or ``None`` when the search returns no
        hits.
    """
    matches = pcp.get_compounds(compound, 'name')
    first_hit = next(iter(matches), None)
    if first_hit is None:
        # No compound found under that name.
        return None
    return Chem.MolFromSmiles(first_hit.isomeric_smiles)



# The identifier can be a CAS number or a compound name; say which one it is with the second argument,
# e.g. pka_lookup_pubchem("acetic acid", "name") or pka_lookup_pubchem("64-19-7", "cas").

# Module-level switch: when True, pka_lookup_pubchem prints diagnostics and tracebacks.
debug = False


def pka_lookup_pubchem(identifier, namespace=None, domain='compound') -> Optional[dict]:
    """Look up the pKa of a compound in PubChem.

    The compound is first resolved to a PubChem CID, then the
    "Dissociation Constants" section of its PUG View record is downloaded
    and parsed.

    Args:
        identifier: Name, CAS number, SMILES, InChI or InChIKey of the compound.
        namespace: How ``identifier`` should be interpreted. ``None`` lets
            ``classify`` guess; ``'cas'`` is searched through PubChem's
            ``'name'`` namespace; any other value is passed straight to
            ``pcp.get_cids``.
        domain: Accepted for interface compatibility; not used by this function.

    Returns:
        A dict with the keys ``source``, ``Pubchem_CID``, ``pKa``,
        ``reference`` and ``Substance_CASRN`` plus the properties returned by
        ``pcp.get_properties`` (with ``CanonicalSMILES``, ``IsomericSMILES``
        and ``IUPACName`` renamed to ``Canonical_SMILES``, ``Isomeric_SMILES``
        and ``IUPAC_Name``), or ``None`` when anything goes wrong. Exceptions
        are never propagated; in debug mode their traceback is printed.
    """
    global debug

    if len(sys.argv) == 2 and sys.argv[1] in ['--debug=True', '--debug=true', '--debug', '-d']:
        debug = True

    # Identify lookup source (Pubchem in this case)
    lookup_source = 'Pubchem'
    # XML namespace prefix used by every element of a PUG View record.
    pug_view_ns = '{http://pubchem.ncbi.nlm.nih.gov/pug_view}'

    try:
        headers = {
            'user-agent': 'Mozilla/5.0 (X11; CentOS; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36'}
        # Resolve the identifier to a CID with the PubChem API; lookups return a list.
        cids = []
        identifier_type = ''

        if not namespace:
            # NOTE(review): `classify` is not defined in the visible part of this
            # notebook. If it is missing at runtime the NameError is swallowed
            # by the handler below and None is returned -- confirm it exists.
            identifier_type = classify(identifier)

            # If the input is inchi, inchikey or smiles (this could be a false smiles):
            if identifier_type in ['smiles', 'inchi', 'inchikey']:
                lookup = pcp.get_cids(identifier, namespace=identifier_type)
                if lookup:
                    cids.append(lookup[0])
            else:
                lookup = pcp.get_cids(identifier, namespace='name')
                if lookup:
                    cids.append(lookup[0])
        elif namespace == 'cas':
            # CAS numbers are searched as names.
            cids = pcp.get_cids(identifier, namespace='name')
        else:
            cids = pcp.get_cids(identifier, namespace=namespace)

        if not cids:
            # Last resort: retry as a plain name search.
            lookup = pcp.get_cids(identifier, namespace='name')
            if lookup:
                cids.append(lookup[0])

            identifier_type = namespace

        if len(cids) > 0:
            # if Pubchem found the result, get the first result of the list
            cid = cids[0]

            exact_match = True

            synonyms = pcp.get_synonyms(cid)[0]['Synonym'] or []

            # Extract CAS number from the list of synonyms
            returned_cas = ''
            for synonym in synonyms:
                cas_nr = re.search(r'^\d{2,7}-\d{2}-\d$', synonym)
                if cas_nr:
                    returned_cas = cas_nr.group()
                    break

            lookup_result = pcp.get_properties(['inchi', 'inchikey',
                                        'canonical_smiles', 'isomeric_smiles',
                                        'iupac_name'],
                                cid)

            if identifier_type == 'cas':
                # Double check the CAS number: it must appear among the synonyms
                # of the compound that PubChem returned.
                exact_match = identifier in synonyms

            elif identifier_type in ['inchi', 'inchikey']:

                if identifier_type == 'inchi':
                    exact_match = (identifier == lookup_result[0].get('InChI', False))

                elif identifier_type == 'inchikey':
                    exact_match = (identifier == lookup_result[0].get('InChIKey', False))

            if not exact_match:
                if debug:
                    print(f'Exact match between input and Pubchem return value? {identifier in synonyms}')
                raise ValueError('This is not an exact match on Pubchem!')

            pka_lookup_result_xml = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{}/XML?heading=Dissociation+Constants'.format(cid)

            # Download the "Dissociation Constants" section for this CID.
            r = requests.get(pka_lookup_result_xml, headers=headers, timeout=15)
            # Only accept a direct OK answer (status 200, no redirect).
            if r.status_code == 200 and len(r.history) == 0:
                tree = ET.fromstring(r.text)

                # First <Information> element of the record.
                info_node = tree.find('.//*' + pug_view_ns + 'Information')
                if info_node is None:
                    raise RuntimeError('pKa not found in Pubchem.')

                # Reference and value of the pKa entry; a missing node means
                # there is no usable pKa entry, reported explicitly here rather
                # than through an accidental AttributeError.
                reference_node = info_node.find(pug_view_ns + 'Reference')
                string_node = info_node.find('.//*' + pug_view_ns + 'String')
                if reference_node is None or string_node is None or string_node.text is None:
                    raise RuntimeError('pKa not found in Pubchem.')

                original_source = reference_node.text
                # remove 'pKa = ' part out of the string answer
                pka_result = re.sub(r'^pKa = ', '', string_node.text)

                core_result = {
                    'source': lookup_source,
                    'Pubchem_CID': str(cid),
                    'pKa': pka_result,
                    'reference': original_source,
                    'Substance_CASRN': returned_cas,
                }
                extra_info = lookup_result[0]
                extra_info.pop('CID', None)    # Remove 'CID': ... from lookup_result[0]

                # Merge both dicts and rename the PubChem property keys.
                renamed_keys = {
                    'CanonicalSMILES': 'Canonical_SMILES',
                    'IsomericSMILES': 'Isomeric_SMILES',
                    'IUPACName': 'IUPAC_Name',
                }
                merged = {**core_result, **extra_info}
                result = {renamed_keys.get(key, key): value for key, value in merged.items()}
                return result

            else:
                raise RuntimeError('pKa not found in Pubchem.')

        else:
            raise RuntimeError('Compound not found in Pubchem.')

    except Exception as error:
        if debug:
            # Positional arguments: the `etype` keyword was removed in Python 3.10.
            traceback_str = ''.join(traceback.format_exception(type(error), error, error.__traceback__))
            print(traceback_str)

        return None



# Sort molecules by increasing pKa and return them as (pKa, name) pairs.
def pka_increasing(list):
    """Return ``(pKa, name)`` tuples sorted by increasing pKa.

    Each name is looked up with ``pka_lookup_pubchem(name, 'name')``. The
    leading number of the returned ``'pKa'`` text is kept as a string, so
    ``'4.76 at 25 C'`` yields ``'4.76'``. Molecules sharing the same pKa are
    all kept, in their input order.

    Args:
        list: Sequence of molecule names. The parameter name shadows the
            builtin; it is kept so existing callers keep working.

    Returns:
        A list of ``(pKa_string, name)`` tuples ordered by ``float(pKa_string)``.

    Raises:
        ValueError: If no pKa is found for a molecule, or the pKa text does
            not start with a number.
    """
    names = list  # alias, so the builtin-shadowing name is not used below
    pka_pairs = []
    for name in names:
        record = pka_lookup_pubchem(name, 'name')
        if record is None:
            raise ValueError(f'No pKa found on Pubchem for {name!r}.')
        pka_text = str(record['pKa'])
        # Take the whole leading number rather than a fixed number of characters.
        leading_number = re.match(r'\s*[-+]?\d+(?:\.\d+)?', pka_text)
        if leading_number is None:
            raise ValueError(f'Could not read a numeric pKa for {name!r} from {pka_text!r}.')
        pka_pairs.append((leading_number.group().strip(), name))
    # sorted() is stable, so equal pKa values keep their input order.
    return sorted(pka_pairs, key=lambda pair: float(pair[0]))


# Draw all molecules of a (pKa, name) list side by side in one grid image.
def generate_image(list_of_molecule):
    """Draw the molecules of a list in a single-row grid image.

    Args:
        list_of_molecule: Sequence whose items carry the molecule name at
            index 1, e.g. ``[('3.47', 'aspirin'), ('4.75', 'acetic acid')]``.

    Returns:
        The result of ``Draw.MolsToGridImage`` with one 200x200 cell per item
        and as many columns as there are items.
    """
    mols = [get_test(entry[1]) for entry in list_of_molecule]
    return Draw.MolsToGridImage(
        mols,
        molsPerRow=len(list_of_molecule),
        subImgSize=(200, 200),
    )


# Example: generate_image([('3.47', 'aspirin'), ('4.75', 'acetic acid'), ('14', 'caffeine'), ('15.9', 'ethanol')])

   
    
def molecule_list_image(molecule_list):
    """Return one image per molecule.

    Args:
        molecule_list: Sequence of molecule names, or of ``(pKa, name)`` pairs
            such as those returned by ``pka_increasing``.

    Returns:
        A list with one ``generate_image`` result per entry, in input order.
    """
    image_list = []
    for entry in molecule_list:
        # generate_image reads the name at index 1 of each item, so a plain
        # name has to be wrapped in a pair before it is passed on.
        name = entry if isinstance(entry, str) else entry[1]
        image_list.append(generate_image([(None, name)]))
    return image_list
# Quick check: build the images for two sample molecules and print the resulting list.
print(
    molecule_list_image(["aspirin", "acetic acid"])
)

    

        



[<IPython.core.display.SVG object>, <IPython.core.display.SVG object>]


In [9]:
import sys

def molecule_name(dic):
    """Return the item of ``dic[0]`` at position ``len(dic) - 1``.

    Every position from 0 to ``len(dic) - 1`` of ``dic[0]`` is read in turn
    and the last value read is returned. An empty ``dic`` leaves nothing to
    return and raises ``UnboundLocalError``.

    NOTE(review): the index runs over ``len(dic)`` but is applied to
    ``dic[0]`` -- confirm that this is the intended lookup.
    """
    cursor = 0
    while cursor < len(dic):
        picked = dic[0][cursor]
        cursor += 1
    return picked

# Check that the "how many molecules" entry is an integer.
def number(number):
    """Tell whether ``number`` can be converted to an integer.

    Args:
        number: The raw entry, typically the text returned by a popup. May be
            ``None`` when the popup was cancelled.

    Returns:
        ``"a"`` when ``int(number)`` succeeds, ``"b"`` otherwise.
    """
    try:
        int(number)
    except (ValueError, TypeError):
        # ValueError: non-numeric text; TypeError: None or another non-convertible type.
        return "b"
    return "a"
def _ask(prompt):
    """Show a text popup and return the answer; stop the cell if it is cancelled."""
    answer = sg.popup_get_text((prompt), background_color="grey")
    if answer is None:
        # No answer came back from the dialog, so there is nothing to validate.
        raise RuntimeError("Input cancelled by user")
    return answer


def _find_compound(name):
    """Search PubChem by name; a blank name counts as 'not found'."""
    return get_compounds(name, "name") if name.strip() else []


# Ask how many molecules to compare until the entry is a valid integer.
number_entry = _ask("How many molecule do you want ton compare?")

while number(number_entry)=="b":
    sg.popup_error("The entry is not valide, please enter an integer number")
    number_entry = _ask("How many molecule do you want ton compare?")

#get the name of the molecule
molecule_list = []

for _ in range(int(number_entry)):
    text = _ask('What is the name of the molecule')
    print(text)
    compound = _find_compound(text)
    print(compound)
    # Keep asking until the name exists in PubChem AND has a pKa entry.
    # pka_lookup_pubchem reports failure by returning None, not by raising.
    while True:
        if not compound:
            sg.popup_error("The entered molecule is not in pubchem database")
            text = _ask('Enter a valid name for the molecule')
        elif pka_lookup_pubchem(text, "name") is None:
            sg.popup_error("Sorry, the pKa of this molecule is not in pubchem database")
            text = _ask('Enter the name of another molecule')
        else:
            break
        compound = _find_compound(text)
    molecule_list.append(text)

molecule_in_order = pka_increasing(molecule_list)
print(molecule_in_order)

acetic acid
[Compound(176)]
[('4.75', 'acetic acid')]


In [None]:
import PySimpleGUI as sg
import base64
import cairosvg

# Sample SVG images (replace these with your actual SVG objects)
svg_images = molecule_list_image((["aspirin", "acetic acid"]))


def _svg_to_png(image):
    """Render one SVG image to PNG bytes with cairosvg.

    Accepts an object exposing its SVG markup in a ``data`` attribute, or the
    markup itself as ``str``/``bytes``.
    """
    svg_source = getattr(image, "data", image)
    if isinstance(svg_source, str):
        svg_source = svg_source.encode("utf-8")
    # cairosvg takes its input as `bytestring`, `url` or `file_obj`.
    return cairosvg.svg2png(bytestring=svg_source)


# Convert SVG images to PNG format
png_images = [_svg_to_png(image) for image in svg_images]

# Convert PNG images to base64 strings
png_base64_images = [base64.b64encode(image).decode() for image in png_images]

# Create layout for PySimpleGUI window
layout = [
    [sg.Image(data=png_base64_images[0], key='-IMAGE-')],  # Display first image initially
    [sg.Button('Next'), sg.Button('Previous')]  # Buttons to navigate through images
]

# Create PySimpleGUI window
window = sg.Window('Image Viewer', layout)

# Event loop
index = 0
try:
    while True:
        event, _ = window.read()
        if event == sg.WINDOW_CLOSED:
            break
        elif event == 'Next':
            index = (index + 1) % len(png_base64_images)  # Cycle through images
            window['-IMAGE-'].update(data=png_base64_images[index])  # Update displayed image
        elif event == 'Previous':
            index = (index - 1) % len(png_base64_images)  # Cycle through images
            window['-IMAGE-'].update(data=png_base64_images[index])  # Update displayed image
finally:
    # Close the window even if the event loop raises.
    window.close()

OSError: no library called "cairo-2" was found
no library called "cairo" was found
no library called "libcairo-2" was found
cannot load library 'libcairo.so.2': error 0x7e.  Additionally, ctypes.util.find_library() did not manage to locate a library called 'libcairo.so.2'
cannot load library 'libcairo.2.dylib': error 0x7e.  Additionally, ctypes.util.find_library() did not manage to locate a library called 'libcairo.2.dylib'
cannot load library 'libcairo-2.dll': error 0x7e.  Additionally, ctypes.util.find_library() did not manage to locate a library called 'libcairo-2.dll'

NameError: name 'right_click_menu' is not defined