In [1]:
from ipywidgets import widgets
from IPython.display import display, clear_output
from rdkit.Chem import PandasTools
import pandas as pd
from rdkit import Chem
import pubchempy as pcp
import io
from functools import wraps

class App:
    """Holds the molecule table shared by the widget callbacks."""

    def __init__(self):
        # Filled by start_process with the table returned by PandasTools.LoadSDF.
        self.data = None

    def get_molecule(self):
        """Yield ``(ID, ROMol)`` pairs for every row of ``self.data``, in row order.

        Rows are walked positionally, so the table does not need to carry a
        default 0..n-1 index.

        Raises:
            RuntimeError: on the first iteration if no table has been loaded.
        """
        if self.data is None:
            raise RuntimeError(
                "No molecules loaded: upload an SDF file and press 'Start Process' first."
            )
        # Pair the two columns directly instead of label lookups with .loc[i],
        # which raise KeyError as soon as the index is not exactly 0..n-1.
        yield from zip(self.data["ID"], self.data["ROMol"])

def step(func):
    """Decorator that returns generator-based coroutines already primed.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so the caller can use ``.send(value)`` immediately.
    The value produced by that first ``yield`` is discarded.

    Args:
        func: a generator function.

    Returns:
        A wrapper with the same signature as ``func`` that returns the
        primed generator.
    """
    @wraps(func)
    def inner(*args, **kwargs):
        # Forward the arguments: they were accepted but silently dropped
        # before, so a generator function with parameters could not be used.
        gen = func(*args, **kwargs)
        gen.send(None)
        return gen
    return inner

@step
def generate_output_checkboxes():
    """Coroutine that answers each compound sent to it with its synonyms.

    ``send(compound)`` returns a list built from ``compound.synonyms``;
    sending a falsy value (e.g. ``None``) returns an empty list.
    """
    synonyms = []
    while True:
        compound = yield synonyms
        # Always hand out a fresh list: the click handler inserts into the
        # result, and that must not alter the compound's own synonyms.
        synonyms = list(compound.synonyms) if compound else []
            
            
def get_inchi_key(mol):
    """Return the InChIKey that RDKit computes for the molecule ``mol``."""
    inchi_key = Chem.MolToInchiKey(mol)
    return inchi_key
            
def get_pubchem_entry(inchi_key):
    """Look up a compound in PubChem by its InChIKey.

    Args:
        inchi_key: the InChIKey to search for.

    Returns:
        The first compound returned by ``pcp.get_compounds``, or ``None``
        when the search returns nothing.
    """
    matches = pcp.get_compounds(inchi_key, 'inchikey')
    # An empty result used to blow up with ValueError while unpacking;
    # the synonym coroutine already treats a falsy compound as "no synonyms".
    return matches[0] if matches else None

def update_data(state):
    """Append the records described by a radio-button widget to ``data``.

    ``state.options`` is read as ``(inchi_key, *synonyms)``. One record is
    appended per synonym; when there are no synonyms a single record with an
    empty ``'synonyms'`` field is appended instead. A falsy ``state`` is
    ignored.
    """
    if not state:
        return
    inchi_key, *names = state.options
    # [''] stands in for "no synonyms" so exactly one record is still written.
    for name in names or ['']:
        data.append({
            'ID': state.description,
            'name': state.value,
            'inchi_key': inchi_key,
            'synonyms': name,
        })
            
    
    
#### state variables #####
app = App()  # holds the loaded molecule table
syn_checker = generate_output_checkboxes()  # primed coroutine: send a compound, receive its synonyms

#### ui #####
upload = widgets.FileUpload(description='SDF upload')
batch = widgets.Text(description='UploadTag')
start = widgets.Button(description='Start Process')
next_record = widgets.Button(description='Next Record')
# Not part of the initial layout: the click handler displays it in out1
# once the molecule generator is exhausted. (Label was misspelled 'Finnish'.)
finish_process = widgets.Button(description='Finish')

main_interaction = widgets.HBox([upload, batch, start])
out1 = widgets.Output()  # synonym radio buttons, later the finish button
out2 = widgets.Output()  # structure
choice_ui = widgets.HBox([out1, out2])
# max=10 is only a placeholder; start_process sets it to the number of loaded molecules.
progress = widgets.IntProgress(value=0, min=0, max=10, orientation='horizontal')
main_output = widgets.VBox([main_interaction, choice_ui, progress, next_record])

#### render #####
display(main_output)

#### callbacks ####

def read_sdf(data):
    """Return an RDKit ``SDMolSupplier`` fed with the SDF text in ``data``.

    Args:
        data: raw SDF content as UTF-8 encoded bytes.
    """
    mol_supplier = Chem.SDMolSupplier()
    mol_supplier.SetData(data.decode('utf-8'))
    return mol_supplier

def start_process(dummy):
    """Load the uploaded SDF file into ``app.data`` and size the progress bar.

    Only the first uploaded file is read. When nothing has been uploaded a
    hint is shown in ``out1`` and nothing else changes.

    Args:
        dummy: argument supplied by ``on_click``; unused.
    """
    uploaded = upload.value
    if not uploaded:
        # Unpacking an empty upload used to raise ValueError inside the callback.
        with out1:
            clear_output()
            print('Please upload an SDF file first.')
        return
    # ipywidgets 7 exposes {filename: {'content': ...}}; ipywidgets 8 exposes
    # a tuple of per-file dicts. Accept both shapes.
    if isinstance(uploaded, dict):
        first_file = next(iter(uploaded.values()))
    else:
        first_file = uploaded[0]
    buffer = io.BytesIO(first_file['content'])
    app.data = PandasTools.LoadSDF(buffer)
    progress.max = len(app.data)

def finalize_process(dummy):
    """Write the collected records to ``<UploadTag>.csv`` on the naming share.

    The records in the module-level ``data`` list are converted to a
    DataFrame and saved as a ``;``-separated CSV named after the text in the
    ``batch`` field.

    Args:
        dummy: argument supplied by ``on_click``; unused.
    """
    base_path = '//ltvil-freenas5.thermofisher.lt/PTVG_Data/DATA/mzCloud/forUpload/NamingData/'
    # The dot was missing, producing files such as 'batch1csv'.
    file_name = batch.value + '.csv'
    path = base_path + file_name
    # Keep the frame local: rebinding the global `data` list to a DataFrame
    # would break update_data, which appends records to that list.
    records = pd.DataFrame(data)
    records.to_csv(path, sep=';')
    
def get_next_molecule_closure(gen, data, progress):
    """Build the click handler that steps through the molecules one by one.

    Args:
        gen: iterator yielding ``(molecule id, molecule)`` pairs.
        data: not used by the handler (records are stored through
            ``update_data``); kept so existing callers keep working.
        progress: object whose ``value`` is incremented for every molecule
            that gets displayed.

    Returns:
        A callback taking one (ignored) argument. Each call stores the choice
        made for the molecule on screen and then either shows the next
        molecule or, when ``gen`` is exhausted, the finish button.
    """
    state = ''  # radio-button widget of the molecule on screen; '' when there is none

    def inner(dummy):
        nonlocal state
        # Store the pending choice, then forget it so a repeated click
        # (e.g. after the last molecule) cannot store the same record twice.
        update_data(state)
        state = ''
        try:
            molid, molecule = next(gen)
        except StopIteration:
            with out1:
                clear_output()
                display(finish_process)
            # Nothing left to look up; falling through used to fail on the
            # unbound `molecule` variable.
            return
        progress.value += 1
        inchi_key = get_inchi_key(molecule)
        result = get_pubchem_entry(inchi_key)
        # Build a new list rather than inserting into the one handed out by
        # the synonym coroutine.
        options = [inchi_key, *syn_checker.send(result)]
        state = widgets.RadioButtons(options=options, value=inchi_key, layout={'width': 'max-content'}, description=molid)
        with out1:
            clear_output()
            display(state)
        with out2:
            clear_output()
            display(molecule)
    return inner
# Records appended by update_data and exported by finalize_process.
data = []
# The molecule generator is created here but only starts running on the
# first click of 'Next Record'.
drive_process = get_next_molecule_closure(
    gen=app.get_molecule(),
    data=data,
    progress=progress,
)

# Connect every button to its handler.
start.on_click(start_process)
next_record.on_click(drive_process)
finish_process.on_click(finalize_process)


VBox(children=(HBox(children=(FileUpload(value={}, description='SDF upload'), Text(value='', description='Uplo…

In [2]:
data

[]