(transformation_product_prediction)=

# Transformation Product Prediction (TPP)

In [None]:
def predict_TPs(input_smiles, input_name, rr):
    """
    Pathway prediction for single compound

    Repeatedly takes the first compound of the queue, expands it into its
    predicted products and merges those products into the queue with
    update_queue(). The loop ends when the counter reaches MAX_TP or when
    there is nothing left in the queue.

    :param input_smiles: input smiles of parent compound
    :param input_name: name of parent compound
    :param rr: relative reasoning object, handed on to expand_smiles()
    :return: dictionary of resulting TPs, keyed by smiles; the parent compound itself is one of the entries
    """
    print('\n### PREDICT TPs FOR COMPOUND {} ###\n'.format(input_name))
    num_TP = -1  # counter starts at -1, because source compound is also in the TP list
    validated_TPs = {}  # container for resulting predictions
    # queued_items and queue are kept in the same order: queue holds only the
    # smiles, queued_items the full compound dictionary for each of them
    queued_items = [{'probability': 1, 'combined_probability': 1, 'smiles': input_smiles, 'generation': 0,
                     'parent_smiles': '', 'rules': '', 'rule_IDs': '', 'name': input_name,
                     'size': len(input_smiles)}]
    queue = [input_smiles]  # queue is updated after each cycle to have top TP first, list of smiles
    while num_TP < MAX_TP:
        if not queue:
            print('\nEmpty queue - The exploration of {} has converged at {} predicted TPs'.format(input_name,
                                                                                                   num_TP))
            break  # stop TP prediction
        smiles = queue.pop(0)  # get top item in queue
        data = queued_items.pop(0)  # remove the matching data from queued items
        result_list = expand_smiles(smiles, rr)  # create children
        TP_dict = result_to_compound_dict(result_list)
        queue, queued_items, validated_TPs = update_queue(queue, queued_items, validated_TPs, TP_dict, data)
        # NOTE(review): the compound is recorded only after its products were merged,
        # so a product identical to its own parent would be queued again - confirm
        # whether that is intended.
        validated_TPs[smiles] = data
        num_TP += 1
    return validated_TPs


def update_queue(_queue, _queued_items, _validated_TPs, _TPs, _parent_data):
    """
    Update queue with TPs predicted in current iteration

    Every predicted TP that passes the filters is either merged into its
    existing entry (already validated or already queued) or appended to the
    queue as a new item. Afterwards the queued items are re-ordered by
    combined probability, highest first.

    _queue, _queued_items and _validated_TPs are modified in place. _queue is
    only appended to and is NOT re-ordered; callers must continue with the
    returned new_queue, which follows the order of _queued_items.

    :param _queue: ordered list of smiles to explore
    :param _queued_items: ordered list of compound dictionaries, same order as _queue
    :param _validated_TPs: dictionary (smiles -> compound dictionary) of already validated TPs for resulting pathway
    :param _TPs: predicted TPs from current iteration (smiles -> compound dictionary), to be evaluated and added to queue
    :param _parent_data: compound dictionary of the parent compound of _TPs
    :return: new_queue: new ordered list of smiles to explore
    :return _queued_items: new ordered list of compound dictionaries
    :return: _validated_TPs: updated dictionary of already validated TPs
    """
    parent_probability = _parent_data['combined_probability']
    parent_generation = _parent_data['generation']
    parent_smiles = _parent_data['smiles']
    queue_before = len(_queue)
    for smiles, data in _TPs.items():
        # If the probability is at or below the threshold, we don't consider the TP further
        this_probability = data['probability']
        if this_probability <= PROBABILITY_THRESHOLD:
            continue
        # If a moiety is given and it is not in SMILES, we don't follow the TP further
        if MOIETY not in smiles:
            continue
        if FOLLOW_LABELED_ATOM and ATOM_LABEL not in smiles:
            continue
        # NOTE(review): zero probabilities only get here when PROBABILITY_THRESHOLD is
        # negative, because the threshold check above already skipped them otherwise -
        # confirm that this is the intended interplay of the two settings.
        if INCLUDE_0_PROBABILITIES and this_probability == 0:
            this_probability = 0.01
        # add combined probability
        this_combined_probability = parent_probability * this_probability
        this_generation = parent_generation + 1
        rules = data['rules']
        rule_IDs = data['rule_IDs']
        # first, check if compound already in validated. if yes, update
        if smiles in _validated_TPs:
            _validated_TPs[smiles] = update_compound_entry(_validated_TPs[smiles],
                                                           this_combined_probability, rules, rule_IDs,
                                                           this_generation, parent_smiles, size_metric='size',
                                                           size_value=len(smiles))
        # next, check if compound is already in queue. if yes, update
        elif smiles in _queue:
            index = _queue.index(smiles)
            # _queue and _queued_items must be aligned index by index
            assert smiles == _queued_items[index]['smiles'], \
                'smiles {} does not match smiles in {}'.format(smiles, _queued_items[index])
            _queued_items[index] = update_compound_entry(_queued_items[index],
                                                         this_combined_probability, rules, rule_IDs,
                                                         this_generation, parent_smiles, size_metric='size',
                                                         size_value=len(smiles))
        # else, add new item to queue
        else:
            data['combined_probability'] = this_combined_probability
            data['generation'] = this_generation
            data['parent_smiles'] = parent_smiles
            # counts every 'C'/'c' character of the string, so symbols that merely
            # contain that letter (e.g. Cl) are counted as well
            data['carbon_count'] = smiles.upper().count('C')
            # The size sort below reads 'size' from every queued item; make sure a
            # new item carries it (same metric as the seed item and the updates
            # above) without overriding a value that is already present.
            data.setdefault('size', len(smiles))
            _queued_items.append(data)
            _queue.append(smiles)
            assert len(_queued_items) == len(_queue)
    # First sort by size
    if SORT_TPS_BY_SIZE:
        _queued_items.sort(key=lambda x: x['size'])
    # then order by combined probability; the sort is stable, so items with equal
    # probability keep the order established by the size sort
    _queued_items.sort(reverse=True, key=lambda x: x['combined_probability'])
    queue_after = len(_queue)
    print('Added {} smiles to queue'.format(queue_after - queue_before))
    # rebuild the smiles queue so that it follows the new order of _queued_items
    new_queue = [item['smiles'] for item in _queued_items]
    print('New queue for compound', parent_smiles)
    for item in _queued_items:
        print(item['smiles'], item['combined_probability'])

    return new_queue, _queued_items, _validated_TPs