# Data2text Natural Language Generation - Live Demo

**Authors**: Ruslan Yermakov, Angelo Ziletti, PhD (Decision Science Pharma, Bayer AG)


In this notebook, we show text generated by a fine-tuned T5 model and compare it with the original (real) text.

### Load generated sections and original sections

In [1]:
# Root directory of the T5 experiment. Edit this single line to point the
# notebook at a different checkout; the three paths below are derived from it.
RA_MODEL_DIR = '/home/ruslan_yermakov/nlg-ra/T5_experiments/RA_model'

# text generated for the test dataset
PATH_GENERATED = RA_MODEL_DIR + '/outputs/mh_test_generations_explicit_path_model.txt'
# real (original) text of the test dataset
PATH_ORIGINAL = RA_MODEL_DIR + '/input_data/test.target'
# source file used for generating the text of the test dataset
PATH_INPUT = RA_MODEL_DIR + '/input_data/test.source'

In [2]:
# read generated text of test dataset
# The encoding is given explicitly instead of relying on the platform default:
# make_nice_print expects non-ASCII characters such as ’ and ´ in this text.
# NOTE(review): assumes the file is UTF-8 encoded - confirm.
with open(PATH_GENERATED, encoding='utf-8') as reader:
    t5_outputs_test = reader.readlines()

In [3]:
# read the source file for generating text of test dataset
# The encoding is given explicitly instead of relying on the platform default.
# NOTE(review): assumes the file is UTF-8 encoded - confirm.
with open(PATH_INPUT, encoding='utf-8') as reader:
    input_NER_test = reader.readlines()

In [4]:
# read real text from test dataset
# The encoding is given explicitly instead of relying on the platform default:
# make_nice_print expects non-ASCII characters such as ’ and ´ in this text.
# NOTE(review): assumes the file is UTF-8 encoded - confirm.
with open(PATH_ORIGINAL, encoding='utf-8') as reader:
    original_outputs_test = reader.readlines()

---------------------------------------------

### Helper Functions

In [5]:
import html
from IPython.core.display import display, HTML
import re

def make_nice_print(text):
    """Tighten the spaces around punctuation marks in `text` and return the result."""

    # (old, new) pairs, applied one after another in exactly this order.
    # Each rule is a single str.replace pass, so the order matters.
    substitutions = (
        (" , ", ", "),
        (" . ", ". "),
        (" .", ". "),
        ("  ", " "),
        (" ’ ", "’"),
        (" ' ", "' "),
        (" ´ ", "´ "),
        (" ( ", " ("),
        (" )", ")"),
        # same rule as above, applied a second time: a second pass can still
        # change the text when two such marks follow each other
        (" ´ ", "´ "),
        (" : ", ": "),
    )

    for old, new in substitutions:
        text = text.replace(old, new)

    return text


def html_escape(text):
    """Return `text` with the characters &, <, >, " and ' converted to HTML-safe sequences."""
    escaped = html.escape(text, quote=True)
    return escaped

# Helper function
def highlight_entities(text, entities, print_entities=True):
    """
    Display `text` as HTML with every occurrence of an entity highlighted.

    text - string, the content to display
    entities - string; everything up to the first ":" is discarded, markup of
               the form <...> is removed and the rest is split on whitespace
               into entity tokens. "_" inside a token separates the words of a
               multi-word entity. Tokens consisting only of digits are ignored.
    print_entities - if True, the list of entity tokens is printed after the text
    """

    # keep only what follows the first ":"; a string without ":" is used as a
    # whole instead of raising an IndexError
    _, colon, after_colon = entities.partition(":")
    if colon:
        entities = after_colon
    entities = re.sub('<[^>]+>', '', entities)
    entities = entities.split()

    phrases = set()
    for entity in entities:

        # skip numbers
        if entity.isdigit(): continue

        # case when entity is long: "_" separates its words. Empty pieces are
        # dropped so that a token such as "_" cannot become a bare space that
        # would match between every pair of words.
        detected_entity = " ".join(word for word in entity.split("_") if word)
        if detected_entity:
            phrases.add(detected_entity)

    # number between 0.0 (fully transparent) and 1.0 (fully opaque)
    weight = 1
    span_open = '<span style="background-color:rgba(135,206,250,' + str(weight) + ');">'

    if phrases:
        # All entities are matched in ONE pass over the raw text, longest first.
        # Replacing them one after another would also hit the <span> markup
        # inserted for earlier entities and wrap overlapping entities twice.
        ordered = sorted(phrases, key=lambda phrase: (-len(phrase), phrase))
        pattern = re.compile("|".join(re.escape(phrase) for phrase in ordered))

        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            # text between matches is escaped so that it cannot be read as HTML
            pieces.append(html_escape(text[cursor:match.start()]))
            pieces.append(span_open + html_escape(match.group(0)) + '</span>')
            cursor = match.end()
        pieces.append(html_escape(text[cursor:]))
        highlighted = "".join(pieces)
    else:
        highlighted = html_escape(text)

    # display detected entities in section content
    display(HTML(highlighted))

    if print_entities: print(entities)


# apply make_nice_print to original sections and generated sections;
# every list is updated in place, element by element
for sections in (original_outputs_test, t5_outputs_test):
    sections[:] = [make_nice_print(section) for section in sections]

In [6]:
from random import randrange
import random

def seciton_indices(input_NER_test):
    """
    Group the positions of the input lines by the section they start with.

    input_NER_test - list of strings
    Returns a dict with the keys '1' to '6'; each value is the list of
    positions whose line starts with the corresponding section title.
    Lines that match no title are left out.
    """

    # Section titles in the order of their numbers. Only the first 20
    # characters of the lower-cased title are compared with the first 20
    # characters of each line.
    titles = (
        'What the medicine is and what it is used for: ',
        'What you need to know before you take the medicine: ',
        'How to take the medicine: ',
        'Possible side effects: ',
        'How to store the medicine: ',
        'Contents of the pack and other information: ',
    )
    key_by_prefix = {
        title.lower()[:20]: str(number)
        for number, title in enumerate(titles, start=1)
    }

    section_indices = {key: [] for key in key_by_prefix.values()}

    for position, section_input in enumerate(input_NER_test):
        key = key_by_prefix.get(section_input[:20])
        if key is not None:
            section_indices[key].append(position)

    return section_indices

In [14]:
def _show_example(original_text, generated_text, entities, as_plain_text, separator):
    """Show one original/generated pair, with `separator` printed between the two."""
    if as_plain_text:
        # cut the original text to the length of the generated text
        print()
        print(original_text[:len(generated_text)])
        print(separator)
        print(generated_text)
    else:
        highlight_entities(original_text, entities, print_entities=False)
        print(separator)
        highlight_entities(generated_text, entities, print_entities=False)


def compare_results(original_outputs_test, t5_outputs_test, input_NER_test):
    """
    Interactively show an original text next to the text generated for it.

    original_outputs_test - list of strings, the original texts
    t5_outputs_test - list of strings, the generated texts
    input_NER_test - list of strings, the inputs the texts were generated from

    The user is asked whether a specific section should be shown (any answer
    other than 'n' counts as yes), then which section, then whether the example
    should be picked at random ('y') or chosen by number. Without a specific
    section, a random example is picked from all of them.
    Randomly picked examples of sections 2, 3 and 4 are printed as plain text,
    with the original cut to the length of the generated text; everything else
    is displayed with highlighted entities.
    Invalid answers are reported with a message instead of an exception.
    """

    # sections whose random examples are printed as plain, truncated text
    plain_text_sections = ('2', '3', '4')
    wide_separator = "\n============================\n"
    narrow_separator = "============================"

    # only positions that exist in all three lists can be shown
    num_examples = min(len(original_outputs_test), len(t5_outputs_test), len(input_NER_test))
    if num_examples == 0:
        print("No examples available.")
        return

    section_indices = seciton_indices(input_NER_test)

    is_specific = input('Would you like to see a specific section: ').strip().lower()

    if is_specific != 'n':

        num_section = input('Which one: ').strip()
        if num_section not in section_indices:
            print("Unknown section:", num_section, "- available sections:", sorted(section_indices))
            return

        indices_available = [i for i in section_indices[num_section] if i < num_examples]
        if not indices_available:
            print("No examples available for section", num_section)
            return

        is_random = input('Would you like to see a random example: ').strip().lower()

        if is_random == 'y':
            rand_ind = random.choice(indices_available)
            _show_example(original_outputs_test[rand_ind], t5_outputs_test[rand_ind],
                          input_NER_test[rand_ind],
                          num_section in plain_text_sections, wide_separator)
        else:
            print("Available numbers: ", indices_available)

            raw_choice = input('Your choice: ')
            try:
                choice = int(raw_choice)
            except ValueError:
                print("Not a number:", raw_choice)
                return
            # negative numbers count from the end of the lists, as list indexing does
            if not -num_examples <= choice < num_examples:
                print("No example with number", choice)
                return

            _show_example(original_outputs_test[choice], t5_outputs_test[choice],
                          input_NER_test[choice], False, narrow_separator)
    else:

        ind = randrange(num_examples)

        # section of the picked example; '1' when it belongs to no known section
        key = next((section for section, indices in section_indices.items() if ind in indices), '1')

        if key in plain_text_sections:
            _show_example(original_outputs_test[ind], t5_outputs_test[ind],
                          input_NER_test[ind], True, wide_separator)
        else:
            _show_example(original_outputs_test[ind], t5_outputs_test[ind],
                          input_NER_test[ind], False, narrow_separator)

------------------------------------
###  Interactive Demo


**Section 1:** 'What the medicine is and what it is used for'     
**Section 2:** 'What you need to know before you take the medicine'   
**Section 3:** 'How to take the medicine'   
**Section 4:** 'Possible side effects'    
**Section 5:** 'How to store the medicine'    
**Section 6:** 'Contents of the pack and other information'

In [15]:
# start the interactive comparison; the questions are asked below this cell
compare_results(
    original_outputs_test=original_outputs_test,
    t5_outputs_test=t5_outputs_test,
    input_NER_test=input_NER_test,
)

Would you like to see a specific section: y
Which one: 1
Would you like to see a random example: y




