In [1]:
import pickle
import numpy as np
import codecs
import random

from nltk.tokenize import wordpunct_tokenize

In [2]:
# Deserialize the training split (per the original note: an array of Leaflet objects).
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open("/home/ruslan_yermakov/nlg-ra/datasets/LEAFLET_TRAIN_DATASET.pickle", "rb") as train_pickle:
    train_dataset = pickle.load(train_pickle)

In [3]:
# Deserialize the validation split (per the original note: an array of Leaflet objects).
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open("/home/ruslan_yermakov/nlg-ra/datasets/LEAFLET_VALID_DATASET.pickle", "rb") as valid_pickle:
    valid_dataset = pickle.load(valid_pickle)

In [4]:
# Deserialize the test split (per the original note: an array of Leaflet objects).
# NOTE(review): pickle.load can execute arbitrary code - only open pickles from a trusted source.
with open("/home/ruslan_yermakov/nlg-ra/datasets/LEAFLET_TEST_DATASET.pickle", "rb") as test_pickle:
    test_dataset = pickle.load(test_pickle)

In [5]:
# Number of items in the training split loaded above.
len(train_dataset)

1068

In [6]:
# Number of items in the validation split loaded above.
len(valid_dataset)

134

In [7]:
# Number of items in the test split loaded above.
len(test_dataset)

134

------------------------------------------------------------------------------------------------------

## Produce the same output as the *create_dataset* script from data2text-plan

### Plan

- `section_content` — the true (target) text
- `entity_recognition` — the actual input: a set of records
- The set of records, in the order they appear in `section_content` — the content plan

In [10]:
# --- Record format -----------------------------------------------------------
# Presumably the separator between records; not referenced in the visible cells.
RECORD_DELIM = " "
# Separator between the fields of one record (value, entity, type, side).
DELIM = "￨"

# Entity name written into every record ("Indication" was the previous choice).
ENTITY = "Section2"

# The two possible values of the last record field.
HOME = "HOME"
AWAY = "AWAY"

# --- Special vocabulary symbols ----------------------------------------------
PAD_WORD = "<blank>"
UNK_WORD = "<unk>"
BOS_WORD = "<s>"
EOS_WORD = "</s>"
UNK = 0

In [9]:
# make sure - sorted entities
def _sort_key(entity):
    return entity['BeginOffset']

def test_order_entities(section_entities):
    
    sorted_entities = sorted(section_entities, key=_sort_key)
    
    if sorted_entities == section_entities: return True
    
    return False


def test_order_dataset(dataset):
    """
    Assert that the recognized entities of sections 1-6 of every leaflet are sorted.

    Sections whose ``entity_recognition`` is None are ignored. Raises
    AssertionError on the first section for which ``test_order_entities``
    does not return True.
    """
    for leaflet in dataset:
        sections = (
            leaflet.section1, leaflet.section2, leaflet.section3,
            leaflet.section4, leaflet.section5, leaflet.section6,
        )
        for section in sections:
            if section.entity_recognition is not None:
                assert test_order_entities(section.entity_recognition)

            
# Run the ordering check on all three splits; an AssertionError means some section is unsorted.
for _split in (train_dataset, valid_dataset, test_dataset):
    test_order_dataset(_split)

In [11]:
def create_summary_contentplan(dataset):
    """
    Transform dataset to be a suitable format for model data2text-plan.

    For every leaflet whose section 2 has both text and NER results, build

    * a content plan line: one record per recognized entity, in the order the
      entities are listed. A record is value, ENTITY, type and HOME/AWAY joined
      by DELIM. Records are separated by one space and the line always ends
      with a space followed by a newline, even when there are no entities, so
      that every kept leaflet occupies exactly one line when written to a file;
    * a summary line: the section text tokenized with ``wordpunct_tokenize``
      (punctuation becomes separate tokens), re-joined with single spaces and
      terminated by a newline.

    Args:
        dataset: iterable of leaflet objects exposing
            ``section2.section_content`` and ``section2.entity_recognition``
            (an iterable of dicts with ``Text``, ``Type`` and ``Category`` keys).

    Returns:
        Tuple ``(content_plan_leaflets, summaries_leaflets)``: two lists of
        strings with the same length and order.
    """
    # content plan line of each kept leaflet
    content_plan_leaflets = []
    # tokenized section2 text of each kept leaflet
    summaries_leaflets = []

    for leaflet in dataset:
        section2_content = leaflet.section2.section_content
        section2_entity_recognition = leaflet.section2.entity_recognition

        # skip if either input or output is None
        if section2_content is None or section2_entity_recognition is None:
            continue

        records = []
        for entity in section2_entity_recognition:
            # a record must not contain spaces, so multi-word entities are glued with "_"
            entity_value = entity['Text'].replace(" ", "_")
            # fall back to the category when the type is missing or empty
            entity_type = entity['Type'] or entity['Category']

            # randomly choose HOME or AWAY
            # NOTE: drawn from the global `random` state; seed it beforehand
            # if the generated files must be reproducible.
            side = HOME if random.randint(1, 2) == 1 else AWAY

            records.append(DELIM.join((entity_value, ENTITY, entity_type, side)))

        # Terminate the line unconditionally. Deciding "is this the last
        # entity?" through list.index() was quadratic and chose the wrong
        # terminator when the last entity compared equal to an earlier one;
        # an empty entity list produced no terminator at all.
        content_plan_leaflets.append(" ".join(records) + " " + "\n")

        # make sure to have punctuations as a separate token, then back to one string
        summary_tokens = wordpunct_tokenize(section2_content)
        summaries_leaflets.append(" ".join(summary_tokens) + "\n")

    return (content_plan_leaflets, summaries_leaflets)

In [12]:
def add_special_records(records):
    """
    Append the four special records expected at the start of every source line.

    Each special record has four DELIM-joined fields: a leading symbol
    (UNK_WORD, PAD_WORD, BOS_WORD, EOS_WORD - in that order) followed by
    three PAD_WORD fields. ``records`` is extended in place and also returned.
    """
    for leading_symbol in (UNK_WORD, PAD_WORD, BOS_WORD, EOS_WORD):
        fields = [leading_symbol] + [PAD_WORD] * 3
        records.append(DELIM.join(fields))

    return records

In [13]:
# get the src_train - training data to be input to the model

def create_src_table(content_plan_leaflets):
    """
    Create the source "table" line (model input) for each content plan line.

    There is no real table for a leaflet, so the table is the content plan
    itself. The records keep the content plan order (they are deliberately not
    shuffled, to make it easier for the model to learn) and are preceded by
    the four special records from ``add_special_records``.

    Args:
        content_plan_leaflets: list of content plan lines, each a sequence of
            space-separated records followed by a space and a newline.

    Returns:
        List of source lines, one per input line, each terminated by a newline.
    """
    src_leaflets = []

    for leaflet_content_plan in content_plan_leaflets:
        # Drop the trailing space/newline terminator. Stripping (rather than
        # slicing off a fixed two characters) leaves the last record intact
        # when a line does not end with exactly a space and a newline.
        leaflet_records = leaflet_content_plan.rstrip(" \n").split(" ")

        # special symbols always come first in the source line
        special_records = add_special_records([])

        src_leaflets.append(" ".join(special_records + leaflet_records) + '\n')

    return src_leaflets

In [14]:
def create_training_sets(dataset):
    """
    Build and write the four training files.

    Files written (relative to the working directory; the ``inter``
    directory must already exist):

    * ``inter/train_content_plan.txt`` - content plan records per leaflet
    * ``tgt_train.txt``                - tokenized section text per leaflet
    * ``src_train.txt``                - special records + records per leaflet
    * ``train_content_plan.txt``       - for each content plan record, the
      position of its first occurrence in the matching source line

    All files are written as UTF-8 because the record delimiter is a
    non-ASCII character.

    Returns:
        Tuple ``(src_leaflets, content_plan_leaflets, summaries_leaflets)``.
    """
    # Output files
    INTER_CONTENT_PLAN = 'inter/train_content_plan.txt'  # intermediate content plan input to second stage
    SRC_FILE = 'src_train.txt'  # src file input to first stage
    TRAIN_TGT_FILE = "tgt_train.txt"  # tgt file of second stage
    CONTENT_PLAN_OUT = 'train_content_plan.txt'  # content plan output of first stage

    # Create src - content_plan - summary
    content_plan_leaflets, summaries_leaflets = create_summary_contentplan(dataset)
    src_leaflets = create_src_table(content_plan_leaflets)

    # save to corresponding files; `with` closes each file even if a write fails
    for path, lines in ((INTER_CONTENT_PLAN, content_plan_leaflets),
                        (TRAIN_TGT_FILE, summaries_leaflets),
                        (SRC_FILE, src_leaflets)):
        with open(path, 'w', encoding='utf-8') as output_file:
            output_file.writelines(lines)

    # Content plan as pointers: every record is replaced by the index of its
    # first occurrence in the source line of the same leaflet. Computed from
    # the in-memory lines, which hold the same text as the files just written.
    outputs = []
    for src_line, content_plan_line in zip(src_leaflets, content_plan_leaflets):
        training_sample = src_line.split()
        pointers = [str(training_sample.index(record)) for record in content_plan_line.split()]
        outputs.append(" ".join(pointers))

    with open(CONTENT_PLAN_OUT, 'w', encoding='utf-8') as output_file:
        # one line per leaflet, including a newline after the last one
        output_file.write("\n".join(outputs))
        output_file.write("\n")

    return src_leaflets, content_plan_leaflets, summaries_leaflets

In [15]:
# Generate the training files and keep the in-memory source, content plan and target lines.
leaflets_src_train, leaflets_inter_contentplan_train, leaflets_tgt_train = create_training_sets(train_dataset)

In [16]:
# create_training_sets reads this file back as UTF-8 (the record delimiter is
# non-ASCII), so decode it the same way instead of using the platform default.
with open('src_train.txt', encoding='utf-8') as reader:
    # readlines() returns all lines of the file as a list, line endings included
    src_train = reader.readlines()

In [17]:
# Decode explicitly as UTF-8 - the encoding this notebook uses when it reads its
# generated files back via codecs.open - rather than the platform default.
with open('tgt_train.txt', encoding='utf-8') as reader:
    # readlines() returns all lines of the file as a list, line endings included
    tgt_train = reader.readlines()

In [18]:
# Load the pointer-style content plan: one line of space-separated indexes per leaflet.
with open('train_content_plan.txt') as plan_file:
    train_content_plan = list(plan_file)

In [19]:
# create_training_sets reads this file back as UTF-8 (the record delimiter is
# non-ASCII), so decode it the same way instead of using the platform default.
with open('inter/train_content_plan.txt', encoding='utf-8') as reader:
    # readlines() returns all lines of the file as a list, line endings included
    inter_train_content_plan = reader.readlines()

======================================================================================================================
## Validation dataset 

In [28]:
def create_validation_sets(dataset):
    """
    Build and write the four validation files.

    Files written (relative to the working directory; the ``inter``
    directory must already exist):

    * ``inter/valid_content_plan.txt`` - content plan records per leaflet
    * ``tgt_valid.txt``                - tokenized section text per leaflet
    * ``src_valid.txt``                - special records + records per leaflet
    * ``valid_content_plan.txt``       - for each content plan record, the
      position of its first occurrence in the matching source line

    All files are written as UTF-8 because the record delimiter is a
    non-ASCII character.

    Returns:
        Tuple ``(src_leaflets, content_plan_leaflets, summaries_leaflets)``.
    """
    # Output files
    INTER_CONTENT_PLAN_VALID = 'inter/valid_content_plan.txt'  # intermediate content plan input to second stage
    SRC_FILE_VALID = 'src_valid.txt'  # src file input to first stage
    TRAIN_TGT_FILE_VALID = "tgt_valid.txt"  # tgt file of second stage
    CONTENT_PLAN_OUT_VALID = 'valid_content_plan.txt'  # content plan output of first stage

    # Create src - content_plan - summary
    content_plan_leaflets, summaries_leaflets = create_summary_contentplan(dataset)
    src_leaflets = create_src_table(content_plan_leaflets)

    # save to corresponding files; `with` closes each file even if a write fails
    for path, lines in ((INTER_CONTENT_PLAN_VALID, content_plan_leaflets),
                        (TRAIN_TGT_FILE_VALID, summaries_leaflets),
                        (SRC_FILE_VALID, src_leaflets)):
        with open(path, 'w', encoding='utf-8') as output_file:
            output_file.writelines(lines)

    # Content plan as pointers: every record is replaced by the index of its
    # first occurrence in the source line of the same leaflet. Computed from
    # the in-memory lines, which hold the same text as the files just written.
    outputs = []
    for src_line, content_plan_line in zip(src_leaflets, content_plan_leaflets):
        validation_sample = src_line.split()
        pointers = [str(validation_sample.index(record)) for record in content_plan_line.split()]
        outputs.append(" ".join(pointers))

    with open(CONTENT_PLAN_OUT_VALID, 'w', encoding='utf-8') as output_file:
        # one line per leaflet, including a newline after the last one
        output_file.write("\n".join(outputs))
        output_file.write("\n")

    return src_leaflets, content_plan_leaflets, summaries_leaflets

In [29]:
# Generate the validation files and keep the in-memory source, content plan and target lines.
leaflets_src_valid, leaflets_inter_contentplan_valid, leaflets_tgt_valid = create_validation_sets(valid_dataset)

In [39]:
# create_validation_sets reads this file back as UTF-8 (the record delimiter is
# non-ASCII), so decode it the same way instead of using the platform default.
with open('inter/valid_content_plan.txt', encoding='utf-8') as reader:
    # readlines() returns all lines of the file as a list, line endings included
    contentplan_valid = reader.readlines()

In [37]:
# Number of lines read from inter/valid_content_plan.txt.
len(contentplan_valid)

130

INTER_CONTENT_PLAN_VALID = 'inter/valid_content_plan.txt'  # intermediate content plan input to second stage
    SRC_FILE_VALID = 'src_valid.txt'  # src file input to first stage
    TRAIN_TGT_FILE_VALID = "tgt_valid.txt"  # tgt file of second stage
    CONTENT_PLAN_OUT_VALID = 'valid_content_plan.txt'  # content plan output of first stage

================================================================================================================================
## Test dataset 

In [41]:
def create_test_sets(dataset):
    """
    Build and write the test files: only source and target, no content plan.

    Files written (relative to the working directory; the ``test`` directory
    must already exist):

    * ``test/tgt_test.txt`` - tokenized section text per leaflet
    * ``test/src_test.txt`` - special records + records per leaflet

    Both are written as UTF-8 because the record delimiter is a non-ASCII
    character.

    Returns:
        Tuple ``(src_leaflets, summaries_leaflets)``.
    """
    # Output files
    SRC_FILE_TEST = 'test/src_test.txt'  # src file input to first stage
    TRAIN_TGT_FILE_TEST = "test/tgt_test.txt"  # tgt file of second stage

    # Create src - content_plan - summary
    content_plan_leaflets, summaries_leaflets = create_summary_contentplan(dataset)
    src_leaflets = create_src_table(content_plan_leaflets)

    # save just summary and src data; `with` closes each file even if a write fails
    with open(TRAIN_TGT_FILE_TEST, 'w', encoding='utf-8') as summary_file:
        summary_file.writelines(summaries_leaflets)

    with open(SRC_FILE_TEST, 'w', encoding='utf-8') as src_file:
        src_file.writelines(src_leaflets)

    return src_leaflets, summaries_leaflets

In [42]:
# Generate the test files and keep the in-memory source and target lines.
leaflets_src_test, leaflets_tgt_test = create_test_sets(test_dataset)

=======================================================================================================================
## Creating *train-roto-ptrs.txt*

In [43]:
# Files used to build the pointer file.
# Inputs: both are produced by create_training_sets.
INTER_CONTENT_PLAN = "inter/train_content_plan.txt"  # content plan records, one leaflet per line
TRAIN_TGT_FILE = "tgt_train.txt"                     # tokenized target text, one leaflet per line
# Output.
OUTPUT_FILE = "train-roto-ptrs.txt"

In [44]:
# Decode explicitly as UTF-8 - the encoding this notebook uses when it reads its
# generated files back via codecs.open - rather than the platform default.
with open(TRAIN_TGT_FILE, encoding='utf-8') as reader:
    leaflet_tgt_train = reader.readlines()

In [45]:
# The records contain the non-ASCII field delimiter and create_training_sets reads
# this file back as UTF-8, so decode it the same way instead of using the platform default.
with open(INTER_CONTENT_PLAN, encoding='utf-8') as reader:
    leaflets_inter_content_plan = reader.readlines()

For example, the last entry `245,39` in `train_roto_ptrs[1]` indicates that the 245th token of the summary matches the 39th content plan entry.  

Phoenix ----> Phoenix￨Suns￨TEAM-CITY￨HOME  
Suns ----> Suns￨Suns￨TEAM-NAME￨HOME  
39 ----> 39￨Suns￨TEAM-WINS￨HOME  
38 ----> 38￨Suns￨TEAM-LOSSES￨HOME  
87 ----> 87￨Suns￨TEAM-PTS￨HOME  
85 ----> 85￨Jazz￨TEAM-PTS￨AWAY  
Utah ----> Utah￨Jazz￨TEAM-CITY￨AWAY  

In [46]:
# First target line, for visual inspection.
leaflet_tgt_train[0]

"do not use cystagon - if you - or your child - are allergic ( hypersensitive ) to cysteamine bitartrate or penicillamine or any of the other ingredients of cystagon . - if you are pregnant , this is particularly relevant during the first trimester - if you are breast - feeding . take special care with cystagon - when your or your child ' s disorder has been confirmed by leucocyte cystine measurements , the therapy with cystagon must be started as soon as possible . - a few cases of skin lesions on elbows like little hard lumps have been reported in children treated with high doses of different cysteamine preparations . these lesions were associated with skin striae and bone lesions such as fracture and bone deformities , and with laxity of joints . your doctor could require a regular physical and x - ray examination for the skin and the bones to control the effects of the medicinal product . self examination of your or your child ' s skin is recommended . if any skin or bone abnormali

In [47]:
# Content plan line of the same (first) leaflet, for visual inspection.
leaflets_inter_content_plan[0]

"cystagon￨Section2￨PRODUCT_NAME￨HOME cystagon￨Section2￨GENERIC_NAME￨AWAY allergic￨Section2￨PROBLEM￨AWAY hypersensitive￨Section2￨DX_NAME￨AWAY cysteamine_bitartrate￨Section2￨GENERIC_NAME￨HOME penicillamine￨Section2￨GENERIC_NAME￨HOME cystagon￨Section2￨GENERIC_NAME￨HOME pregnant￨Section2￨DX_NAME￨HOME cystagon￨Section2￨TREATMENT￨HOME your_child's_disorder￨Section2￨PROBLEM￨AWAY leucocyte_cystine_measurements￨Section2￨TEST￨AWAY the_therapy￨Section2￨TREATMENT￨AWAY cystagon￨Section2￨GENERIC_NAME￨HOME skin_lesions_on_elbows￨Section2￨PROBLEM￨AWAY little_hard_lumps￨Section2￨PROBLEM￨AWAY different_cysteamine_preparations￨Section2￨TREATMENT￨HOME these_lesions￨Section2￨PROBLEM￨HOME skin_striae￨Section2￨DX_NAME￨HOME bone_lesions￨Section2￨DX_NAME￨AWAY fracture￨Section2￨DX_NAME￨HOME bone_deformities￨Section2￨DX_NAME￨AWAY laxity_of_joints￨Section2￨PROBLEM￨AWAY a_regular_physical_and_x-ray_examination￨Section2￨TEST￨HOME the_skin_and_the_bones￨Section2￨PROBLEM￨HOME the_medicinal_product￨Section2￨TREATMENT￨

In [48]:
# Longest entity, counted in summary tokens, that the alignment tries to match.
# Entities spanning more tokens are never aligned.
MAX_ENTITY_WORDS = 4


def _align_summary_to_plan(summary_tokens, plan_values, max_words=MAX_ENTITY_WORDS):
    """
    Pair summary token positions with content plan positions.

    For each token position, candidate strings are built from the next
    1..max_words tokens joined by "_" (the way multi-word entity values are
    written in the content plan). The first not-yet-used plan value that
    equals any candidate is paired with the position and retired, so every
    plan entry is used at most once and every token position yields at most
    one pair.

    Args:
        summary_tokens: list of tokens of one summary.
        plan_values: list of entity values (first field of each record).
        max_words: maximum number of tokens joined into one candidate.

    Returns:
        List of "token_pos,plan_index" strings in token order.
    """
    unused_values = list(plan_values)
    token_count = len(summary_tokens)
    pairs = []

    for token_pos in range(token_count):
        candidates = {
            "_".join(summary_tokens[token_pos:token_pos + width])
            for width in range(1, max_words + 1)
            if token_pos + width <= token_count
        }

        for plan_index, value in enumerate(unused_values):
            # Retired entries are None and therefore can never equal a token;
            # a string marker such as "MASKED" could collide with real text.
            if value is not None and value in candidates:
                unused_values[plan_index] = None
                pairs.append(str(token_pos) + "," + str(plan_index))
                # find just one match per token position
                break

    return pairs


# Both files must describe the same leaflets line by line; fail early with a
# clear message instead of silently misaligning or raising a late IndexError.
assert len(leaflet_tgt_train) == len(leaflets_inter_content_plan), \
    "target file and content plan file have different numbers of lines"

roto_pts_content = []

# for each leaflet
for summary_line, plan_line in zip(leaflet_tgt_train, leaflets_inter_content_plan):
    # only the value (first field) of each record is compared with the text
    plan_values = [record.split(DELIM)[0] for record in plan_line.split()]

    # pairs (index_tgt, index_contentplan) for this leaflet
    leaflet_pairs = _align_summary_to_plan(summary_line.split(), plan_values)

    # join pairs with " " between them and add "\n" at the end
    roto_pts_content.append(" ".join(leaflet_pairs) + "\n")

In [49]:
OUTPUT_FILE = "train-roto-ptrs.txt"

# `with` closes the file even if a write fails; every entry already ends with "\n".
with open(OUTPUT_FILE, 'w') as ptrs_file:
    ptrs_file.writelines(roto_pts_content)

In [50]:
# Number of pointer lines built (one per line of the training target file).
len(roto_pts_content)

1046

In [51]:
# Position of the training example inspected in the cells below.
index = 500

In [52]:
# Pointer line ("token_pos,plan_index" pairs) of the inspected example.
roto_pts_content[index]

'3,0 7,2 9,3 15,4 26,5 28,6 31,7 35,8 36,144 43,9 45,10 54,1 57,12 59,13 65,14 70,15 72,16 77,17 86,215 92,49 98,20 99,27 109,21 110,92 113,22 117,23 125,24 130,25 132,26 141,90 146,29 164,11 175,30 191,32 194,33 195,204 198,34 206,35 211,61 216,37 220,38 223,39 231,41 233,42 236,43 239,44 242,45 246,46 259,48 266,50 270,51 273,52 277,53 296,56 307,58 309,59 316,60 323,62 326,73 329,63 335,64 340,65 346,66 364,31 370,69 383,70 384,234 389,71 394,72 395,102 399,103 406,74 429,76 435,77 439,68 447,78 455,80 457,161 461,81 462,246 473,83 475,84 477,85 484,86 487,87 491,88 497,89 508,91 514,166 517,93 533,79 539,95 556,96 558,94 564,98 576,99 578,100 596,101 610,105 611,121 613,106 618,107 621,108 623,109 625,110 627,111 629,112 632,113 634,114 636,115 639,116 641,117 648,177 653,119 692,122 696,123 698,124 700,125 702,126 713,127 715,128 726,97 728,129 735,131 748,130 758,133 764,134 765,172 767,132 785,136 786,187 787,176 798,137 803,138 817,139 818,182 824,140 825,194 834,135 837,142 83

In [53]:
# Number of aligned pairs found for the inspected example.
len(roto_pts_content[index].split())

258

In [54]:
# Number of content plan records of the inspected example (compare with the pair count above).
len(leaflets_inter_content_plan[index].split())

290

### Check whether correct

In [55]:
# For every aligned pair of the inspected example, print the (up to) four summary
# tokens starting at the matched position next to the record they point to.
inspected_summary_tokens = leaflet_tgt_train[index].split()
inspected_plan_records = leaflets_inter_content_plan[index].split()

# content plan positions that received a pointer; used by the next check
content_plan_indeces = []

for pair in roto_pts_content[index].split():
    pair = pair.split(",")
    a, b = int(pair[0]), int(pair[1])

    content_plan_indeces.append(b)

    print(inspected_summary_tokens[a:a+4], end=" ----> ")
    print(inspected_plan_records[b])
    

['kinzalkomb', 'if', 'you', 'are'] ----> kinzalkomb￨Section2￨PRODUCT_NAME￨AWAY
['allergic', 'to', 'telmisartan', 'or'] ----> allergic￨Section2￨PROBLEM￨HOME
['telmisartan', 'or', 'any', 'other'] ----> telmisartan￨Section2￨GENERIC_NAME￨HOME
['this', 'medicine', '(', 'listed'] ----> this_medicine￨Section2￨TREATMENT￨HOME
['allergic', 'to', 'hydrochlorothiazide', 'or'] ----> allergic￨Section2￨PROBLEM￨HOME
['hydrochlorothiazide', 'or', 'to', 'any'] ----> hydrochlorothiazide￨Section2￨GENERIC_NAME￨AWAY
['any', 'other', 'sulfonamide', '-'] ----> any_other_sulfonamide￨Section2￨TREATMENT￨HOME
['derived', 'medicines', '.', 'if'] ----> derived_medicines￨Section2￨TREATMENT￨HOME
['medicines', '.', 'if', 'you'] ----> medicines￨Section2￨TREATMENT￨HOME
['3', 'months', 'pregnant', '.'] ----> 3￨Section2￨NUMBER￨AWAY
['pregnant', '.', '(', 'it'] ----> pregnant￨Section2￨DX_NAME￨HOME
['kinzalkomb', 'in', 'early', 'pregnancy'] ----> kinzalkomb￨Section2￨GENERIC_NAME￨AWAY
['pregnancy', 'see', 'pregnancy', 'secti

In [56]:
# Print the content plan records of the inspected example that no summary token points to.
# Explanations noted so far:
#   - the record is a 3-word long token
#   - hiv_infection: NER outputs both 'hiv' and 'hiv_infection', so the content plan
#     holds 2 tokens starting with 'hiv'
unaligned_candidates = leaflets_inter_content_plan[index].split()
for i, record in enumerate(unaligned_candidates):
    if i not in content_plan_indeces:
        print(record)

the_bile_from_the_liver_and_gall_bladder￨Section2￨PROBLEM￨HOME
any_other_severe_liver_disease￨Section2￨PROBLEM￨HOME
a_blood_pressure_lowering_medicine￨Section2￨TREATMENT￨AWAY
excessive_loss_of_body_water￨Section2￨PROBLEM￨AWAY
low-salt_diet￨Section2￨TREATMENT￨AWAY
narrowing_of_the_blood_vessels￨Section2￨PROBLEM￨HOME
salt_retention_in_the_body￨Section2￨TREATMENT￨AWAY
imbalance_of_various_blood_minerals￨Section2￨TREATMENT￨HOME
lupus"￨Section2￨PROBLEM￨AWAY
an_increase_of_pressure_in_your_eye￨Section2￨PROBLEM￨HOME
skin_and_lip_cancer_(non-melanoma_skin_cancer￨Section2￨PROBLEM￨HOME
an_ace-inhibitor￨Section2￨TREATMENT￨HOME
electrolyte_imbalance_in_your_body￨Section2￨PROBLEM￨AWAY
an_abnormally_fast_heart_rate￨Section2￨PROBLEM￨AWAY
an_increased_sensitivity_of_the_skin￨Section2￨PROBLEM￨HOME
hypokalaemia)￨Section2￨PROBLEM￨HOME
('water_tablets'￨Section2￨TREATMENT￨HOME
laxatives_(e.g._castor_oil￨Section2￨TREATMENT￨AWAY
a_hormone)￨Section2￨TREATMENT￨AWAY
an_antibiotic)￨Section2￨TREATMENT￨HOME
potass

In [57]:
# Target line of the inspected example.
# NOTE(review): 500 is hard-coded here; it equals `index` as set above - keep the two in sync.
leaflet_tgt_train[500]



In [58]:
# Content plan line of the inspected example.
# NOTE(review): 500 is hard-coded here; it equals `index` as set above - keep the two in sync.
leaflets_inter_content_plan[500]

'kinzalkomb￨Section2￨PRODUCT_NAME￨AWAY kinzalkomb￨Section2￨GENERIC_NAME￨AWAY allergic￨Section2￨PROBLEM￨HOME telmisartan￨Section2￨GENERIC_NAME￨HOME this_medicine￨Section2￨TREATMENT￨HOME allergic￨Section2￨PROBLEM￨HOME hydrochlorothiazide￨Section2￨GENERIC_NAME￨AWAY any_other_sulfonamide￨Section2￨TREATMENT￨HOME derived_medicines￨Section2￨TREATMENT￨HOME 3￨Section2￨NUMBER￨AWAY pregnant￨Section2￨DX_NAME￨HOME kinzalkomb￨Section2￨GENERIC_NAME￨AWAY pregnancy￨Section2￨DX_NAME￨HOME pregnancy_section￨Section2￨PROCEDURE_NAME￨AWAY severe_liver_problems￨Section2￨PROBLEM￨HOME cholestasis￨Section2￨DX_NAME￨HOME biliary_obstruction￨Section2￨DX_NAME￨HOME drainage￨Section2￨DX_NAME￨AWAY the_bile_from_the_liver_and_gall_bladder￨Section2￨PROBLEM￨HOME any_other_severe_liver_disease￨Section2￨PROBLEM￨HOME severe_kidney_disease￨Section2￨PROBLEM￨HOME low_potassium_levels￨Section2￨DX_NAME￨HOME high_calcium_levels￨Section2￨PROBLEM￨HOME your_blood￨Section2￨TEST￨AWAY treatment￨Section2￨TREATMENT￨AWAY diabetes￨Section2￨