In [1]:
import sys, os
# Put the sibling ``scripts`` directory (resolved from the parent of the
# current working directory) on sys.path, once. Presumably this is where the
# project packages imported in the following cell live -- TODO confirm.
module_path = os.path.join(os.path.abspath('..'), 'scripts')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string

from utils.config import config
from utils.logger.logger import loggerCreator

## ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator

%load_ext autoreload

%autoreload 2

In [3]:
def getRandomString(N):
    """Return a random string made of ``N`` characters.

    Each character is picked independently with ``random.choice`` from the
    ASCII upper-case letters, the digits and the ASCII lower-case letters
    (62 symbols in total). ``N == 0`` yields the empty string.
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    picked = []
    for _ in range(N):
        picked.append(random.choice(alphabet))
    return ''.join(picked)



# Set Required Fields for the Parsing and Partition Modules

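### Please ensure that your `data/converted_to_html` folder contains the HTML files inside language-specific subfolders

Example: if your language code is `en`, all HTML files must reside in the `data/converted_to_html/en` folder. If that folder is not present, a `FolderNotFoundError` is raised by the parsing stage. Run only the cell for the language you want to process; every language cell below assigns the same variables.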

### English

In [22]:
# Pipeline inputs for an English run. The German and Spanish cells assign the
# same three names, so the language cell executed last is the one in effect.
# ePILanguage: language code; also the sub-folder name under data/converted_to_html.
ePILanguage = 'en'
# fileNameQrd: passed to StyleRulesDictionary as `fileName`.
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
# procedureType: passed to StyleRulesDictionary as `procedureType`.
procedureType = 'CAP'

### German

In [4]:
# Pipeline inputs for a German run. The English and Spanish cells assign the
# same three names, so the language cell executed last is the one in effect.
# ePILanguage: language code; also the sub-folder name under data/converted_to_html.
ePILanguage = 'de'
# fileNameQrd: passed to StyleRulesDictionary as `fileName`.
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
# procedureType: passed to StyleRulesDictionary as `procedureType`.
procedureType = 'CAP'

### Spanish

In [7]:
# Pipeline inputs for a Spanish run. The English and German cells assign the
# same three names, so the language cell executed last is the one in effect.
# ePILanguage: language code; also the sub-folder name under data/converted_to_html.
ePILanguage = 'es'
# fileNameQrd: passed to StyleRulesDictionary as `fileName`.
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
# procedureType: passed to StyleRulesDictionary as `procedureType`.
procedureType = 'CAP'

# Html Parsing Stage

In [8]:
class FolderNotFoundError(Exception):
    """Raised when the language-specific input HTML folder does not exist."""

## Generate input folder path: ../data/converted_to_html/<ePILanguage>
data_root = os.path.join(os.path.abspath('..'), 'data')
module_path = os.path.join(data_root, 'converted_to_html', ePILanguage)

## Generate output folder path: ../data/outputJSON/<ePILanguage>
# Built from its components instead of a substring replace on the absolute
# path, which would also rewrite any parent directory whose name happens to
# contain 'converted_to_html'.
output_json_path = os.path.join(data_root, 'outputJSON', ePILanguage)

# The language-specific input folder must already exist (and be a directory);
# otherwise stop with an explicit error.
if not os.path.isdir(module_path):
    raise FolderNotFoundError(module_path + " not found")

# Collect every .html and .htm file directly inside the input folder.
filenames = glob.glob(os.path.join(module_path, '*.html'))
filenames.extend(glob.glob(os.path.join(module_path, '*.htm')))

## Create language specific folder in outputJSON folder if it doesn't exist.
# makedirs also creates a missing parent 'outputJSON' folder, a case in which
# os.mkdir raises FileNotFoundError.
os.makedirs(output_json_path, exist_ok=True)

# NOTE(review): the one-character random suffix allows only 62 distinct logger
# names; the duplicated log lines in the saved output may be caused by a name
# being reused -- confirm against loggerCreator.
logger = loggerCreator('Parser_' + getRandomString(1))

styleRulesObj = StyleRulesDictionary(loggerCreator('Style Dictionary_' + getRandomString(1)),
                                     language = ePILanguage,
                                     fileName = fileNameQrd,
                                     procedureType = procedureType)

parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                            styleRulesObj.styleFeatureKeyList,
                            styleRulesObj.qrd_section_headings)

for input_filename in filenames:
    # Same base name with a .json extension, placed in the output folder.
    # splitext only strips the final extension, so a '.htm' appearing elsewhere
    # in the path or file name is left alone.
    base_name = os.path.splitext(os.path.basename(input_filename))[0]
    output_filename = os.path.join(output_json_path, base_name + '.json')
    print(input_filename, output_filename)
    parserObj.createPIJsonFromHTML(input_filepath = input_filename,
                                   output_filepath = output_filename,
                                   img_base64_dict = parserObj.convertImgToBase64(input_filename))

2021-04-23 20:39:49,689 : Style Dictionary_l : Creating default style dictionary in file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\styleRules\rule_dictionary_es.json
2021-04-23 20:39:49,689 : Style Dictionary_l : Creating default style dictionary in file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\styleRules\rule_dictionary_es.json
2021-04-23 20:39:49,720 : Style Dictionary_l : Qrd Section Keys Generated: ANEXO I, ANEXO II, ANEXO III, B. PROSPECTO
2021-04-23 20:39:49,720 : Style Dictionary_l : Qrd Section Keys Generated: ANEXO I, ANEXO II, ANEXO III, B. PROSPECTO


C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\converted_to_html\es\emea-combined-h-2494-es.htm C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\outputJSON\es\emea-combined-h-2494-es.json


2021-04-23 20:39:54,487 : Parser_U : Writing to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\outputJSON\es\emea-combined-h-2494-es.json


# Partition Stage

In [9]:
# Folder that holds the parsing stage's JSON output for the selected language.
path_json = os.path.join(os.path.abspath('..'), 'data', 'outputJSON', ePILanguage)

# Rebuild the style rule dictionary here so this cell does not rely on the
# object created in the parsing cell.
styleRulesObj = StyleRulesDictionary(
    loggerCreator('Style Dictionary_' + getRandomString(1)),
    language=ePILanguage,
    fileName=fileNameQrd,
    procedureType=procedureType,
)

# Partition the JSON files in path_json using the QRD section headings.
partitionlogger = loggerCreator('Partition_' + getRandomString(1))
partitioner = DocTypePartitioner(partitionlogger)
partitioner.partitionHtmls(styleRulesObj.qrd_section_headings, path_json)

2021-04-23 20:40:18,556 : Style Dictionary_n : Reading style dictionary in file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\styleRules\rule_dictionary_es.json
2021-04-23 20:40:18,579 : Style Dictionary_n : Qrd Section Keys Generated: ANEXO I, ANEXO II, ANEXO III, B. PROSPECTO
2021-04-23 20:40:18,590 : Partition_M : Partitioning Json: emea-combined-h-2494-es.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
0,"<div class=""WordSection1""> <p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES""> </span></b></p> <p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span ...",7edae4ce-fdf2-43fb-8fea-4f67632ce52e,,['WordSection1'],False,False,False,False,False,False,False,False,,...,5a8310a7-a311-4231-9fae-269b9389b8b1
24,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES"">ANEXO I</span></b></p>",763339d8-93c0-4bf8-acf1-6d52eea9f5d6,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,,ANEXO I,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
26,"<p class=""TitleA""><span lang=""ES"">FICHA TÉCNICA O RESUMEN DE LAS CARACTERÍSTICAS DEL PRODUCTO</span></p>",56c29f2f-3e93-4904-bb65-5263f4d0b3ee,,['TitleA'],True,False,True,False,False,False,False,False,,FICHA TÉCNICA O RESUMEN DE LAS CARACTERÍSTICAS DEL PRODUCTO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
34,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">1. </span></b><b><span lang=""ES"">NOMBRE DEL MEDICAMENTO</span></b></p>",04dec940-cfca-4a98-9cd1-db469b1f5e95,margin-left:28.35pt;text-indent:-28.35pt;page-break-after:\navoid,['MsoNormal'],True,False,True,False,True,False,False,True,L1,1. NOMBRE DEL MEDICAMENTO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
36,"<p class=""MsoNormal""><span lang=""ES"">Kalydeco 75 mg comprimidos recubiertos con película</span></p>",aa0dba02-0264-40c1-9909-f386b63ba80d,,['MsoNormal'],False,False,False,False,False,False,False,False,,Kalydeco 75 mg comprimidos recubiertos con película,7edae4ce-fdf2-43fb-8fea-4f67632ce52e


2021-04-23 20:40:18,748 : Partition_M : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\emea-combined-h-2494-es_SmPC.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
1247,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES"">ANEXO II</span></b></p>",bb98fdc0-6015-4430-9eb7-f752c9752e7b,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,,ANEXO II,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1249,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">A. FABRICANTE(S) RESPONSABL...",1b3c90a3-f76e-4ada-985d-842866114f6c,margin-top:0in;margin-right:-.95pt;margin-bottom:\n0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,A. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1251,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">B. CONDICIONES O RESTRICCIO...",4f8926bf-4a6b-4a6a-951f-c3b4628b1e5a,margin-top:0in;margin-right:-.95pt;margin-bottom:\n0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,B. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1253,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">C. OTRAS CONDICIONES Y REQU...",1beba742-71bb-4843-b164-59d3109de651,margin-top:0in;margin-right:-.95pt;margin-bottom:\n0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,C. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1255,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">D. CONDICIONES O RESTRICCIO...",84548cc1-2f9a-4ce8-ba25-6ce28a6c609f,margin-top:0in;margin-right:-.95pt;margin-bottom:\n0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,D. CONDICIONES O RESTRICCIONES EN RELACIÓN CON LA UTILIZACIÓN SEGURA Y EFICAZ DEL MEDICAMENTO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e


2021-04-23 20:40:18,856 : Partition_M : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\emea-combined-h-2494-es_ANEXO II.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
1339,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES"">ANEXO III</span></b></p>",36da94bc-adbf-4a41-b6ee-714b30985276,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,L1,ANEXO III,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1341,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES"">ETIQUETADO Y PROSPECTO</span></b></p>",6c0e0bf6-e16d-4451-bd3f-c0a9aa91fccf,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,L1,ETIQUETADO Y PROSPECTO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1368,"<p class=""TitleA""><span lang=""ES"">A. ETIQUETADO</span></p>",57daa241-cce1-49be-8f58-53765e53987b,,['TitleA'],True,False,True,False,True,False,False,True,L2,A. ETIQUETADO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1373,"<div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt""> <p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;border:none; padding:0in""><b><span lang=""ES"">INFORM...",253600f9-a63a-4deb-b446-da6892382486,border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt,,False,False,False,False,False,False,True,False,,,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
1374,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;border:none; padding:0in""><b><span lang=""ES"">INFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR</span></b></p>",1afda95c-5817-4dcc-9d64-8826e4e05b49,margin-left:28.35pt;text-indent:-28.35pt;border:none;\npadding:0in,['MsoNormal'],True,False,True,False,False,False,False,True,L1,INFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR,253600f9-a63a-4deb-b446-da6892382486


2021-04-23 20:40:18,951 : Partition_M : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\emea-combined-h-2494-es_ANEXO III.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
3223,"<p class=""TitleA""><span lang=""ES"">B. PROSPECTO</span></p>",b311cfc4-5d67-48d3-a477-7e844a5b5d0a,,['TitleA'],True,False,True,False,True,False,False,True,,B. PROSPECTO,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
3227,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><b><span lang=""ES"">Prospecto: información para el paciente</span></b></p>",e3526be1-4093-4cb8-8e5f-657fb9a6b903,text-align:center;page-break-after:avoid,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Prospecto: información para el paciente,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
3229,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><b><span lang=""ES"">Kalydeco 75 mg comprimidos recubiertos con película</span></b></p>",af423066-c284-4bfd-9635-6315e70c5183,text-align:center;page-break-after:avoid,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Kalydeco 75 mg comprimidos recubiertos con película,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
3230,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><b><span lang=""ES"">Kalydeco 150 mg comprimidos recubiertos con película</span></b></p>",471297cb-8d70-4cf5-bb7d-030dfc90f633,text-align:center;page-break-after:avoid,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Kalydeco 150 mg comprimidos recubiertos con película,7edae4ce-fdf2-43fb-8fea-4f67632ce52e
3231,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><span lang=""ES"">ivacaftor</span></p>",e477d518-27f6-4a1b-8969-5edd6e8f5c2a,text-align:center;page-break-after:avoid,['MsoNormal'],False,False,False,False,False,False,False,False,,ivacaftor,7edae4ce-fdf2-43fb-8fea-4f67632ce52e


2021-04-23 20:40:19,008 : Partition_M : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\emea-combined-h-2494-es_ PROSPECTO.json


# Matching Stage

In [52]:
# Matching-stage settings for the English SmPC partition.
# NOTE(review): the next cell assigns the same names for the package leaflet,
# so on a top-to-bottom run these values are overwritten before MatchDocument
# is constructed. Run only one of the two settings cells.

# NOTE(review): previousHeadingRowFound and docFilter are not referenced by any
# other visible cell -- confirm whether they are still needed.
previousHeadingRowFound = None
procedureType = 'CAP'
# Set independently of ePILanguage; keep the two in sync manually.
languageCode = 'en'
documentType = 'SmPC'
stopWordlanguage = 'english'
docFilter = 'SmPC.json'
# Name of the partitioned JSON file handed to MatchDocument.
fileNameDoc = 'Kalydeco II-86-PI-clean_SmPC.json'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'

# Tuning values passed positionally to MatchDocument.
stopWordFilterLen = 6 
topHeadingsConsidered = 4
bottomHeadingsConsidered = 6
isPackageLeaflet = False

In [53]:

# Matching-stage settings for the English package leaflet partition.
# NOTE(review): this cell assigns the same names as the preceding SmPC
# settings cell; whichever of the two ran last feeds MatchDocument.

# NOTE(review): previousHeadingRowFound and docFilter are not referenced by any
# other visible cell -- confirm whether they are still needed.
previousHeadingRowFound = None
procedureType = 'CAP'
# Set independently of ePILanguage; keep the two in sync manually.
languageCode = 'en'
documentType = 'Package leaflet'
stopWordlanguage = 'english'
docFilter = 'LEAFLET.json'
# Name of the partitioned JSON file handed to MatchDocument.
fileNameDoc = 'Abilify-h-471-e_ PACKAGE LEAFLET.json'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'

# Tuning values passed positionally to MatchDocument.
stopWordFilterLen = 100
topHeadingsConsidered = 5
bottomHeadingsConsidered = 10
isPackageLeaflet = True

In [54]:
# Build the matcher from the settings chosen in the preceding settings cell.
# All arguments are positional, so their order must follow the MatchDocument
# constructor signature (not visible in this notebook).
matchDocObj = MatchDocument(procedureType,
                 languageCode,
                 documentType,
                 fileNameDoc,
                 fileNameQrd,
                 fileNameMatchRuleBook,
                 topHeadingsConsidered,
                 bottomHeadingsConsidered,
                 stopWordFilterLen,
                 stopWordlanguage,
                 isPackageLeaflet)

File being processed: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\Abilify-h-471-e_ PACKAGE LEAFLET.json
--------------------------------------------


In [55]:
    # Run the heading matching. Both results are consumed by later cells:
    # `coll` by DataBetweenHeadingsExtractor, `df` and `coll` by DocumentAnnotation.
    # NOTE(review): the leading indentation of this cell looks accidental; it
    # would be an IndentationError if the notebook were exported to a plain
    # .py script -- consider removing it.
    df, coll = matchDocObj.matchHtmlHeaddingsWithQrd()

True  ||  <=4|16.67|(91, 100, 95)|0.91|  ||  B. PACKAGE LEAFLET  ||  PACKAGE LEAFLET
True  ||    ||  What is in this leaflet  ||  What is in this leaflet
True  ||  >7|2.38|(99, 98, 99)|0.99|  ||  1.       What ABILIFY is and what it is used for  ||  1 What ABILIFY is and what it is used for
True  ||  Contains<>|18.75|(91, 83, 95)|0.94|  ||  2.       What you need to know before you take ABILIFY  ||  2 What you need to know before you <take> <use> ABILIFY 
True  ||  Contains<>|42.11|(83, 63, 95)|0.96|  ||  Do not take ABILIFY  ||  Do not <take> <use> ABILIFY
True  ||  Contains<>|8.33|(96, 96, 98)|0.99|  ||  Children and adolescents  ||  Children <and adolescents>
True  ||    ||  Other medicines and ABILIFY  ||  Other medicines and ABILIFY
True  ||  SpecialCase2|47.22|(81, 67, 95)|0.94|  ||  ABILIFY with food, drink and alcohol  ||  ABILIFY with <food> <and> <,> <drink> <and> <alcohol>
True  ||  SpecialCase3|28.21|(88, 82, 95)|0.94|  ||  Pregnancy, breast-feeding and fertility  ||  Pregn

True  ||  Contains<>|20.0|(91, 82, 95)|0.94|  ||  If you take more ABILIFY than you should  ||  If you <take> <use> more ABILIFY than you should
True  ||  Contains<>|31.03|(87, 76, 95)|0.97|  ||  If you forget to take ABILIFY  ||  If you forget to <take> <use> ABILIFY>
True  ||  Contains<>|42.31|(83, 73, 95)|0.96|  ||  If you stop taking ABILIFY  ||  If you stop <taking> <using> ABILIFY>
True  ||  <=4|4.17|(98, 96, 98)|0.99|  ||  4.       Possible side effects  ||  4 Possible side effects
True  ||  Contains<>|3.92|(98, 98, 99)|1.0|  ||  Additional side effects in children and adolescents  ||  Additional side effects in children <and adolescents>
True  ||    ||  Reporting of side effects  ||  Reporting of side effects
True  ||  <=7|4.35|(98, 95, 98)|0.99|  ||  5.       How to store ABILIFY  ||  5 How to store ABILIFY
True  ||  >7|2.22|(99, 98, 99)|0.99|  ||  6.       Contents of the pack and other information  ||  6 Contents of the pack and other information
True  ||    ||  What ABILIFY

# Content Extraction Stage

In [83]:
# Extract the content lying between the matched headings held in `coll`.
extractContentlogger = loggerCreator('ExtractContentBetween_'+ getRandomString(1))
extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, coll)
# Use the document the matching stage was run on (fileNameDoc) instead of a
# hard-coded file name, so the extracted file cannot drift away from the
# document that produced `coll`.
dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNameDoc)

2021-04-22 11:47:46,065 : ExtractContentBetween_y : Cleaning Match Results
2021-04-22 11:47:46,070 : ExtractContentBetween_y : Finished Cleaning Match Results
2021-04-22 11:47:46,098 : ExtractContentBetween_y : Extracting Content Between Headings
2021-04-22 11:47:46,121 : ExtractContentBetween_y : Finished Extracting Content Between Headings


File being processed: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\Abilify-h-471-e_ PACKAGE LEAFLET.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,786,23001,CAP,,PACKAGE LEAFLET,,B. PACKAGE LEAFLET,24,ba7f0011-777c-4c87-ab14-945b7f219145,0,,\nB. PACKAGE LEAFLET\n\n \n\nPackage leaflet: Information for the user\n \nABILIFY 5 mg tablets\nABILIFY 10 mg tablets\nABILIFY 15 mg tablets\nABILIFY 30 mg tablets\n \naripiprazole\n \nRead all o...,"<p class=""TitleA""><span lang=""EN-GB"" style=""color:black"">B. PACKAGE LEAFLET</span></p><b><span lang=""EN-GB"" style='font-size:11.0pt;font-family:""Times New Roman"",serif; color:black'><br clear=""all..."
1,788,23003,CAP,,What is in this leaflet,23001.0,What is in this leaflet,43,6399df46-b26b-41ef-8faa-1e468be43bd7,0,23001.0,\nWhat is in this leaflet\n1. What ABILIFY is and what it is used for\n2. What you need to know before you take ABILIFY\n3. How to take ABILIFY\n4. Possible side effects\n5 ...,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">What is in this leaflet</span></b></p><p class=""EMEABodyText"" style=""margin-left:28.35pt;text-indent:-28.35pt""><span lang=""EN-GB"" sty..."
2,789,23004,CAP,1.0,What X is and what it is used for,23001.0,1. What ABILIFY is and what it is used for,52,10306e47-6cf5-4529-b240-c8c69af4c4c7,0,23001.0,\n1. What ABILIFY is and what it is used for\n \nABILIFY contains the active substance aripiprazole and belong to a group of medicines called antipsychotics. It is used to treat adults and a...,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"" style=""color:black"">1. What ABILIFY is and what it is used for</span></b></p><p class=""EMEABodyTex..."
3,790,23005,CAP,2.0,What you need to know before you <take> <use> X,23001.0,2. What you need to know before you take ABILIFY,59,1fe30967-1779-470d-95e2-9c90a04c60c4,0,23001.0,\n2. What you need to know before you take ABILIFY\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"" style=""color:black"">2. What you need to know before you take ABILIFY</span></b></p><p class=""EMEAB..."
4,791,23006,CAP,,Do not <take> <use> X,23005.0,Do not take ABILIFY,61,b62e92fa-848f-4e9e-b9cb-b3b7fb89afdf,0,23005.0,\nDo not take ABILIFY\n• if you are allergic to aripiprazole or any of the other ingredients of this medicine (listed in section 6).\n,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Do not take ABILIFY</span></b></p><p class=""EMEABodyTextIndent""><span lang=""EN-GB"" style=""color:black"">• if you are allergic t..."
5,792,23007,CAP,,Warnings and precautions,23005.0,Warnings and precautions,64,20686598-1bda-4358-a32d-52fc5ce73748,0,23005.0,\nWarnings and precautions\nTalk to your doctor before taking ABILIFY.\n \nSuicidal thoughts and behaviours have been reported during aripiprazole treatment. Tell your doctor immediately if you ar...,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Warnings and precautions</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">Talk to your doctor before taki..."
6,793,23008,CAP,,Children <and adolescents>,23005.0,Children and adolescents,90,2ef4ce01-dcb9-4ef2-bf32-9ce8368cfae7,0,23005.0,\nChildren and adolescents\nDo not use this medicine in children and adolescents under 13 years of age. It is not known if it is safe and effective in these patients.\n,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Children and adolescents</span></b></p><p class=""MsoNormal""><span lang=""EN-GB"" style=""color:black"">Do not use this medicine in childr..."
7,794,23009,CAP,,Other medicines and X,23005.0,Other medicines and ABILIFY,93,64c7ac18-2ac4-43bf-bd98-1338a58e3276,0,23005.0,"\nOther medicines and ABILIFY\nTell your doctor or pharmacist if you are taking, have recently taken or might take any other medicines, including medicines obtained without a prescription.\n \nBlo...","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Other medicines and ABILIFY</span></b></p><p class=""MsoNormal""><span lang=""EN-GB"" style=""color:black"">Tell your doctor or pharmacist ..."
8,795,23010,CAP,,"X with <food> <and> <,> <drink> <and> <alcohol>",23005.0,"ABILIFY with food, drink and alcohol",121,bf52352e-edd8-4b45-ba51-e791cc8fdcd4,0,23005.0,"\nABILIFY with food, drink and alcohol\nThis medicine can be taken regardless of meals.\nAlcohol should be avoided.\n","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">ABILIFY with food, drink and alcohol</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">This medicine can b..."
9,796,23011,CAP,,"Pregnancy <and> <,> breast-feeding <and fertility>",23005.0,"Pregnancy, breast-feeding and fertility",125,7294f0d4-67e1-40ff-bfc6-4effd7beb960,0,23005.0,"\nPregnancy, breast-feeding and fertility\nIf you are pregnant or breast-feeding, think you may be pregnant or are planning to have a baby, ask your doctor for advice before taking this medicine.\...","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Pregnancy, breast-feeding and fertility</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">If you are pregn..."


# XML Generation Stage

In [84]:
# Write the extracted hierarchy out as XML.
xmlLogger = loggerCreator('XmlGeneration_'+ getRandomString(1))
fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger)
# Derive the output name from the processed document (same base name, .xml
# extension) rather than hard-coding it, so it always matches fileNameDoc.
xmlFileName = os.path.splitext(fileNameDoc)[0] + '.xml'
fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, xmlFileName)

2021-04-22 11:47:56,146 : XmlGeneration_4 : Initiating XML Generation
2021-04-22 11:47:56,415 : XmlGeneration_4 : Writing to File:Abilify-h-471-e_ PACKAGE LEAFLET.xml


In [75]:
# Display the matched-heading collection as a table.
# NOTE(review): convertCollectionToDataFrame is neither defined nor imported in
# any visible cell, so this raises NameError on a fresh kernel unless it is
# provided elsewhere -- TODO confirm and add the missing import/definition.
convertCollectionToDataFrame(coll)

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id
0,786,23001,CAP,,PACKAGE LEAFLET,,B. PACKAGE LEAFLET,24,ba7f0011-777c-4c87-ab14-945b7f219145,0,
1,788,23003,CAP,,What is in this leaflet,23001.0,What is in this leaflet,43,6399df46-b26b-41ef-8faa-1e468be43bd7,0,23001.0
2,789,23004,CAP,1.0,What X is and what it is used for,23001.0,1. What ABILIFY is and what it is used for,52,10306e47-6cf5-4529-b240-c8c69af4c4c7,0,23001.0
3,790,23005,CAP,2.0,What you need to know before you <take> <use> X,23001.0,2. What you need to know before you take ABILIFY,59,1fe30967-1779-470d-95e2-9c90a04c60c4,0,23001.0
4,791,23006,CAP,,Do not <take> <use> X,23005.0,Do not take ABILIFY,61,b62e92fa-848f-4e9e-b9cb-b3b7fb89afdf,0,23005.0
5,792,23007,CAP,,Warnings and precautions,23005.0,Warnings and precautions,64,20686598-1bda-4358-a32d-52fc5ce73748,0,23005.0
6,793,23008,CAP,,Children <and adolescents>,23005.0,Children and adolescents,90,2ef4ce01-dcb9-4ef2-bf32-9ce8368cfae7,0,23005.0
7,794,23009,CAP,,Other medicines and X,23005.0,Other medicines and ABILIFY,93,64c7ac18-2ac4-43bf-bd98-1338a58e3276,0,23005.0
8,795,23010,CAP,,"X with <food> <and> <,> <drink> <and> <alcohol>",23005.0,"ABILIFY with food, drink and alcohol",121,bf52352e-edd8-4b45-ba51-e791cc8fdcd4,0,23005.0
9,796,23011,CAP,,"Pregnancy <and> <,> breast-feeding <and fertility>",23005.0,"Pregnancy, breast-feeding and fertility",125,7294f0d4-67e1-40ff-bfc6-4effd7beb960,0,23005.0


In [326]:
# The second argument used to be a 32-character hex literal committed with the
# notebook. It looks like the subscription key for the API at the URL below
# (NOTE(review): confirm), so it is read from the environment instead of being
# stored in source. The previously committed value should be rotated.
sporApiKey = os.environ.get('SPOR_API_KEY')
if not sporApiKey:
    raise RuntimeError("Set the SPOR_API_KEY environment variable before running this cell")

# NOTE(review): the file name is fixed to the Kalydeco SmPC partition while
# `df` and `coll` come from whichever document was matched last -- confirm
# they refer to the same document.
documentAnnotationObj = DocumentAnnotation('Kalydeco II-86-PI-clean_SmPC.json',
                                           sporApiKey,
                                           'https://spor-uat.azure-api.net/pms/api/v2/',
                                           df,
                                           coll)

In [327]:
# Process regulated authorizations without passing an explicit list
# (presumably the identifiers are then taken from the document -- confirm).
# NOTE(review): the saved output of this cell ends with
# "MissingKeyValuePair: Missing Key 'entry' in the regulated authorization API output",
# i.e. the recorded run did not complete successfully.
documentAnnotationObj.processRegulatedAuthorizationForDoc()

 ['EU/1/12/782/001', 'EU/1/12/782/002', 'EU/1/12/782/005', 'EU/1/12/782/003', 'EU/1/12/782/004', 'EU/1/12/782/006']
EU/1/12/782/001


MissingKeyValuePair: Missing Key 'entry' in the regulated authorization API output

In [328]:
# Same call, this time with an explicit list of authorization identifiers.
documentAnnotationObj.processRegulatedAuthorizationForDoc(['EU/3/00/001','EU/1/97/039/003'])

EU/3/00/001
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
EU/1/97/039/003
Found entry with code 220000000061
['0', '600000034241']
Cystagon 150 mg - Capsule, hard


[('0', '600000034241', 'Cystagon 150 mg - Capsule, hard')]