In [191]:
import sys, os
module_path = os.path.abspath(os.path.join('..'))
module_path = os.path.join(module_path, 'scripts')
if module_path not in sys.path:
    sys.path.append(module_path)

In [192]:
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string

from utils.config import config
from utils.logger.logger import loggerCreator

## ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator

%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [89]:
def getRandomString(N):
    str_ = ''.join(random.choice(string.ascii_uppercase + string.digits \
            + string.ascii_lowercase) for _ in range(N))
    return str_



# Set Required Field for Parsing and Partition Modules

### Please ensure that your converted_html folder has html files in their specific language folders

Example: If your language code is en, please ensure that all html files reside in the converted_html/en folder. If folder is not present, a folder not found exception will be thrown

### English

In [319]:
ePILanguage = 'en'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
procedureType = 'CAP'

### German

In [264]:
ePILanguage = 'de'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
procedureType = 'CAP'

### Spanish

In [65]:
ePILanguage = 'es'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
procedureType = 'CAP'

# Html Parsing Stage

In [320]:
class FolderNotFoundError(Exception):
    pass

## Generate input folder path
module_path = os.path.abspath(os.path.join('..'))
module_path = os.path.join(module_path, 'data')
module_path = os.path.join(module_path, 'converted_to_html')
module_path = os.path.join(module_path, ePILanguage)

## Generate output folder path
output_json_path = module_path.replace('converted_to_html','outputJSON')

"""
    Check if input folder exists, else throw exception
"""
if(os.path.exists(module_path)):
    filenames = glob.glob(os.path.join(module_path, '*.html'))
    filenames.extend(glob.glob(os.path.join(module_path, '*.htm')))
    
    ## Create language specific folder in outputJSON folder if it doesn't exist
    if(not os.path.exists(output_json_path)):
        os.mkdir(output_json_path)
    logger = loggerCreator('Parser_'+ getRandomString(1))
    
    styleRulesObj = StyleRulesDictionary(loggerCreator('Style Dictionary_'+ getRandomString(1)),
                                     language = ePILanguage,
                                     fileName = fileNameQrd,
                                     procedureType = procedureType)

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict, 
                            styleRulesObj.styleFeatureKeyList, 
                            styleRulesObj.qrd_section_headings)

    for input_filename in filenames:
      if(input_filename.find('Kalydeco II-86-PI-clean')!=-1):
        output_filename = input_filename.replace('converted_to_html','outputJSON')
        output_filename = output_filename.replace('.html','.json')
        output_filename = output_filename.replace('.htm','.json')
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath = input_filename,
                                       output_filepath = output_filename,
                                       img_base64_dict= parserObj.convertImgToBase64(input_filename),
                                      )
else:
    raise FolderNotFoundError(module_path + " not found")

2021-04-26 02:18:12,611 : Style Dictionary_6 : Creating default style dictionary in file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\styleRules\rule_dictionary_en.json
2021-04-26 02:18:12,690 : Style Dictionary_6 : Qrd Section Keys Generated: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET


F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\converted_to_html\en\Kalydeco II-86-PI-clean.html F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\outputJSON\en\Kalydeco II-86-PI-clean.json


2021-04-26 02:18:21,938 : Parser_D : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\outputJSON\en\Kalydeco II-86-PI-clean.json


# Partition Stage

In [321]:
styleRulesObj = StyleRulesDictionary(loggerCreator('Style Dictionary_'+ getRandomString(1)), 
                                     language = ePILanguage,
                                     fileName = fileNameQrd,
                                     procedureType = procedureType)

path_json = os.path.join(os.path.abspath(os.path.join('..')), 'data', 'outputJSON', ePILanguage)

partitionlogger = loggerCreator('Partition_'+ getRandomString(1))
partitioner = DocTypePartitioner(partitionlogger)
partitioner.partitionHtmls(styleRulesObj.qrd_section_headings, path_json)

2021-04-26 02:18:36,050 : Style Dictionary_R : Reading style dictionary in file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\styleRules\rule_dictionary_en.json
2021-04-26 02:18:36,050 : Style Dictionary_R : Reading style dictionary in file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\styleRules\rule_dictionary_en.json
2021-04-26 02:18:36,103 : Style Dictionary_R : Qrd Section Keys Generated: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET
2021-04-26 02:18:36,103 : Style Dictionary_R : Qrd Section Keys Generated: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET
2021-04-26 02:18:36,118 : Partition_8 : Partitioning Json: Kalydeco II-86-PI-clean.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased
0,False,['WordSection1'],"<div class=""WordSection1""> <p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB""> </span></b></p> <p align=""center"" class=""MsoNormal"" style=""text-a...",False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,False,,False,False,False,4c9313d0-78ec-468f-ba9c-707d28cf9f41,,...,False,False
24,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB"">ANNEX I</span></b></p>",False,e0889eaf-03a1-499e-8359-cbe3b2729087,False,,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,ANNEX I,False,True
26,True,['TitleA'],"<p class=""TitleA""><span lang=""EN-GB"">SUMMARY OF PRODUCT CHARACTERISTICS</span></p>",False,73c60cce-cadb-41b3-8bb9-8ccf958d972a,False,,False,False,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,,SUMMARY OF PRODUCT CHARACTERISTICS,False,True
32,True,,"<h1 style=""margin:0in""><span lang=""EN-GB"" style='font-size:11.0pt;font-family: ""Times New Roman"",serif'>1. NAME OF THE MEDICINAL PRODUCT</span></h1>",False,f20bd308-1923-4f21-8683-66eb29755c2e,True,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin:0in,1. NAME OF THE MEDICINAL PRODUCT,False,True
34,False,['MsoNormal'],"<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><span lang=""EN-GB"">Kalydeco 150 mg film‑coated tablets</span></p>",False,e84da6e3-d451-4d51-b713-487421166004,False,,False,False,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,line-height:normal;page-break-after:avoid,Kalydeco 150 mg film‑coated tablets,False,False


2021-04-26 02:18:36,408 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_SmPC.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased
1218,True,['No-numheading3Agency'],"<p align=""center"" class=""No-numheading3Agency"" style=""margin:0in;text-align:center; page-break-after:auto""><span lang=""EN-GB"" style='font-family:""Times New Roman"",serif; text-transform:uppercase'>...",False,b0a1fce6-7582-487f-8cbb-a7efef4cc182,False,,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin:0in;text-align:center;\npage-break-after:auto,Annex II,False,True
1220,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE </span></b></p>",False,99923573-32ab-42ab-aa98-d1db7066e587,True,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin-left:28.35pt;text-indent:-28.35pt,A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE,False,True
1222,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE</span></b></p>",False,d465c62a-6915-4da0-9595-1f42c9856a1f,True,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin-left:28.35pt;text-indent:-28.35pt,B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE,False,True
1224,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION</span></b></p>",False,937ca248-82d8-4ac0-b996-117811048fec,True,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin-left:28.35pt;text-indent:-28.35pt,C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION,False,True
1226,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT</s...",False,5d69d7fc-7a44-4f19-a081-ceb163b73627,True,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,margin-left:28.35pt;text-indent:-28.35pt,D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,False,True


2021-04-26 02:18:36,645 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ANNEX II.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased
1304,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB"">ANNEX III</span></b></p>",False,0b0ad730-65b9-403e-835e-2dc64e093e9d,False,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,ANNEX III,False,True
1306,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB"">LABELLING AND PACKAGE LEAFLET</span></b></p>",False,0a5d42a1-5f0c-46d0-a579-20dae3dd6be7,False,L1,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,LABELLING AND PACKAGE LEAFLET,False,True
1345,True,['TitleA'],"<p class=""TitleA""><span lang=""EN-GB"">A. LABELLING</span></p>",False,d3dc3b68-fa13-4e32-888e-68f5482bacc0,True,L2,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,,A. LABELLING,False,True
1349,False,,"<div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt""> <p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;line-height: normal;border:none;padding:0in""><b><sp...",True,95505895-0d47-44b6-9965-79992f2c4342,False,,False,False,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt,,False,False
1350,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;line-height: normal;border:none;padding:0in""><b><span lang=""EN-GB"">PARTICULARS TO APPEAR ON THE OUTER PACKAGING</span></b></p>",False,6561322c-ce30-4c5f-abb7-c190b4f2f183,False,L1,False,True,False,95505895-0d47-44b6-9965-79992f2c4342,margin-left:28.35pt;text-indent:-28.35pt;line-height:\nnormal;border:none;padding:0in,PARTICULARS TO APPEAR ON THE OUTER PACKAGING,False,True


2021-04-26 02:18:36,903 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ANNEX III.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased
2918,True,['TitleA'],"<p class=""TitleA""><span lang=""EN-GB"">B. PACKAGE LEAFLET</span></p>",False,2e155332-ec5e-4fcd-9322-ad35c12af6d6,True,,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,,B. PACKAGE LEAFLET,False,True
2923,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB"">Package leaflet: Information for the patient</span></b></p>",False,448aaebe-f42a-45f6-9147-3edac730cbe1,False,L2,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,Package leaflet: Information for the patient,False,False
2925,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><b><span lang=""EN-GB"">Kalydeco 150 mg film-coated tablets</span></b></p>",False,787c1333-bd90-4c2e-aaf4-d90bd6a94fce,False,L2,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,Kalydeco 150 mg film-coated tablets,False,False
2926,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><span lang=""EN-GB"">ivacaftor</span></p>",False,9db5d5f0-94f1-4705-b5c0-43a2980ecf94,False,,False,False,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,text-align:center;line-height:normal,ivacaftor,False,False
2929,True,['MsoNormal'],"<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">Read all of this leaflet carefully before you start taking this medicine because it contains important ...",False,766e009f-786b-48e9-8a13-e668acadec36,False,L2,False,True,False,73096ebe-2c3d-436c-a95e-1603e3dc2f47,line-height:normal;page-break-after:avoid,Read all of this leaflet carefully before you start taking this medicine because it contains important information for you.,False,False


2021-04-26 02:18:37,038 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json


# Matching Stage

In [100]:
procedureType = 'CAP'
languageCode = 'es'
stopWordlanguage = 'spanish'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'

In [101]:
### SmPC
documentNumber = 3
docFilter = "PROSPECTO.json"
stopWordFilterLen = 100
start=0
end=1
isPackageLeaflet=True

In [102]:
parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end,
               isPackageLeaflet,
               'Kalydeco'
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es
['emea-combined-h-2494-es_ PROSPECTO.json', 'emea-combined-h-2494-es_ANEXO II.json', 'emea-combined-h-2494-es_ANEXO III.json', 'emea-combined-h-2494-es_SmPC.json']
['emea-combined-h-2494-es_ PROSPECTO.json']
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ PROSPECTO.json
--------------------------------------------
Package leaflet
True  ||  <=1|25.0|(86, 100, 95)|0.95|  ||  B. PROSPECTO  ||  PROSPECTO
True  ||    ||  Contenido del prospecto  ||  Contenido del prospecto
----------------------------------
RemovedByStyle  ||  >7|2.5|(99, 97, 99)|1.0|  ||  1.    Qué es Kalydeco y para qué se utiliza  ||  1 Qué es Kalydeco y para qué se utiliza
----------------------------------
----------------------------------
RemovedByStyle  ||  Contains<>|16.36|(92, 85, 95)|0.97|  ||  2.    Qué necesita saber antes de empezar a tomar Kalydeco  ||  2 Q


OriginalCheck
('<=7|21.15|(88, 100, 95)|0.98|', 'Si interrumpe el tratamiento con Kalydeco de su hijo', 'Si interrumpe el tratamiento con Kalydeco')

True  ||  <=4|3.57|(98, 96, 98)|0.99|  ||  4.            Posibles efectos adversos  ||  4 Posibles efectos adversos
True  ||  Contains<>|38.46|(78, 75, 88)|0.86|  ||  Efectos adversos adicionales en niños y adolescentes  ||  Otros efectos adversos en niños <y adolescentes>
True  ||    ||  Comunicación de efectos adversos  ||  Comunicación de efectos adversos
True  ||  <=4|3.7|(98, 96, 98)|0.99|  ||  5.            Conservación de Kalydeco  ||  5 Conservación de Kalydeco
True  ||  <=7|2.13|(99, 98, 99)|1.0|  ||  6.            Contenido del envase e información adicional  ||  6 Contenido del envase e información adicional
True  ||    ||  Composición de Kalydeco  ||  Composición de Kalydeco 
True  ||    ||  Aspecto del producto y contenido del envase  ||  Aspecto del producto y contenido del envase
True  ||  Contains<>|73.91|(73, 100, 90)|0.

2021-04-25 22:06:24,367 : ExtractContentBetween_r : Cleaning Match Results
2021-04-25 22:06:24,370 : ExtractContentBetween_r : Finished Cleaning Match Results
2021-04-25 22:06:24,382 : ExtractContentBetween_r : Extracting Content Between Headings
2021-04-25 22:06:24,392 : ExtractContentBetween_r : Finished Extracting Content Between Headings




Heading Not Found 
 ['q Este medicamento está sujeto a seguimiento adicional, lo que agilizará la detección de nueva información sobre su seguridad. Puede contribuir comunicando los efectos adversos que pudiera usted tener. La parte final de la sección 4 incluye información sobre cómo comunicar estos efectos adversos.', 'X contiene {nombre del (de los) excipiente(s)}', 'Pueden solicitar más información respecto a este medicamento dirigiéndose al representante local del titular de la autorización de comercialización:', 'Esta información está destinada únicamente a profesionales sanitarios:']


dict_keys(['1 Qué es Kalydeco y para qué se utiliza', '2 Qué necesita saber antes de empezar a <tomar><usar> Kalydeco', '3 Cómo <tomar><usar> Kalydeco', '4 Posibles efectos adversos', '5 Conservación de Kalydeco', '6 Contenido del envase e información adicional', 'Kalydeco contiene {nombre del (de los) excipiente(s)}'])
Error Found
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\fun

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,922,27001,CAP,,PROSPECTO,,B. PROSPECTO,25,1f145174-0f2a-4427-a796-cc3696bf8514,0,,\nB. PROSPECTO\n\n \n\nProspecto: información para el paciente\n \nKalydeco 75 mg comprimidos recubiertos con película\nKalydeco 150 mg comprimidos recubiertos con película\nivacaftor\n \nLea todo...,"<p class=""TitleA""><span lang=""ES"">B. PROSPECTO</span></p><b><span lang=""ES"" style='font-size:11.0pt;font-family:""Times New Roman"",serif'><br clear=""all"" style=""page-break-before:always""/> </span><..."
1,924,27003,CAP,,Contenido del prospecto,27001.0,Contenido del prospecto,42,6d5c7979-0216-482a-a279-b6fc20f1630d,0,27001.0,\nContenido del prospecto\n \n1. Qué es Kalydeco y para qué se utiliza\n2. Qué necesita saber antes de empezar a tomar Kalydeco\n3. Cómo tomar Kalydeco\n4. Posibles efectos adversos\n5...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Contenido del prospecto</span></b></p><p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoi..."
2,925,27004,CAP,1.0,Qué es X y para qué se utiliza,27001.0,1. Qué es Kalydeco y para qué se utiliza,52,4cad55a7-a9e2-4766-aaf3-f52a3e79e7f1,0,27001.0,\n1. Qué es Kalydeco y para qué se utiliza\n \nKalydeco contiene el principio activo ivacaftor. Ivacaftor actúa a nivel del regulador de la conductancia transmembrana de la fibrosis quí...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.5pt;margin-bottom:.0001pt;text-indent:-28.5pt;page-break-after: avoid""><b><span lang=""ES"">1.<span st..."
3,926,27005,CAP,2.0,Qué necesita saber antes de empezar a <tomar><usar> X,27001.0,2. Qué necesita saber antes de empezar a tomar Kalydeco,64,b0edf109-c643-4565-851f-2a86b9747d1a,0,27001.0,\n2. Qué necesita saber antes de empezar a tomar Kalydeco\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.5pt;margin-bottom:.0001pt;text-indent:-28.5pt;page-break-after: avoid""><b><span lang=""ES"">2.<span st..."
4,927,27006,CAP,,No <tome><use> X,27005.0,No tome Kalydeco,66,da7e8ad2-adef-4174-a503-896dceaf4964,0,27005.0,\nNo tome Kalydeco\n \n· si es alérgico a ivacaftor o a alguno de los demás componentes de este medicamento (incluidos en la sección 6).\n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">No tome Kalydeco</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoN..."
5,928,27007,CAP,,Advertencias y precauciones,27005.0,Advertencias y precauciones,70,7ea1e668-9c52-4600-a19a-5551e396c687,0,27005.0,\nAdvertencias y precauciones\n \n· Consulte a su médico si tiene o ha tenido anteriormente problemas hepáticos. Puede ser necesario que su médico le ajuste la dosis.\n· Se ha observ...,"<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Advertencias y precauciones</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p ..."
6,929,27008,CAP,,Niños <y adolescentes>,27005.0,Niños y adolescentes,92,2ea93de3-659c-4dfb-ad23-975194649bab,0,27005.0,"\nNiños y adolescentes\n \nEste medicamento no se debe dar a niños menores de 4 meses, ya que se desconoce si ivacaftor es seguro y eficaz en estos niños.\n \nEste medicamento no se debe dar en co...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Niños y adolescentes</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""..."
7,930,27009,CAP,,Otros medicamentos y X,27005.0,Otros medicamentos y Kalydeco,98,4fe74f77-a3aa-41c8-810d-0bfe27ea4bd5,0,27005.0,"\nOtros medicamentos y Kalydeco\n \nInforme a su médico o farmacéutico si está utilizando, ha utilizado recientemente o pudiera tener que utilizar cualquier otro medicamento. Algunos medicamentos ...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Otros medicamentos y Kalydeco</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><..."
8,931,27010,CAP,,"<Uso><Toma> de X con <alimentos><y><,><bebidas><y><alcohol>",27005.0,Toma de Kalydeco con alimentos y bebidas,112,55389311-07e7-4374-981b-3b00aa2a4cf7,0,27005.0,\nToma de Kalydeco con alimentos y bebidas\n \nEvite los alimentos o bebidas que contengan pomelo durante el tratamiento con Kalydeco ya que pueden aumentar los efectos adversos de Kalydeco al aum...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Toma de Kalydeco con alimentos y bebidas</span></b></p><p class=""labeltext"" style=""page-break-after:avoid""..."
9,932,27011,CAP,,"Embarazo <y><,> lactancia <y fertilidad>",27005.0,Embarazo y lactancia,116,db25c9cf-23ff-4705-a07e-73513d6fa78c,0,27005.0,"\nEmbarazo y lactancia\n \nSi está embarazada o en periodo de lactancia, cree que podría estar embarazada o tiene intención de quedarse embarazada, consulte a su médico antes de utilizar este medi...","<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Embarazo y lactancia</span></b></p><p class=""labeltext"" style=""page-break-after:avoid""><span lang=""ES-TRAD..."


2021-04-25 22:06:24,436 : XmlGeneration_H : Initiating XML Generation
2021-04-25 22:06:24,547 : XmlGeneration_H : Writing to File:emea-combined-h-2494-es_ PROSPECTO.xml


# Content Extraction Stage

In [83]:
extractContentlogger = loggerCreator('ExtractContentBetween_'+ getRandomString(1))
extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, coll)
dfExtractedHierRR = extractorObj.extractContentBetweenHeadings('Abilify-h-471-e_ PACKAGE LEAFLET.json')

2021-04-22 11:47:46,065 : ExtractContentBetween_y : Cleaning Match Results
2021-04-22 11:47:46,070 : ExtractContentBetween_y : Finished Cleaning Match Results
2021-04-22 11:47:46,098 : ExtractContentBetween_y : Extracting Content Between Headings
2021-04-22 11:47:46,121 : ExtractContentBetween_y : Finished Extracting Content Between Headings


File being processed: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\Abilify-h-471-e_ PACKAGE LEAFLET.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,786,23001,CAP,,PACKAGE LEAFLET,,B. PACKAGE LEAFLET,24,ba7f0011-777c-4c87-ab14-945b7f219145,0,,\nB. PACKAGE LEAFLET\n\n \n\nPackage leaflet: Information for the user\n \nABILIFY 5 mg tablets\nABILIFY 10 mg tablets\nABILIFY 15 mg tablets\nABILIFY 30 mg tablets\n \naripiprazole\n \nRead all o...,"<p class=""TitleA""><span lang=""EN-GB"" style=""color:black"">B. PACKAGE LEAFLET</span></p><b><span lang=""EN-GB"" style='font-size:11.0pt;font-family:""Times New Roman"",serif; color:black'><br clear=""all..."
1,788,23003,CAP,,What is in this leaflet,23001.0,What is in this leaflet,43,6399df46-b26b-41ef-8faa-1e468be43bd7,0,23001.0,\nWhat is in this leaflet\n1. What ABILIFY is and what it is used for\n2. What you need to know before you take ABILIFY\n3. How to take ABILIFY\n4. Possible side effects\n5 ...,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">What is in this leaflet</span></b></p><p class=""EMEABodyText"" style=""margin-left:28.35pt;text-indent:-28.35pt""><span lang=""EN-GB"" sty..."
2,789,23004,CAP,1.0,What X is and what it is used for,23001.0,1. What ABILIFY is and what it is used for,52,10306e47-6cf5-4529-b240-c8c69af4c4c7,0,23001.0,\n1. What ABILIFY is and what it is used for\n \nABILIFY contains the active substance aripiprazole and belong to a group of medicines called antipsychotics. It is used to treat adults and a...,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"" style=""color:black"">1. What ABILIFY is and what it is used for</span></b></p><p class=""EMEABodyTex..."
3,790,23005,CAP,2.0,What you need to know before you <take> <use> X,23001.0,2. What you need to know before you take ABILIFY,59,1fe30967-1779-470d-95e2-9c90a04c60c4,0,23001.0,\n2. What you need to know before you take ABILIFY\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"" style=""color:black"">2. What you need to know before you take ABILIFY</span></b></p><p class=""EMEAB..."
4,791,23006,CAP,,Do not <take> <use> X,23005.0,Do not take ABILIFY,61,b62e92fa-848f-4e9e-b9cb-b3b7fb89afdf,0,23005.0,\nDo not take ABILIFY\n• if you are allergic to aripiprazole or any of the other ingredients of this medicine (listed in section 6).\n,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Do not take ABILIFY</span></b></p><p class=""EMEABodyTextIndent""><span lang=""EN-GB"" style=""color:black"">• if you are allergic t..."
5,792,23007,CAP,,Warnings and precautions,23005.0,Warnings and precautions,64,20686598-1bda-4358-a32d-52fc5ce73748,0,23005.0,\nWarnings and precautions\nTalk to your doctor before taking ABILIFY.\n \nSuicidal thoughts and behaviours have been reported during aripiprazole treatment. Tell your doctor immediately if you ar...,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Warnings and precautions</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">Talk to your doctor before taki..."
6,793,23008,CAP,,Children <and adolescents>,23005.0,Children and adolescents,90,2ef4ce01-dcb9-4ef2-bf32-9ce8368cfae7,0,23005.0,\nChildren and adolescents\nDo not use this medicine in children and adolescents under 13 years of age. It is not known if it is safe and effective in these patients.\n,"<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Children and adolescents</span></b></p><p class=""MsoNormal""><span lang=""EN-GB"" style=""color:black"">Do not use this medicine in childr..."
7,794,23009,CAP,,Other medicines and X,23005.0,Other medicines and ABILIFY,93,64c7ac18-2ac4-43bf-bd98-1338a58e3276,0,23005.0,"\nOther medicines and ABILIFY\nTell your doctor or pharmacist if you are taking, have recently taken or might take any other medicines, including medicines obtained without a prescription.\n \nBlo...","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Other medicines and ABILIFY</span></b></p><p class=""MsoNormal""><span lang=""EN-GB"" style=""color:black"">Tell your doctor or pharmacist ..."
8,795,23010,CAP,,"X with <food> <and> <,> <drink> <and> <alcohol>",23005.0,"ABILIFY with food, drink and alcohol",121,bf52352e-edd8-4b45-ba51-e791cc8fdcd4,0,23005.0,"\nABILIFY with food, drink and alcohol\nThis medicine can be taken regardless of meals.\nAlcohol should be avoided.\n","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">ABILIFY with food, drink and alcohol</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">This medicine can b..."
9,796,23011,CAP,,"Pregnancy <and> <,> breast-feeding <and fertility>",23005.0,"Pregnancy, breast-feeding and fertility",125,7294f0d4-67e1-40ff-bfc6-4effd7beb960,0,23005.0,"\nPregnancy, breast-feeding and fertility\nIf you are pregnant or breast-feeding, think you may be pregnant or are planning to have a baby, ask your doctor for advice before taking this medicine.\...","<p class=""MsoNormal""><b><span lang=""EN-GB"" style=""color:black"">Pregnancy, breast-feeding and fertility</span></b></p><p class=""EMEABodyText""><span lang=""EN-GB"" style=""color:black"">If you are pregn..."


# XML Generation Stage

In [84]:
xmlLogger = loggerCreator('XmlGeneration_'+ getRandomString(1))
fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger)
fhirXmlGeneratorObj.generateXml(dfExtractedHierRR,  'Abilify-h-471-e_ PACKAGE LEAFLET.xml')

2021-04-22 11:47:56,146 : XmlGeneration_4 : Initiating XML Generation
2021-04-22 11:47:56,415 : XmlGeneration_4 : Writing to File:Abilify-h-471-e_ PACKAGE LEAFLET.xml


In [75]:
convertCollectionToDataFrame(coll)

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id
0,786,23001,CAP,,PACKAGE LEAFLET,,B. PACKAGE LEAFLET,24,ba7f0011-777c-4c87-ab14-945b7f219145,0,
1,788,23003,CAP,,What is in this leaflet,23001.0,What is in this leaflet,43,6399df46-b26b-41ef-8faa-1e468be43bd7,0,23001.0
2,789,23004,CAP,1.0,What X is and what it is used for,23001.0,1. What ABILIFY is and what it is used for,52,10306e47-6cf5-4529-b240-c8c69af4c4c7,0,23001.0
3,790,23005,CAP,2.0,What you need to know before you <take> <use> X,23001.0,2. What you need to know before you take ABILIFY,59,1fe30967-1779-470d-95e2-9c90a04c60c4,0,23001.0
4,791,23006,CAP,,Do not <take> <use> X,23005.0,Do not take ABILIFY,61,b62e92fa-848f-4e9e-b9cb-b3b7fb89afdf,0,23005.0
5,792,23007,CAP,,Warnings and precautions,23005.0,Warnings and precautions,64,20686598-1bda-4358-a32d-52fc5ce73748,0,23005.0
6,793,23008,CAP,,Children <and adolescents>,23005.0,Children and adolescents,90,2ef4ce01-dcb9-4ef2-bf32-9ce8368cfae7,0,23005.0
7,794,23009,CAP,,Other medicines and X,23005.0,Other medicines and ABILIFY,93,64c7ac18-2ac4-43bf-bd98-1338a58e3276,0,23005.0
8,795,23010,CAP,,"X with <food> <and> <,> <drink> <and> <alcohol>",23005.0,"ABILIFY with food, drink and alcohol",121,bf52352e-edd8-4b45-ba51-e791cc8fdcd4,0,23005.0
9,796,23011,CAP,,"Pregnancy <and> <,> breast-feeding <and fertility>",23005.0,"Pregnancy, breast-feeding and fertility",125,7294f0d4-67e1-40ff-bfc6-4effd7beb960,0,23005.0


In [326]:
documentAnnotationObj = DocumentAnnotation('Kalydeco II-86-PI-clean_SmPC.json','c270d6ccaf9e47e9b20b322e2383c4ba','https://spor-uat.azure-api.net/pms/api/v2/',df,coll)

In [327]:
documentAnnotationObj.processRegulatedAuthorizationForDoc()

 ['EU/1/12/782/001', 'EU/1/12/782/002', 'EU/1/12/782/005', 'EU/1/12/782/003', 'EU/1/12/782/004', 'EU/1/12/782/006']
EU/1/12/782/001


MissingKeyValuePair: Missing Key 'entry' in the regulated authorization API output

In [328]:
documentAnnotationObj.processRegulatedAuthorizationForDoc(['EU/3/00/001','EU/1/97/039/003'])

EU/3/00/001
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
Skipping entry due to incorrect code 220000000062
No Regulated Authorization find with code 220000000061
EU/1/97/039/003
Found entry with code 220000000061
['0', '600000034241']
Cystagon 150 mg - Capsule, hard


[('0', '600000034241', 'Cystagon 150 mg - Capsule, hard')]

# End-To-End Flow 

In [267]:
def parseDocuments(procedureType,
                   languageCode,
                   documentNumber,
                   docFilter,
                   fileNameQrd,
                   fileNameMatchRuleBook,
                   fileNameDocumentTypeNames,
                   stopWordFilterLen=6,
                   start=0,
                   end=10,
                   isPackageLeaflet=False,
                   medName=None):

    '''
    This function was created to run the match function on multiple documents in automated fashion.
    
    '''

    if documentNumber == 0:
        topHeadingsConsidered=4
        bottomHeadingsConsidered=6
    elif documentNumber == 1:
        topHeadingsConsidered=3
        bottomHeadingsConsidered=5
    elif documentNumber == 2:
        topHeadingsConsidered=5
        bottomHeadingsConsidered=15
    else:
        topHeadingsConsidered=5
        bottomHeadingsConsidered=10
                
    path_json = os.path.join(os.path.abspath(os.path.join('..')), 'data', 'partitionedJSONs',f'{languageCode}')
    print(path_json)
    _,_,fileNames = next(os.walk(path_json))
    print(fileNames)
    filteredNames = [ fileName for fileName in fileNames if docFilter in fileName]
    filteredNames = filteredNames[start:end]
    print(filteredNames)
    for fileNameDoc in filteredNames:
        
        print(f"Starting Heading Extraction For File :- {fileNameDoc}")
        matchDocObj = MatchDocument(procedureType,
                 languageCode,
                 documentNumber,
                 fileNameDoc,
                 fileNameQrd,
                 fileNameMatchRuleBook,
                 fileNameDocumentTypeNames,
                 topHeadingsConsidered,
                 bottomHeadingsConsidered,
                 stopWordFilterLen,
                 stopWordlanguage,
                 isPackageLeaflet,
                 medName)
        df, coll = matchDocObj.matchHtmlHeaddingsWithQrd()
        
        print(f"Completed Heading Extraction For File")
        
        print(f"Starting Document Annotation For File :- {fileNameDoc}")        
        documentAnnotationObj = DocumentAnnotation(fileNameDoc,'c20835db4b1b4e108828a8537ff41506','https://spor-sit.azure-api.net/pms/api/v2/',df,coll)
        try:
            documentAnnotationObj.processRegulatedAuthorizationForDoc()
        
        except:
            print("Error Found")
            
        print(f"Completed Document Annotation For File")        

        print(f"Starting Extracting Content Between Heading For File :- {fileNameDoc}")        

        extractContentlogger = loggerCreator('ExtractContentBetween_'+ getRandomString(1))
        extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, coll, languageCode)
        dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNameDoc)
        
        print(f"Completed Extracting Content Between Heading")        
        
        xmlLogger = loggerCreator('XmlGeneration_'+ getRandomString(1))
        fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger)
        fileNameXml = fileNameDoc.replace('.json','.xml')
        fhirXmlGeneratorObj.generateXml(dfExtractedHierRR,  fileNameXml)
        
        print(f"Created XML File For :- {fileNameDoc}")        


        return df,coll,dfExtractedHierRR


In [251]:
####
#Comman Parameters
procedureType = 'CAP'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'

## Spanish

In [252]:
languageCode = 'es'

### Kalydeco



### SmPC

In [253]:
documentNumber = 0
docFilter = "SmPC.json"
stopWordFilterLen = 6
stopWordlanguage = 'spanish'
start=0
end=1

In [254]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es
['emea-combined-h-2494-es_ PROSPECTO.json', 'emea-combined-h-2494-es_ANEXO II.json', 'emea-combined-h-2494-es_ANEXO III.json', 'emea-combined-h-2494-es_SmPC.json']
['emea-combined-h-2494-es_SmPC.json']
Starting Heading Extraction For File :- emea-combined-h-2494-es_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_SmPC.json
--------------------------------------------
SmPC
----------------------------------
RemovedByStyle  ||    ||  FICHA TÉCNICA O RESUMEN DE LAS CARACTERÍSTICAS DEL PRODUCTO  ||  FICHA TÉCNICA O RESUMEN DE LAS CARACTERÍSTICAS DEL PRODUCTO
----------------------------------
True  ||    ||  1.       NOMBRE DEL MEDICAMENTO  ||  1. NOMBRE DEL MEDICAMENTO
True  ||    ||  2.       COMPOSICIÓN CUALITATIVA Y CUANTITATIVA  ||  2. COMPOSICIÓN CUALITATIVA Y CUANTITATIVA
True  ||  <=4|10.0|(95, 90, 95)|0.99|  ||  Excipien

True  ||    ||  Biotransformación  ||  Biotransformación
True  ||    ||  Eliminación  ||  Eliminación
True  ||    ||  Linealidad/No linealidad  ||  Linealidad/No linealidad
True  ||    ||  5.3     Datos preclínicos sobre seguridad  ||  5.3 Datos preclínicos sobre seguridad
True  ||    ||  6.       DATOS FARMACÉUTICOS  ||  6. DATOS FARMACÉUTICOS
True  ||    ||  6.1     Lista de excipientes  ||  6.1 Lista de excipientes
True  ||    ||  6.2     Incompatibilidades  ||  6.2 Incompatibilidades
True  ||    ||  6.3     Periodo de validez  ||  6.3 Periodo de validez
True  ||    ||  6.4     Precauciones especiales de conservación  ||  6.4 Precauciones especiales de conservación
True  ||  SpecialCase1|178.38|(47, 84, 86)|0.7|  ||  6.5     Naturaleza y contenido del envase  ||  6.5 Naturaleza y contenido del envase <y de los equipos especiales para su utilización, administración o implantación>
True  ||  SpecialCase2|7.41|(96, 94, 97)|0.99|  ||  6.6     Precauciones especiales de eliminación y otr

2021-04-26 00:32:46,040 : ExtractContentBetween_G : Cleaning Match Results
2021-04-26 00:32:46,044 : ExtractContentBetween_G : Finished Cleaning Match Results
2021-04-26 00:32:46,090 : ExtractContentBetween_G : Extracting Content Between Headings
2021-04-26 00:32:46,120 : ExtractContentBetween_G : Finished Extracting Content Between Headings


Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-es_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_SmPC.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,818,24003,CAP,1.0,NOMBRE DEL MEDICAMENTO,24001,1. NOMBRE DEL MEDICAMENTO,34,22eb5171-2d64-438d-9224-45dfa9b8a3a3,0,,\n1. NOMBRE DEL MEDICAMENTO\n \nKalydeco 75 mg comprimidos recubiertos con película\nKalydeco 150 mg comprimidos recubiertos con película\n \n \n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">1. </span></b><b><span lang=""ES"">NOMBRE DEL MEDICAMENTO</span></b></p><p clas..."
1,819,24004,CAP,2.0,COMPOSICIÓN CUALITATIVA Y CUANTITATIVA,24001,2. COMPOSICIÓN CUALITATIVA Y CUANTITATIVA,41,581766ec-6082-45f8-b43f-7773f8a56370,0,24003.0,\n2. COMPOSICIÓN CUALITATIVA Y CUANTITATIVA\n \nKalydeco 75 mg comprimidos recubiertos con película\n \nCada comprimido recubierto con película contiene 75 mg de ivacaftor.\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">2. </span></b><b><span lang=""ES"">COMPOSICIÓN CUALITATIVA Y CUANTITATIVA</span..."
2,822,24007,CAP,,Excipiente(s) con efecto conocido,24006,Excipiente con efecto conocido,47,ebaf453a-d752-4721-b591-eba75bfe6e44,0,24004.0,"\nExcipiente con efecto conocido\n \nCada comprimido recubierto con película contiene 83,6 mg de lactosa monohidrato.\n \nKalydeco 150 mg comprimidos recubiertos con película\n \nCada comprimido r...","<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""ES"">Excipiente con efecto conocido</span></i></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p>..."
3,822,24007,CAP,,Excipiente(s) con efecto conocido,24006,Excipiente con efecto conocido,55,c54374fd-8276-4bab-b84e-76bea3537392,0,24004.0,"\nExcipiente con efecto conocido\n \nCada comprimido recubierto con película contiene 167,2 mg de lactosa monohidrato.\n \nPara consultar la lista completa de excipientes, ver sección 6.1.\n \n","<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""ES"">Excipiente con efecto conocido</span></i></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p>..."
4,823,24008,CAP,3.0,FORMA FARMACÉUTICA,24001,3. FORMA FARMACÉUTICA,62,0cdac076-ce03-4a52-bf4b-ab5c998741a8,0,24004.0,\n3. FORMA FARMACÉUTICA\n \nComprimido recubierto con película (comprimido)\n \nKalydeco 75 mg comprimidos recubiertos con película\n \nComprimidos recubiertos con película con forma de cáps...,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">3. </span></b><b><span lang=""ES"">FORMA FARMACÉUTICA</span></b></p><p class=""M..."
5,824,24009,CAP,4.0,DATOS CLÍNICOS,24001,4. Datos clínicos,75,a5870cf1-b372-4423-8f6d-fb2146604d19,0,24008.0,\n4. Datos clínicos\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"" style=""text-transform:uppercase"">4. </span></b><b><span lang=""ES"" style=""text..."
6,825,24010,CAP,4.1,Indicaciones terapéuticas,24009,4.1 Indicaciones terapéuticas,77,ccffad88-631d-420c-8fe8-584a413e43f2,0,24009.0,"\n4.1 Indicaciones terapéuticas\n \nKalydeco comprimidos está indicado:\n· En monoterapia para el tratamiento de adultos, adolescentes y niños de 6 años o mayores con un peso de 25 kg o...","<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">4.1 </span></b><b><span lang=""ES"">Indicaciones terapéuticas</span></b></p><p cl..."
7,826,24011,CAP,4.2,Posología y forma de administración,24009,4.2 Posología y forma de administración,86,dbeba141-aecf-4220-a9e3-a33e23c0f518,0,24009.0,\n4.2 Posología y forma de administración\n \nÚnicamente los médicos con experiencia en el tratamiento de la fibrosis quística deben prescribir Kalydeco. Si se desconoce el genotipo del pacien...,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""ES"">4.2 </span></b><b><span lang=""ES"">Posología y forma de administración</span></b..."
8,827,24012,CAP,,Posología,24011,Posología,90,f401171f-d636-49cc-80ff-a2b898b2b2ab,0,24011.0,"\nPosología\n \nLos adultos, adolescentes y niños de 6 años de edad o mayores deben ser tratados según la Tabla 1.\n \nTabla 1: Recomendaciones posológicas\n Mañana Noche Ivacaftor e...","<p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""ES"">Posología</span></u></p><p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""ES""><span style=""text-decoration:n..."
9,828,24013,CAP,,Población pediátrica,24012,Población pediátrica,135,68f76752-3278-4139-8f49-3faa254a411f,0,24012.0,"\nPoblación pediátrica\n \nNo se ha establecido la seguridad y eficacia de ivacaftor en monoterapia en niños menores de 4 meses, ni en combinación con tezacaftor/ivacaftor en niños menores de 6 añ...","<p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""ES"">Población pediátrica</span></u></p><p class=""labeltext"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""..."


2021-04-26 00:32:46,217 : XmlGeneration_A : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 00:32:46,569 : XmlGeneration_A : Writing to File:emea-combined-h-2494-es_SmPC.xml


Created XML File For :- emea-combined-h-2494-es_SmPC.json


### Annex II

In [257]:
documentNumber = 1
docFilter = "ANEXO II.json"
stopWordFilterLen = 6
stopWordlanguage = 'spanish'
start=0
end=1

In [258]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es
['emea-combined-h-2494-es_ PROSPECTO.json', 'emea-combined-h-2494-es_ANEXO II.json', 'emea-combined-h-2494-es_ANEXO III.json', 'emea-combined-h-2494-es_SmPC.json']
['emea-combined-h-2494-es_ANEXO II.json']
Starting Heading Extraction For File :- emea-combined-h-2494-es_ANEXO II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ANEXO II.json
--------------------------------------------
AnnexII
True  ||    ||  ANEXO II  ||  ANEXO II
True  ||  Contains<>|131.25|(60, 96, 88)|0.8|  ||  A.         FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES  ||  A. <FABRICANTE(S) DEL (DE LOS) PRINCIPIO(S) ACTIVO(S) BIOLÓGICO(S) Y> FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES
True  ||    ||  B.         CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO  ||  B. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO 
True  ||    ||  C.   

2021-04-26 00:32:50,323 : ExtractContentBetween_F : Cleaning Match Results
2021-04-26 00:32:50,327 : ExtractContentBetween_F : Finished Cleaning Match Results
2021-04-26 00:32:50,335 : ExtractContentBetween_F : Extracting Content Between Headings
2021-04-26 00:32:50,342 : ExtractContentBetween_F : Finished Extracting Content Between Headings


Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-es_ANEXO II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ANEXO II.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,876,25001,CAP,,ANEXO II,,ANEXO II,25,05597518-c3bb-4608-8650-1a847710b389,0,,\nANEXO II\n,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""ES"">ANEXO II</span></b></p><p class=""MsoNormal"" style=""margin-right:70.8pt""><span lang=""ES""> </span></p>"
1,877,25002,CAP,A,<FABRICANTE(S) DEL (DE LOS) PRINCIPIO(S) ACTIVO(S) BIOLÓGICO(S) Y> FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES,25001.0,A. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES,27,dd229850-cf33-43b4-bc52-c61b43d1c734,0,25001.0,\nA. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">A. FABRICANTE(S) RESPONSABL..."
2,880,25005,CAP,B,CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO,25001.0,B. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO,29,cb532494-125c-46ba-a9a7-969bcccbb4ac,0,25001.0,\nB. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">B. CONDICIONES O RESTRICCIO..."
3,882,25007,CAP,C,OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN,25001.0,C. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN,31,77193ade-37c4-4b84-b1f0-4e1b68eda956,0,25001.0,\nC. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">C. OTRAS CONDICIONES Y REQU..."
4,884,25009,CAP,D,CONDICIONES O RESTRICCIONES EN RELACIÓN CON LA UTILIZACIÓN SEGURA Y EFICAZ DEL MEDICAMENTO,25001.0,D. CONDICIONES O RESTRICCIONES EN RELACIÓN CON LA UTILIZACIÓN SEGURA Y EFICAZ DEL MEDICAMENTO,33,c8284068-f980-4ffa-b847-5c2974d08fde,0,25001.0,\nD. CONDICIONES O RESTRICCIONES EN RELACIÓN CON LA UTILIZACIÓN SEGURA Y EFICAZ DEL MEDICAMENTO\n \n \n \n\n \n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.95pt;margin-bottom: 0in;margin-left:.5in;margin-bottom:.0001pt;text-indent:-35.4pt""><b><span lang=""ES"">D. CONDICIONES O RESTRICCIO..."
5,877,25002,CAP,A,<FABRICANTE(S) DEL (DE LOS) PRINCIPIO(S) ACTIVO(S) BIOLÓGICO(S) Y> FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES,25001.0,A. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES,40,afe4386d-f9ed-4566-b946-877ff6aab9c1,1,,\nA. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES\n,"<p class=""TitleB""><span lang=""ES"">A. FABRICANTE(S) RESPONSABLE(S) DE LA LIBERACIÓN DE LOS LOTES</span></p><p class=""MsoNormal"" style=""margin-right:70.8pt""><span lang=""ES""> </span></p>"
6,879,25004,CAP,,Nombre y dirección del (de los) fabricante(s) responsable(s) de la liberación de los lotes,25002.0,Nombre y dirección del (de los) fabricante(s) responsable(s) de la liberación de los lotes,42,c22cc6eb-0ffa-4de9-bae6-ab2e4bbacabc,1,25002.0,\nNombre y dirección del (de los) fabricante(s) responsable(s) de la liberación de los lotes\n \nAlmac Pharma Services (Ireland) Limited\nFinnabair Industrial Estate\nDundalk\nCo. Louth\nA91 P9KD\...,"<p class=""MsoNormal""><u><span lang=""ES"">Nombre y dirección del (de los) fabricante(s) responsable(s) de la liberación de los lotes</span></u></p><p class=""MsoNormal""><span lang=""ES""> </span></p><p..."
7,880,25005,CAP,B,CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO,25001.0,B. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO,60,c0067f9f-4b09-4cc7-be8b-6a307a626759,1,25004.0,"\nB. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO\n \nMedicamento sujeto a prescripción médica restringida (ver Anexo I: Ficha Técnica o Resumen de las Características del Producto, secci...","<p class=""TitleB""><span lang=""ES"">B. CONDICIONES O RESTRICCIONES DE SUMINISTRO Y USO</span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">Medicamen..."
8,882,25007,CAP,C,OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN,25001.0,C. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN,65,63c94e11-257c-49dd-9c5e-d3e4bb0e1289,1,25005.0,\nC. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN\n,"<p class=""TitleB""><span lang=""ES"">C. OTRAS CONDICIONES Y REQUISITOS DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN</span></p><p class=""MsoNormal"" style=""margin-right:28.35pt""><span lang=""ES""> </span..."
9,883,25008,CAP,,Informes periódicos de seguridad (IPSs),25007.0,· Informes periódicos de seguridad (IPSs),67,26d6950a-4ddc-4638-a0bd-65d9c39587c4,1,25007.0,\n· Informes periódicos de seguridad (IPSs)\n \nLos requerimientos para la presentación de los IPSs para este medicamento se establecen en la lista de fechas de referencia de la Unión ...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.05pt;margin-bottom: 0in;margin-left:27.0pt;margin-bottom:.0001pt;text-indent:-27.0pt;line-height: 13.0pt""><span lang=""ES"" style=""font-fami..."


2021-04-26 00:32:50,369 : XmlGeneration_A : Initiating XML Generation
2021-04-26 00:32:50,369 : XmlGeneration_A : Initiating XML Generation
2021-04-26 00:32:50,407 : XmlGeneration_A : Writing to File:emea-combined-h-2494-es_ANEXO II.xml
2021-04-26 00:32:50,407 : XmlGeneration_A : Writing to File:emea-combined-h-2494-es_ANEXO II.xml


Completed Extracting Content Between Heading
Created XML File For :- emea-combined-h-2494-es_ANEXO II.json


### Labeling

In [259]:
documentNumber = 2
docFilter = "ANEXO III.json"
stopWordFilterLen = 6
stopWordlanguage = 'spanish'
start=0
end=1

In [260]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es
['emea-combined-h-2494-es_ PROSPECTO.json', 'emea-combined-h-2494-es_ANEXO II.json', 'emea-combined-h-2494-es_ANEXO III.json', 'emea-combined-h-2494-es_SmPC.json']
['emea-combined-h-2494-es_ANEXO III.json']
Starting Heading Extraction For File :- emea-combined-h-2494-es_ANEXO III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ANEXO III.json
--------------------------------------------
Labelling
True  ||  <=1|23.08|(87, 100, 95)|0.95|  ||  A. ETIQUETADO  ||  ETIQUETADO
True  ||  Contains<>|92.86|(68, 90, 86)|0.88|  ||  INFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR  ||  INFORMACIÓN QUE DEBE FIGURAR EN <EL EMBALAJE EXTERIOR><Y><EL ACONDICIONAMIENTO PRIMARIO>
True  ||    ||  1.       NOMBRE DEL MEDICAMENTO  ||  1. NOMBRE DEL MEDICAMENTO
True  ||    ||  2.       PRINCIPIO(S) ACTIVO(S)  ||  2. PRINCIPIO(S) ACTIVO(S)
True  ||   

True  ||    ||  8.       FECHA DE CADUCIDAD  ||  8. FECHA DE CADUCIDAD
True  ||    ||  9.       CONDICIONES ESPECIALES DE CONSERVACIÓN  ||  9. CONDICIONES ESPECIALES DE CONSERVACIÓN
True  ||    ||  10.     PRECAUCIONES ESPECIALES DE ELIMINACIÓN DEL MEDICAMENTO NO UTILIZADO Y DE LOS MATERIALES DERIVADOS DE SU USO, CUANDO CORRESPONDA  ||  10. PRECAUCIONES ESPECIALES DE ELIMINACIÓN DEL MEDICAMENTO NO UTILIZADO Y DE LOS MATERIALES DERIVADOS DE SU USO, CUANDO CORRESPONDA
True  ||    ||  11.     NOMBRE Y DIRECCIÓN DEL TITULAR DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN  ||  11. NOMBRE Y DIRECCIÓN DEL TITULAR DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN
True  ||    ||  12.     NÚMERO(S) DE AUTORIZACIÓN DE COMERCIALIZACIÓN  ||  12. NÚMERO(S) DE AUTORIZACIÓN DE COMERCIALIZACIÓN 
True  ||  Contains<>|166.67|(48, 83, 86)|0.83|  ||  13.     NÚMERO DE LOTE  ||  13. NÚMERO DE LOTE <, CÓDIGO DE DONACIÓN Y DEL PRODUCTO >
True  ||    ||  14.     CONDICIONES GENERALES DE DISPENSACIÓN  ||  14. CONDICIONES GENERALES

True  ||    ||  3.       FECHA DE CADUCIDAD  ||  3. FECHA DE CADUCIDAD
True  ||  Contains<>|176.47|(46, 82, 86)|0.82|  ||  4.       NÚMERO DE LOTE  ||  4. NÚMERO DE LOTE <, CÓDIGO DE DONACIÓN Y DEL PRODUCTO >
True  ||    ||  5.       OTROS  ||  5. OTROS
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  Contains<>|92.86|(68, 90, 86)|0.88|  ||  INFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR  ||  INFORMACIÓN QUE DEBE FIGURAR EN <EL EMBALAJE EXTERIOR><Y><EL ACONDICIONAMIENTO PRIMARIO>
True  ||    ||  1.       NOMBRE DEL MEDICAMENTO  ||  1. NOMBRE DEL MEDICAMENTO
True  ||    ||  2.       PRINCIPIO(S) ACTIVO(S)  ||  2. PRINCIPIO(S) ACTIVO(S)
True  ||    ||  3.       LISTA DE EXCIPIENTES  ||  3. LISTA DE EXCIPIENTES
True  ||    ||  4.       FORMA FARMACÉUTICA Y CONTENIDO DEL ENVASE  ||  4. FORMA FARMACÉUTICA Y CONTENIDO DEL ENVASE
True  ||    ||  5.       FORMA Y VÍA(S) DE ADMINISTRACIÓN  ||  5. FORMA Y VÍA

True  ||    ||  17.     IDENTIFICADOR ÚNICO - CÓDIGO DE BARRAS 2D  ||  17. IDENTIFICADOR ÚNICO - CÓDIGO DE BARRAS 2D

OriginalCheck
('<=7|92.31|(13, 12, 91)|0.35|', 'Incluido el código de barras 2D que lleva el identificador único.', '17. IDENTIFICADOR ÚNICO - CÓDIGO DE BARRAS 2D')

True  ||    ||  18.     IDENTIFICADOR ÚNICO - INFORMACIÓN EN CARACTERES VISUALES  ||  18. IDENTIFICADOR ÚNICO - INFORMACIÓN EN CARACTERES VISUALES
-------------------Here1------------------------
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  Contains<>|69.81|(70, 64, 86)|0.92|  ||  INFORMACIÓN QUE DEBE FIGURAR EN EL ACONDICIONAMIENTO INTERMEDIO  ||  INFORMACIÓN QUE DEBE FIGURAR EN <EL EMBALAJE EXTERIOR><Y><EL ACONDICIONAMIENTO PRIMARIO>
True  ||    ||  1.       NOMBRE DEL MEDICAMENTO  ||  1. NOMBRE DEL MEDICAMENTO
True  ||    ||  2.       PRINCIPIO(S) ACTIVO(S)  ||  2. PRINCIPIO(S) ACTIVO(S)
True  ||    ||  3.       LISTA D

True  ||    ||  9.       CONDICIONES ESPECIALES DE CONSERVACIÓN  ||  9. CONDICIONES ESPECIALES DE CONSERVACIÓN
True  ||    ||  10.     PRECAUCIONES ESPECIALES DE ELIMINACIÓN DEL MEDICAMENTO NO UTILIZADO Y DE LOS MATERIALES DERIVADOS DE SU USO, CUANDO CORRESPONDA  ||  10. PRECAUCIONES ESPECIALES DE ELIMINACIÓN DEL MEDICAMENTO NO UTILIZADO Y DE LOS MATERIALES DERIVADOS DE SU USO, CUANDO CORRESPONDA
True  ||    ||  11.     NOMBRE Y DIRECCIÓN DEL TITULAR DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN  ||  11. NOMBRE Y DIRECCIÓN DEL TITULAR DE LA AUTORIZACIÓN DE COMERCIALIZACIÓN
True  ||    ||  12.     NÚMERO(S) DE AUTORIZACIÓN DE COMERCIALIZACIÓN  ||  12. NÚMERO(S) DE AUTORIZACIÓN DE COMERCIALIZACIÓN 
True  ||  Contains<>|166.67|(48, 83, 86)|0.83|  ||  13.     NÚMERO DE LOTE  ||  13. NÚMERO DE LOTE <, CÓDIGO DE DONACIÓN Y DEL PRODUCTO >
True  ||    ||  14.     CONDICIONES GENERALES DE DISPENSACIÓN  ||  14. CONDICIONES GENERALES DE DISPENSACIÓN
True  ||    ||  15.     INSTRUCCIONES DE USO  ||  15. 

True  ||    ||  3.       FECHA DE CADUCIDAD  ||  3. FECHA DE CADUCIDAD
True  ||  Contains<>|176.47|(46, 82, 86)|0.82|  ||  4.       NÚMERO DE LOTE  ||  4. NÚMERO DE LOTE <, CÓDIGO DE DONACIÓN Y DEL PRODUCTO >
True  ||    ||  5.       CONTENIDO EN PESO, EN VOLUMEN O EN UNIDADES  ||  5. CONTENIDO EN PESO, EN VOLUMEN O EN UNIDADES
True  ||    ||  6.       OTROS  ||  6. OTROS


2021-04-26 00:34:03,727 : ExtractContentBetween_N : Cleaning Match Results
2021-04-26 00:34:03,734 : ExtractContentBetween_N : Finished Cleaning Match Results
2021-04-26 00:34:03,776 : ExtractContentBetween_N : Extracting Content Between Headings
2021-04-26 00:34:04,392 : ExtractContentBetween_N : Finished Extracting Content Between Headings



All mandatory headings have been found !!!

dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- emea-combined-h-2494-es_ANEXO III.json
Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-es_ANEXO III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ANEXO III.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,889,26001,CAP,,ETIQUETADO,,A. ETIQUETADO,54,116b4cde-ec50-452b-b0cc-9ff2ab25d63d,0,,\nA. ETIQUETADO\n \n \n\n \n,"<p class=""TitleA""><span lang=""ES"">A. ETIQUETADO</span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><span lang=""ES"" style='font-size:11.0pt;font-family:""Times New Roman"",serif; color:black'..."
1,890,26002,CAP,,INFORMACIÓN QUE DEBE FIGURAR EN <EL EMBALAJE EXTERIOR><Y><EL ACONDICIONAMIENTO PRIMARIO>,26001.0,INFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR,60,d99441cb-bcec-4a1e-9e51-138cce8aa64d,0,26001.0,\nINFORMACIÓN QUE DEBE FIGURAR EN EL EMBALAJE EXTERIOR\n \nCAJA DEL BLÍSTER – ENVASE DE 56 COMPRIMIDOS\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><div style=""border:solid windowtext ..."
2,891,26003,CAP,1.0,NOMBRE DEL MEDICAMENTO,26002.0,1. NOMBRE DEL MEDICAMENTO,66,841a4962-31a9-4a1e-bf88-930938a84fdb,0,26002.0,\n1. NOMBRE DEL MEDICAMENTO\n \nKalydeco 150 mg comprimidos recubiertos con película\nivacaftor\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">Kalydeco 150 mg comprimidos recubiertos con película</span></p><p class=""MsoNor..."
3,892,26004,CAP,2.0,PRINCIPIO(S) ACTIVO(S),26002.0,2. PRINCIPIO(S) ACTIVO(S),73,8b468e9a-e563-4918-a13e-07f9ab3660cf,0,26002.0,\n2. PRINCIPIO(S) ACTIVO(S)\n \nCada comprimido contiene 150 mg de ivacaftor.\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""PT""> </span></i></p><p class=""MsoNormal""><span lang=""ES"">Cada comprimido contiene 150 mg de ivacaftor.</span></p><p class=""MsoNor..."
4,893,26005,CAP,3.0,LISTA DE EXCIPIENTES,26002.0,3. LISTA DE EXCIPIENTES,79,487deb81-4e89-4c01-9e31-56a68072c885,0,26002.0,\n3. LISTA DE EXCIPIENTES\n \nContiene lactosa.\n \nPara mayor información consultar el prospecto.\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">Contiene lactosa.</span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><p..."
5,894,26006,CAP,4.0,FORMA FARMACÉUTICA Y CONTENIDO DEL ENVASE,26002.0,4. FORMA FARMACÉUTICA Y CONTENIDO DEL ENVASE,87,6669484f-7b8c-44c8-9df2-3d06381f0e18,0,26002.0,\n4. FORMA FARMACÉUTICA Y CONTENIDO DEL ENVASE\n \n56 comprimidos\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">56 comprimidos</span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><p cl..."
6,895,26007,CAP,5.0,FORMA Y VÍA(S) DE ADMINISTRACIÓN,26002.0,5. FORMA Y VÍA(S) DE ADMINISTRACIÓN,93,7b2a56b5-8c36-40b0-aa1b-fde88f977238,0,26002.0,\n5. FORMA Y VÍA(S) DE ADMINISTRACIÓN\n \nLeer el prospecto antes de utilizar este medicamento.\n \nVía oral\n \nInstrucciones de uso\n \nTome el medicamento con alimentos que contengan gras...,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">Leer el prospecto antes de utilizar este medicamento.</span></p><p class=""MsoNo..."
7,896,26008,CAP,6.0,ADVERTENCIA ESPECIAL DE QUE EL MEDICAMENTO DEBE MANTENERSE FUERA DE LA VISTA Y DEL ALCANCE DE LOS NIÑOS,26002.0,6. ADVERTENCIA ESPECIAL DE QUE EL MEDICAMENTO DEBE MANTENERSE FUERA DE LA VISTA Y DEL ALCANCE DE LOS NIÑOS,107,60b5c8e6-b1ab-4acf-aedf-9f582d13bcf8,0,26002.0,\n6. ADVERTENCIA ESPECIAL DE QUE EL MEDICAMENTO DEBE MANTENERSE FUERA DE LA VISTA Y DEL ALCANCE DE LOS NIÑOS\n \nMantener fuera de la vista y del alcance de los niños.\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">Mantener fuera de la vista y del alcance de los niños.</span></p><p class=""MsoN..."
8,897,26009,CAP,7.0,"OTRA(S) ADVERTENCIA(S) ESPECIAL(ES), SI ES NECESARIO",26002.0,"7. OTRA(S) ADVERTENCIA(S) ESPECIAL(ES), SI ES NECESARIO",113,d5b5d494-2daa-45c4-902b-285ecdf71652,0,26002.0,"\n7. OTRA(S) ADVERTENCIA(S) ESPECIAL(ES), SI ES NECESARIO\n \n \n","<p class=""MsoNormal""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt""> <p class=""MsoNormal"" s..."
9,898,26010,CAP,8.0,FECHA DE CADUCIDAD,26002.0,8. FECHA DE CADUCIDAD,117,5994e385-9c26-49aa-a58a-9a9bf4c3cede,0,26002.0,\n8. FECHA DE CADUCIDAD\n \nCAD\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoNormal""><span lang=""ES"">CAD</span></p><p class=""MsoNormal""><span lang=""ES""> </span></p><p class=""MsoNor..."


2021-04-26 00:34:04,640 : XmlGeneration_y : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 00:34:05,157 : XmlGeneration_y : Writing to File:emea-combined-h-2494-es_ANEXO III.xml


Created XML File For :- emea-combined-h-2494-es_ANEXO III.json


### Package Leaflet

In [261]:

documentNumber = 3
docFilter = "PROSPECTO.json"
stopWordFilterLen = 100
stopWordlanguage = 'spanish'
start=0
end=1
isPackageLeaflet=True

In [262]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end,
               isPackageLeaflet,
               'Kalydeco'
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es
['emea-combined-h-2494-es_ PROSPECTO.json', 'emea-combined-h-2494-es_ANEXO II.json', 'emea-combined-h-2494-es_ANEXO III.json', 'emea-combined-h-2494-es_SmPC.json']
['emea-combined-h-2494-es_ PROSPECTO.json']
Starting Heading Extraction For File :- emea-combined-h-2494-es_ PROSPECTO.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\es\emea-combined-h-2494-es_ PROSPECTO.json
--------------------------------------------
Package leaflet
True  ||  <=1|25.0|(86, 100, 95)|0.95|  ||  B. PROSPECTO  ||  PROSPECTO
True  ||    ||  Contenido del prospecto  ||  Contenido del prospecto
----------------------------------
RemovedByStyle  ||    ||  1.    Qué es Kalydeco y para qué se utiliza  ||  1. Qué es Kalydeco y para qué se utiliza
----------------------------------
----------------------------------
RemovedByStyle  ||  Contains<>|14.55|(93, 85, 95)|0.98|  ||  2.    Qué 

True  ||  Contains<>|38.46|(78, 75, 88)|0.86|  ||  Efectos adversos adicionales en niños y adolescentes  ||  Otros efectos adversos en niños <y adolescentes>
True  ||    ||  Comunicación de efectos adversos  ||  Comunicación de efectos adversos
True  ||    ||  5.            Conservación de Kalydeco  ||  5. Conservación de Kalydeco
True  ||    ||  6.            Contenido del envase e información adicional  ||  6. Contenido del envase e información adicional
True  ||    ||  Composición de Kalydeco  ||  Composición de Kalydeco 
True  ||    ||  Aspecto del producto y contenido del envase  ||  Aspecto del producto y contenido del envase
True  ||  Contains<>|73.91|(73, 100, 90)|0.94|  ||  Titular de la autorización de comercialización  ||  Titular de la autorización de comercialización <y responsable de la fabricación>
True  ||  SpecialCase1|54.35|(79, 100, 90)|0.96|  ||  Fecha de la última revisión de este prospecto:  ||  Fecha de la última revisión de este prospecto: <{MM/AAAA}><{mes AAAA}

2021-04-26 00:36:54,033 : ExtractContentBetween_1 : Cleaning Match Results
2021-04-26 00:36:54,038 : ExtractContentBetween_1 : Finished Cleaning Match Results
2021-04-26 00:36:54,060 : ExtractContentBetween_1 : Extracting Content Between Headings
2021-04-26 00:36:54,084 : ExtractContentBetween_1 : Finished Extracting Content Between Headings




Heading Not Found 
 ['q Este medicamento está sujeto a seguimiento adicional, lo que agilizará la detección de nueva información sobre su seguridad. Puede contribuir comunicando los efectos adversos que pudiera usted tener. La parte final de la sección 4 incluye información sobre cómo comunicar estos efectos adversos.', 'X contiene {nombre del (de los) excipiente(s)}', 'Pueden solicitar más información respecto a este medicamento dirigiéndose al representante local del titular de la autorización de comercialización:', 'Esta información está destinada únicamente a profesionales sanitarios:']


dict_keys(['1. Qué es Kalydeco y para qué se utiliza', '2. Qué necesita saber antes de empezar a <tomar><usar> Kalydeco', '3. Cómo <tomar><usar> Kalydeco', '4. Posibles efectos adversos', '5. Conservación de Kalydeco', '6. Contenido del envase e información adicional', 'Kalydeco contiene {nombre del (de los) excipiente(s)}'])
Completed Heading Extraction For File
Starting Document Annotation For

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,922,27001,CAP,,PROSPECTO,,B. PROSPECTO,25,1f145174-0f2a-4427-a796-cc3696bf8514,0,,\nB. PROSPECTO\n\n \n\nProspecto: información para el paciente\n \nKalydeco 75 mg comprimidos recubiertos con película\nKalydeco 150 mg comprimidos recubiertos con película\nivacaftor\n \nLea todo...,"<p class=""TitleA""><span lang=""ES"">B. PROSPECTO</span></p><b><span lang=""ES"" style='font-size:11.0pt;font-family:""Times New Roman"",serif'><br clear=""all"" style=""page-break-before:always""/> </span><..."
1,924,27003,CAP,,Contenido del prospecto,27001.0,Contenido del prospecto,42,6d5c7979-0216-482a-a279-b6fc20f1630d,0,27001.0,\nContenido del prospecto\n \n1. Qué es Kalydeco y para qué se utiliza\n2. Qué necesita saber antes de empezar a tomar Kalydeco\n3. Cómo tomar Kalydeco\n4. Posibles efectos adversos\n5...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Contenido del prospecto</span></b></p><p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoi..."
2,925,27004,CAP,1.0,Qué es X y para qué se utiliza,27001.0,1. Qué es Kalydeco y para qué se utiliza,52,4cad55a7-a9e2-4766-aaf3-f52a3e79e7f1,0,27001.0,\n1. Qué es Kalydeco y para qué se utiliza\n \nKalydeco contiene el principio activo ivacaftor. Ivacaftor actúa a nivel del regulador de la conductancia transmembrana de la fibrosis quí...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.5pt;margin-bottom:.0001pt;text-indent:-28.5pt;page-break-after: avoid""><b><span lang=""ES"">1.<span st..."
3,926,27005,CAP,2.0,Qué necesita saber antes de empezar a <tomar><usar> X,27001.0,2. Qué necesita saber antes de empezar a tomar Kalydeco,64,b0edf109-c643-4565-851f-2a86b9747d1a,0,27001.0,\n2. Qué necesita saber antes de empezar a tomar Kalydeco\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.5pt;margin-bottom:.0001pt;text-indent:-28.5pt;page-break-after: avoid""><b><span lang=""ES"">2.<span st..."
4,927,27006,CAP,,No <tome><use> X,27005.0,No tome Kalydeco,66,da7e8ad2-adef-4174-a503-896dceaf4964,0,27005.0,\nNo tome Kalydeco\n \n· si es alérgico a ivacaftor o a alguno de los demás componentes de este medicamento (incluidos en la sección 6).\n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">No tome Kalydeco</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""MsoN..."
5,928,27007,CAP,,Advertencias y precauciones,27005.0,Advertencias y precauciones,70,7ea1e668-9c52-4600-a19a-5551e396c687,0,27005.0,\nAdvertencias y precauciones\n \n· Consulte a su médico si tiene o ha tenido anteriormente problemas hepáticos. Puede ser necesario que su médico le ajuste la dosis.\n· Se ha observ...,"<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Advertencias y precauciones</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p ..."
6,929,27008,CAP,,Niños <y adolescentes>,27005.0,Niños y adolescentes,92,2ea93de3-659c-4dfb-ad23-975194649bab,0,27005.0,"\nNiños y adolescentes\n \nEste medicamento no se debe dar a niños menores de 4 meses, ya que se desconoce si ivacaftor es seguro y eficaz en estos niños.\n \nEste medicamento no se debe dar en co...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Niños y adolescentes</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><p class=""..."
7,930,27009,CAP,,Otros medicamentos y X,27005.0,Otros medicamentos y Kalydeco,98,4fe74f77-a3aa-41c8-810d-0bfe27ea4bd5,0,27005.0,"\nOtros medicamentos y Kalydeco\n \nInforme a su médico o farmacéutico si está utilizando, ha utilizado recientemente o pudiera tener que utilizar cualquier otro medicamento. Algunos medicamentos ...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""ES"">Otros medicamentos y Kalydeco</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""ES""> </span></p><..."
8,931,27010,CAP,,"<Uso><Toma> de X con <alimentos><y><,><bebidas><y><alcohol>",27005.0,Toma de Kalydeco con alimentos y bebidas,112,55389311-07e7-4374-981b-3b00aa2a4cf7,0,27005.0,\nToma de Kalydeco con alimentos y bebidas\n \nEvite los alimentos o bebidas que contengan pomelo durante el tratamiento con Kalydeco ya que pueden aumentar los efectos adversos de Kalydeco al aum...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Toma de Kalydeco con alimentos y bebidas</span></b></p><p class=""labeltext"" style=""page-break-after:avoid""..."
9,932,27011,CAP,,"Embarazo <y><,> lactancia <y fertilidad>",27005.0,Embarazo y lactancia,116,db25c9cf-23ff-4705-a07e-73513d6fa78c,0,27005.0,"\nEmbarazo y lactancia\n \nSi está embarazada o en periodo de lactancia, cree que podría estar embarazada o tiene intención de quedarse embarazada, consulte a su médico antes de utilizar este medi...","<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""ES"">Embarazo y lactancia</span></b></p><p class=""labeltext"" style=""page-break-after:avoid""><span lang=""ES-TRAD..."


2021-04-26 00:36:54,158 : XmlGeneration_x : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 00:36:54,343 : XmlGeneration_x : Writing to File:emea-combined-h-2494-es_ PROSPECTO.xml


Created XML File For :- emea-combined-h-2494-es_ PROSPECTO.json


(      Bold               Classes  \
 0    False                  None   
 1    False         ['MsoNormal']   
 2    False         ['MsoNormal']   
 3    False         ['MsoNormal']   
 4    False         ['MsoNormal']   
 5    False         ['MsoNormal']   
 6    False         ['MsoNormal']   
 7    False         ['MsoNormal']   
 8    False         ['MsoNormal']   
 9    False         ['MsoNormal']   
 10   False         ['MsoNormal']   
 11   False         ['MsoNormal']   
 12   False         ['MsoNormal']   
 13   False         ['MsoNormal']   
 14   False         ['MsoNormal']   
 15   False         ['MsoNormal']   
 16   False         ['MsoNormal']   
 17   False         ['MsoNormal']   
 18   False         ['MsoNormal']   
 19   False         ['MsoNormal']   
 20   False         ['MsoNormal']   
 21   False         ['MsoNormal']   
 22   False         ['MsoNormal']   
 23   False         ['MsoNormal']   
 24   False         ['MsoNormal']   
 25    True            ['TitleA']   
 

## German

In [278]:
languageCode = 'de'

### Kalydeco



### SmPC

In [294]:
documentNumber = 0
docFilter = "SmPC.json"
stopWordFilterLen = 6
stopWordlanguage = 'german'
start=0
end=1

In [295]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de
['emea-combined-h-2494-de_ PACKUNGSBEILAGE.json', 'emea-combined-h-2494-de_ANHANG II.json', 'emea-combined-h-2494-de_ANHANG III.json', 'emea-combined-h-2494-de_SmPC.json']
['emea-combined-h-2494-de_SmPC.json']
Starting Heading Extraction For File :- emea-combined-h-2494-de_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_SmPC.json
--------------------------------------------
SmPC
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
True  ||    ||  2.       QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG  ||  2. QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG
True  ||  <=7|11.63|(95, 88, 95)|0.97|  ||  Sonstiger Bestandteil mit bekannter Wirkung  ||  Sonstige(r) Bestandteil(e) mit bekannter Wirkung
True  ||  <=7|11.63|(95, 88, 95)|0.97|  ||  Sonstiger Bestandteil mit bekannter Wirkung  ||  Son

True  ||    ||  Biotransformation  ||  Biotransformation
True  ||    ||  Elimination  ||  Elimination
True  ||    ||  Linearität/Nicht-Linearität  ||  Linearität/Nicht-Linearität
True  ||    ||  5.3     Präklinische Daten zur Sicherheit  ||  5.3 Präklinische Daten zur Sicherheit
True  ||    ||  6.       PHARMAZEUTISCHE ANGABEN  ||  6. PHARMAZEUTISCHE ANGABEN
True  ||    ||  6.1     Liste der sonstigen Bestandteile  ||  6.1 Liste der sonstigen Bestandteile
True  ||    ||  6.2     Inkompatibilitäten  ||  6.2 Inkompatibilitäten
True  ||    ||  6.3     Dauer der Haltbarkeit  ||  6.3 Dauer der Haltbarkeit
True  ||    ||  6.4     Besondere Vorsichtsmaßnahmen für die Aufbewahrung  ||  6.4 Besondere Vorsichtsmaßnahmen für die Aufbewahrung
True  ||  SpecialCase1|180.0|(45, 77, 86)|0.67|  ||  6.5     Art und Inhalt des Behältnisses  ||  6.5 Art und Inhalt des Behältnisses <und spezielles Zubehör für den Gebrauch, die Anwendung oder die Implantation>
True  ||  SpecialCase2|8.22|(96, 93, 97)|0.97|

2021-04-26 01:34:33,793 : ExtractContentBetween_r : Cleaning Match Results
2021-04-26 01:34:33,793 : ExtractContentBetween_r : Cleaning Match Results
2021-04-26 01:34:33,799 : ExtractContentBetween_r : Finished Cleaning Match Results
2021-04-26 01:34:33,799 : ExtractContentBetween_r : Finished Cleaning Match Results
2021-04-26 01:34:33,829 : ExtractContentBetween_r : Extracting Content Between Headings
2021-04-26 01:34:33,829 : ExtractContentBetween_r : Extracting Content Between Headings
2021-04-26 01:34:33,853 : ExtractContentBetween_r : Finished Extracting Content Between Headings
2021-04-26 01:34:33,853 : ExtractContentBetween_r : Finished Extracting Content Between Headings


Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-de_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_SmPC.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,410,12003,CAP,1.0,BEZEICHNUNG DES ARZNEIMITTELS,12001,1. BEZEICHNUNG DES ARZNEIMITTELS,33,c1dc0f0e-e9cf-4f07-84cb-386f992037e4,0,,\n1. BEZEICHNUNG DES ARZNEIMITTELS\n \nKalydeco 75 mg Filmtabletten\nKalydeco 150 mg Filmtabletten\n \n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">1. BEZEICHNUNG DES ARZNEIMITTELS</span></b></p><p class=""MsoNormal"" style=""pa..."
1,411,12004,CAP,2.0,QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG,12001,2. QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG,39,f23d309c-1764-4b9c-8ea7-cacfb90cd7d7,0,12003.0,\n2. QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG\n \nKalydeco 75 mg Filmtabletten\n \nJede Filmtablette enthält 75 mg Ivacaftor.\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">2. QUALITATIVE UND QUANTITATIVE ZUSAMMENSETZUNG</span></b></p><p class=""MsoNo..."
2,414,12007,CAP,,Sonstige(r) Bestandteil(e) mit bekannter Wirkung,12006,Sonstiger Bestandteil mit bekannter Wirkung,45,d1934dfa-14d5-408a-acc2-c76d02777d4f,0,12004.0,"\nSonstiger Bestandteil mit bekannter Wirkung\n \nJede Filmtablette enthält 83,6 mg Lactose-Monohydrat.\n \nKalydeco 150 mg Filmtabletten\n \nJede Filmtablette enthält 150 mg Ivacaftor. \n","<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""DE"">Sonstiger Bestandteil mit bekannter Wirkung</span></i></p><p class=""MsoNormal"" style=""page-break-after:avoid""><i><u><span lan..."
3,414,12007,CAP,,Sonstige(r) Bestandteil(e) mit bekannter Wirkung,12006,Sonstiger Bestandteil mit bekannter Wirkung,53,450e75ba-6194-4fd8-a6e6-4a6e3cf23e8e,0,12004.0,"\nSonstiger Bestandteil mit bekannter Wirkung\n \nJede Filmtablette enthält 167,2 mg Lactose-Monohydrat.\n \nVollständige Auflistung der sonstigen Bestandteile, siehe Abschnitt 6.1.\n \n","<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""DE"">Sonstiger Bestandteil mit bekannter Wirkung</span></i></p><p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""..."
4,415,12008,CAP,3.0,DARREICHUNGSFORM,12001,3. DARREICHUNGSFORM,60,4d172eb6-f275-4410-9e61-c1448354afba,0,12004.0,"\n3. DARREICHUNGSFORM\n \nFilmtablette (Tablette)\n \nKalydeco 75 mg Filmtabletten\n \nHellblaue ovale Filmtabletten, auf der einen Seite mit dem Aufdruck „V 75“ in schwarzer Tinte und auf d...","<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">3. DARREICHUNGSFORM</span></b></p><p class=""MsoNormal"" style=""text-align:just..."
5,416,12009,CAP,4.0,KLINISCHE ANGABEN,12001,4. KLINISCHE ANGABEN,73,2b8cb91b-b663-4af7-b003-ef514609f876,0,12008.0,\n4. KLINISCHE ANGABEN\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"" style=""text-transform:uppercase"">4. KLINISCHE ANGABEN</span></b></p><p class=..."
6,417,12010,CAP,4.1,Anwendungsgebiete,12009,4.1 Anwendungsgebiete,75,890dd5ca-7c66-44f4-902c-815b6d4af949,0,12009.0,"\n4.1 Anwendungsgebiete\n \nKalydeco-Tabletten werden angewendet:\n· als Monotherapie zur Behandlung von Erwachsenen, Jugendlichen und Kindern ab 6 Jahren mit einem Körpergewicht von m...","<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">4.1 Anwendungsgebiete</span></b></p><p class=""MsoNormal"" style=""page-break-afte..."
7,418,12011,CAP,4.2,Dosierung und Art der Anwendung,12009,4.2 Dosierung und Art der Anwendung,84,f9ac513d-880b-4e62-869a-bf3fe049a0b5,0,12009.0,\n4.2 Dosierung und Art der Anwendung\n \nKalydeco sollte nur von Ärzten mit Erfahrung in der Behandlung der zystischen Fibrose verordnet werden. Wenn der Genotyp des Patienten nicht bekannt i...,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">4.2 Dosierung und Art der Anwendung</span></b></p><p class=""MsoNormal"" style=""p..."
8,419,12012,CAP,,Dosierung,12011,Dosierung,88,d6d439a9-26db-4bca-91ea-c113aae421a5,0,12011.0,"\nDosierung\n \nDie Dosierung bei Erwachsenen, Jugendlichen und Kindern ab 6 Jahren sollte entsprechend den Angaben in Tabelle 1 erfolgen. \n \nTabelle 1: Dosierungsempfehlungen\n Morgens ...","<p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""DE"">Dosierung</span></u></p><p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""DE""> </span></i></p><p class=""MsoN..."
9,420,12013,CAP,,Kinder und Jugendliche,12012,Kinder und Jugendliche,133,df1e9a42-d7a6-45ee-8fd6-fb4771261eb2,0,12012.0,"\nKinder und Jugendliche\n \nDie Sicherheit und Wirksamkeit von Ivacaftor bei Kindern unter 4 Monaten als Monotherapie, in Kombination mit Tezacaftor/Ivacaftor bei Kindern unter 6 Jahren oder in K...","<p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""DE"">Kinder und Jugendliche</span></u></p><p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE""> </span></b></p><..."


2021-04-26 01:34:33,908 : XmlGeneration_7 : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 01:34:34,284 : XmlGeneration_7 : Writing to File:emea-combined-h-2494-de_SmPC.xml


Created XML File For :- emea-combined-h-2494-de_SmPC.json


In [288]:
from match.rulebook.matchRulebook import MatchRuleBook

rules= MatchRuleBook(
            fileNameRuleBook= fileNameMatchRuleBook,
            procedureType= procedureType,
            languageCode= languageCode,
            documentNumber= 0).ruleDict



### Annex II

In [297]:
documentNumber = 1
docFilter = "ANHANG II.json"
stopWordFilterLen = 6
stopWordlanguage = 'german'
start=0
end=1

In [298]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de
['emea-combined-h-2494-de_ PACKUNGSBEILAGE.json', 'emea-combined-h-2494-de_ANHANG II.json', 'emea-combined-h-2494-de_ANHANG III.json', 'emea-combined-h-2494-de_SmPC.json']
['emea-combined-h-2494-de_ANHANG II.json']
Starting Heading Extraction For File :- emea-combined-h-2494-de_ANHANG II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG II.json
--------------------------------------------
ANHANG II

OriginalCheck
('<=4|44.44|(56, 56, 100)|0.76|', 'ANhang II', 'ANHANG II')

True  ||    ||  ANhang II  ||  ANHANG II
True  ||  Contains<>|175.56|(53, 87, 86)|0.66|  ||  A.        HERSTELLER, DER  FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST   ||  A. <HERSTELLER DES WIRKSTOFFS/DER WIRKSTOFFE BIOLOGISCHEN URSPRUNGS UND> HERSTELLER, DER (DIE) FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST (SIND)
True  ||    ||  B.        BEDINGUNGEN ODER E

2021-04-26 01:44:54,263 : ExtractContentBetween_e : Cleaning Match Results
2021-04-26 01:44:54,268 : ExtractContentBetween_e : Finished Cleaning Match Results
2021-04-26 01:44:54,277 : ExtractContentBetween_e : Extracting Content Between Headings
2021-04-26 01:44:54,284 : ExtractContentBetween_e : Finished Extracting Content Between Headings



OriginalCheck
('<=4|77.67|(32, 40, 86)|0.55|', 'Der Inhaber der Genehmigung für das Inverkehrbringen schließt innerhalb des festgelegten Zeitrahmens folgende Maßnahmen ab:', 'Verpflichtung zur Durchführung von Maßnahmen nach der Zulassung ')



Heading Not Found 
 ['Name und Anschrift des (der) Hersteller(s) des Wirkstoffs/der Wirkstoffe biologischen Ursprungs', 'Amtliche Chargenfreigabe', 'Zusätzliche Maßnahmen zur Risikominimierung', 'SPEZIFISCHE VERPFLICHTUNG ZUM ABSCHLUSS VON MASSNAHMEN NACH DER ZULASSUNG <UNTER „BESONDEREN BEDINGUNGEN“> <UNTER „AUSSERGEWÖHNLICHEN UMSTÄNDEN“']


dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- emea-combined-h-2494-de_ANHANG II.json
Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-de_ANHANG II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG 

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,468,13001,CAP,,ANHANG II,,ANhang II,24,8eaaf8e2-de3b-489a-80f8-6ed349fae5c7,0,,\nANhang II\n,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE"" style=""text-transform:uppercase"">ANhang II</span></b></p><p class=""BodytextAgency"" style=""margin-bottom:0in;line-he..."
1,469,13002,CAP,A,"<HERSTELLER DES WIRKSTOFFS/DER WIRKSTOFFE BIOLOGISCHEN URSPRUNGS UND> HERSTELLER, DER (DIE) FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST (SIND)",13001.0,"A. HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST",26,8352f953-9991-4b8b-add7-b68c8c110803,0,13001.0,"\nA. HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST \n","<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">A. HE..."
2,472,13005,CAP,B,BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH,13001.0,B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH,28,6eef24d5-866f-46fb-b09e-e4baab0958ac,0,13001.0,\nB. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">B. BE..."
3,474,13007,CAP,C,SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN,13001.0,C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN,30,22886832-dea2-4475-b626-581b3d3ec622,0,13001.0,\nC. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">C. </..."
4,476,13009,CAP,D,BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE SICHERE UND WIRKSAME ANWENDUNG DES ARZNEIMITTELS,13001.0,D. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE SICHERE UND WIRKSAME ANWENDUNG DES ARZNEIMITTELS,32,936756db-6809-4c64-bd71-cfcb6616340b,0,13001.0,\nD. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE SICHERE UND WIRKSAME ANWENDUNG DES ARZNEIMITTELS\n \n\n \n,"<p class=""NormalAgency"" style=""margin-left:85.05pt;text-indent:-35.4pt""><b><span lang=""DE"" style='font-size:11.0pt;font-family:""Times New Roman"",serif'>D.</span></b><span lang=""DE"" style='font-siz..."
5,469,13002,CAP,A,"<HERSTELLER DES WIRKSTOFFS/DER WIRKSTOFFE BIOLOGISCHEN URSPRUNGS UND> HERSTELLER, DER (DIE) FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST (SIND)",13001.0,"A. HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST",37,b5d0e2d3-f719-48c1-b3c7-45a2653e41d4,1,,"\nA. HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST \n","<p class=""TitleB""><span lang=""DE"" style=""text-transform:uppercase"">A. </span><span lang=""DE"">HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST </span></p><p class=""BodytextAgency"" st..."
6,471,13004,CAP,,"Name und Anschrift des (der) Hersteller(s), der (die) für die Chargenfreigabe verantwortlich ist (sind)",13002.0,"Name und Anschrift des Herstellers, der für die Chargenfreigabe verantwortlich ist",39,fcd111f7-7dd6-42a3-9058-adb632b8db6c,1,13002.0,"\nName und Anschrift des Herstellers, der für die Chargenfreigabe verantwortlich ist \n \nAlmac Pharma Services (Ireland) Limited\nFinnabair Industrial Estate\nDundalk\nCo. Louth\nA91 P9KD\nIrland...","<p class=""MsoNormal"" style=""page-break-after:avoid""><u><span lang=""DE"">Name und Anschrift des Herstellers, der für die Chargenfreigabe verantwortlich ist </span></u></p><p class=""MsoNormal"" style=..."
7,472,13005,CAP,B,BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH,13001.0,B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH,57,8fc117b7-935f-4791-b529-4ad7c661e36d,1,13004.0,\nB. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH\n \nArzneimittel auf eingeschränkte ärztliche Verschreibung (siehe Anhang I: Zusammenfassung der Merkmale des Arzneimitte...,"<p class=""TitleB""><span lang=""DE"">B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH</span></p><p class=""NormalAgency"" style=""page-break-after:avoid""><span lang=""DE"" style='f..."
8,474,13007,CAP,C,SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN,13001.0,C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN,62,51970adf-2c7f-4d3d-84ad-33196bce6a4d,1,13005.0,\nC. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN \n,"<p class=""TitleB""><span lang=""DE"">C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN </span></p><p class=""TitleB""><span lang=""DE""> </span></p>"
9,475,13008,CAP,,Regelmäßig aktualisierte Unbedenklichkeitsberichte [Periodic Safety Update Reports (PSURs)],13007.0,· Regelmäßig aktualisierte Unbedenklichkeitsberichte [Periodic Safety Update Reports (PSURs)],64,6fe19043-ebca-41e0-8e9b-e89fa3634b84,1,13007.0,\n· Regelmäßig aktualisierte Unbedenklichkeitsberichte [Periodic Safety Update Reports (PSURs)]\n \nDie Anforderungen an die Einreichung von PSURs für dieses Arzneimittel sind in der nach Art...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.05pt;margin-bottom: 0in;margin-left:14.2pt;margin-bottom:.0001pt;text-indent:-14.2pt;page-break-after: avoid""><span lang=""DE"" style=""font-..."


2021-04-26 01:44:54,317 : XmlGeneration_g : Initiating XML Generation
2021-04-26 01:44:54,372 : XmlGeneration_g : Writing to File:emea-combined-h-2494-de_ANHANG II.xml


Completed Extracting Content Between Heading
Created XML File For :- emea-combined-h-2494-de_ANHANG II.json


### Labeling

In [302]:
documentNumber = 2
docFilter = "ANHANG III.json"
stopWordFilterLen = 6
stopWordlanguage = 'german'
start=0
end=1

In [303]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de
['emea-combined-h-2494-de_ PACKUNGSBEILAGE.json', 'emea-combined-h-2494-de_ANHANG II.json', 'emea-combined-h-2494-de_ANHANG III.json', 'emea-combined-h-2494-de_SmPC.json']
['emea-combined-h-2494-de_ANHANG III.json']
Starting Heading Extraction For File :- emea-combined-h-2494-de_ANHANG III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG III.json
--------------------------------------------
Etikettierung
True  ||  <=1|18.75|(90, 100, 95)|0.96|  ||  A. ETIKETTIERUNG  ||  ETIKETTIERUNG 
True  ||  Contains<>|82.35|(68, 88, 86)|0.9|  ||  ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG  ||  ANGABEN <AUF DER ÄUSSEREN UMHÜLLUNG> <UND> <AUF DEM BEHÄLTNIS>
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
True  ||    ||  2.       WIRKSTOFF(E)  ||  2. WIRKSTOFF(E)
True  ||    ||  3.       SONSTIGE BESTAN

True  ||    ||  MINDESTANGABEN AUF BLISTERPACKUNGEN ODER FOLIENSTREIFEN  ||  MINDESTANGABEN AUF BLISTERPACKUNGEN ODER FOLIENSTREIFEN
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
True  ||    ||  2.       NAME DES PHARMAZEUTISCHEN UNTERNEHMERS  ||  2. NAME DES PHARMAZEUTISCHEN UNTERNEHMERS
True  ||    ||  3.       VERFALLDATUM  ||  3. VERFALLDATUM
True  ||  Contains<>|133.33|(60, 100, 90)|0.89|  ||  4.       CHARGENBEZEICHNUNG  ||  4. CHARGENBEZEICHNUNG<, SPENDER- UND PRODUKTCODE>
True  ||    ||  5.       WEITERE ANGABEN  ||  5. WEITERE ANGABEN
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  Contains<>|82.35|(68, 88, 86)|0.9|  ||  ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG  ||  ANGABEN <AUF DER ÄUSSEREN UMHÜLLUNG> <UND> <AUF DEM BEHÄLTNIS>
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
True  ||    ||  2.       WIRKSTOFF(E

True  ||    ||  16.     ANGABEN IN BLINDENSCHRIFT  ||  16. ANGABEN IN BLINDENSCHRIFT
True  ||    ||  17.     INDIVIDUELLES ERKENNUNGSMERKMAL – 2D-BARCODE  ||  17. INDIVIDUELLES ERKENNUNGSMERKMAL – 2D-BARCODE
True  ||    ||  18.     INDIVIDUELLES ERKENNUNGSMERKMAL – VOM MENSCHEN LESBARES FORMAT  ||  18. INDIVIDUELLES ERKENNUNGSMERKMAL – VOM MENSCHEN LESBARES FORMAT
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||    ||  2.       WIRKSTOFF(E)  ||  2. WIRKSTOFF(E)
True  ||    ||  3.       SONSTIGE BESTANDTEILE  ||  3. SONSTIGE BESTANDTEILE
True  ||    ||  4.       DARREICHUNGSFORM UND INHALT  ||  4. DARREICHUNGSFORM UND INHALT

OriginalCheck
('<=4|24.14|(76, 76, 100)|0.89|', '5.       Hinweise zur UND ART(EN) DER ANWENDUNG', '5. HINWEISE ZUR UND ART(EN) DER ANWENDUNG')

----------------------------------
RemovedByStyl

True  ||    ||  5.       INHALT NACH GEWICHT, VOLUMEN ODER EINHEITEN  ||  5. INHALT NACH GEWICHT, VOLUMEN ODER EINHEITEN
True  ||    ||  6.       WEITERE ANGABEN  ||  6. WEITERE ANGABEN
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  Contains<>|82.35|(68, 88, 86)|0.9|  ||  ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG  ||  ANGABEN <AUF DER ÄUSSEREN UMHÜLLUNG> <UND> <AUF DEM BEHÄLTNIS>
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
True  ||  <=4|25.0|(89, 100, 95)|0.97|  ||  2.       WIRKSTOFF  ||  2. WIRKSTOFF(E)
True  ||    ||  3.       SONSTIGE BESTANDTEILE  ||  3. SONSTIGE BESTANDTEILE
True  ||    ||  4.       DARREICHUNGSFORM UND INHALT  ||  4. DARREICHUNGSFORM UND INHALT

OriginalCheck
('<=4|24.14|(76, 76, 100)|0.89|', '5.       Hinweise zur UND ART(EN) DER ANWENDUNG', '5. HINWEISE ZUR UND ART(EN) DER ANWENDUNG')

----------------------------------
RemovedByStyle  

True  ||    ||  17.     INDIVIDUELLES ERKENNUNGSMERKMAL – 2D-BARCODE  ||  17. INDIVIDUELLES ERKENNUNGSMERKMAL – 2D-BARCODE
True  ||    ||  18.     INDIVIDUELLES ERKENNUNGSMERKMAL – VOM MENSCHEN LESBARES FORMAT  ||  18. INDIVIDUELLES ERKENNUNGSMERKMAL – VOM MENSCHEN LESBARES FORMAT
True  ||    ||  1.       BEZEICHNUNG DES ARZNEIMITTELS  ||  1. BEZEICHNUNG DES ARZNEIMITTELS
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  <=4|25.0|(89, 100, 95)|0.97|  ||  2.       WIRKSTOFF  ||  2. WIRKSTOFF(E)
True  ||    ||  3.       SONSTIGE BESTANDTEILE  ||  3. SONSTIGE BESTANDTEILE
True  ||    ||  4.       DARREICHUNGSFORM UND INHALT  ||  4. DARREICHUNGSFORM UND INHALT

OriginalCheck
('<=4|24.14|(76, 76, 100)|0.89|', '5.       Hinweise zur UND ART(EN) DER ANWENDUNG', '5. HINWEISE ZUR UND ART(EN) DER ANWENDUNG')

----------------------------------
RemovedByStyle  ||    ||  5.       Hinweise zur UND ART(EN) DER ANWENDUNG

2021-04-26 01:49:11,111 : ExtractContentBetween_q : Cleaning Match Results
2021-04-26 01:49:11,119 : ExtractContentBetween_q : Finished Cleaning Match Results
2021-04-26 01:49:11,164 : ExtractContentBetween_q : Extracting Content Between Headings




Heading Not Found 
 ['HINWEISE ZUR ANWENDUNG']


dict_keys(['5. HINWEISE ZUR UND ART(EN) DER ANWENDUNG', '2. HINWEISE ZUR ANWENDUNG'])
Completed Heading Extraction For File
Starting Document Annotation For File :- emea-combined-h-2494-de_ANHANG III.json
Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- emea-combined-h-2494-de_ANHANG III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG III.json
--------------------------------------------


2021-04-26 01:49:11,829 : ExtractContentBetween_q : Finished Extracting Content Between Headings


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,481,14001,CAP,,ETIKETTIERUNG,,A. ETIKETTIERUNG,52,1ee96412-c073-4158-971d-729b88981b4d,0,,\nA. ETIKETTIERUNG\n \n\n \n,"<p class=""TitleA""><span lang=""DE"">A. ETIKETTIERUNG</span></p><span lang=""DE"" style='font-size:11.0pt;font-family:""Times New Roman"",serif; color:black'><br clear=""all"" style=""page-break-before:alwa..."
1,482,14002,CAP,,ANGABEN <AUF DER ÄUSSEREN UMHÜLLUNG> <UND> <AUF DEM BEHÄLTNIS>,14001.0,ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG,57,814cb64c-8d8d-4218-8861-b1b3ffac3c22,0,14001.0,\nANGABEN AUF DER ÄUSSEREN UMHÜLLUNG\n \nUMKarton FÜR DIE BLISTERPACKUNG MIT 56 TABLETTEN\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><div style=""border:solid windowtext ..."
2,483,14003,CAP,1.0,BEZEICHNUNG DES ARZNEIMITTELS,14002.0,1. BEZEICHNUNG DES ARZNEIMITTELS,63,9bfdbbd8-d691-4bbf-8abe-8ebebb6df119,0,14002.0,\n1. BEZEICHNUNG DES ARZNEIMITTELS\n \nKalydeco 150 mg Filmtabletten\nIvacaftor\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE"">Kalydeco 150 mg Filmtabletten</span></p><p class=""MsoNormal""><span lang=""DE"">Iv..."
3,484,14004,CAP,2.0,WIRKSTOFF(E),14002.0,2. WIRKSTOFF(E),70,61b2e6f5-e534-49af-bb2e-188015cf6aa9,0,14002.0,\n2. WIRKSTOFF(E)\n \nEine Tablette enthält 150 mg Ivacaftor.\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><i><span lang=""DE""> </span></i></p><p class=""MsoNormal""><span lang=""DE"">Eine Tablette enthält 150 mg Ivacaftor.</span></p><p class=""MsoNormal""><..."
4,485,14005,CAP,3.0,SONSTIGE BESTANDTEILE,14002.0,3. SONSTIGE BESTANDTEILE,76,add72e15-7791-45c8-ad64-b364e2935c96,0,14002.0,\n3. SONSTIGE BESTANDTEILE\n \nEnthält Lactose.\n \nPackungsbeilage beachten.\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE"">Enthält Lactose.</span></p><p class=""MsoNormal""><span lang=""DE""> </span></p><p ..."
5,486,14006,CAP,4.0,DARREICHUNGSFORM UND INHALT,14002.0,4. DARREICHUNGSFORM UND INHALT,84,b183a2b5-7831-433e-8436-a5931b1c850a,0,14002.0,\n4. DARREICHUNGSFORM UND INHALT\n \n56 Tabletten\n \n \n \n5. Hinweise zur UND ART(EN) DER ANWENDUNG\n \nPackungsbeilage beachten.\n \nZum Einnehmen \n \nHinweise zur Anwendung\n \nZ...,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE"">56 Tabletten</span></p><p class=""MsoNormal""><span lang=""DE""> </span></p><p clas..."
6,488,14008,CAP,6.0,"WARNHINWEIS, DASS DAS ARZNEIMITTEL FÜR KINDER UNZUGÄNGLICH AUFZUBEWAHREN IST",14002.0,"6. WARNHINWEIS, DASS DAS ARZNEIMITTEL FÜR KINDER UNZUGÄNGLICH AUFZUBEWAHREN IST",104,85799aea-03cc-48c3-8f16-061496d64328,0,14002.0,"\n6. WARNHINWEIS, DASS DAS ARZNEIMITTEL FÜR KINDER UNZUGÄNGLICH AUFZUBEWAHREN IST\n \nArzneimittel für Kinder unzugänglich aufbewahren.\n \n \n","<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE"">Arzneimittel für Kinder unzugänglich aufbewahren.</span></p><p class=""MsoNormal..."
7,489,14009,CAP,7.0,"WEITERE WARNHINWEISE, FALLS ERFORDERLICH",14002.0,"7. WEITERE WARNHINWEISE, FALLS ERFORDERLICH",110,ae7f9af8-5e40-4c73-8e37-df1e439140e3,0,14002.0,"\n7. WEITERE WARNHINWEISE, FALLS ERFORDERLICH\n \n \n","<p class=""MsoNormal""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE""> </span></p><div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt""> <p class=""MsoNormal"" s..."
8,490,14010,CAP,8.0,VERFALLDATUM,14002.0,8. VERFALLDATUM,114,819c362b-f563-4ae8-94cb-67f2fd6c04ee,0,14002.0,\n8. VERFALLDATUM\n \nVerwendbar bis\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE"">Verwendbar bis</span></p><p class=""MsoNormal""><span lang=""DE""> </span></p><p cl..."
9,491,14011,CAP,9.0,BESONDERE VORSICHTSMASSNAHMEN FÜR DIE AUFBEWAHRUNG,14002.0,9. BESONDERE VORSICHTSMASSNAHMEN FÜR DIE AUFBEWAHRUNG,120,2807c6e7-25f8-4f38-b355-88f6f9802a3b,0,14002.0,\n9. BESONDERE VORSICHTSMASSNAHMEN FÜR DIE AUFBEWAHRUNG\n \n \n,"<p class=""MsoNormal"" style=""page-break-after:avoid""><span lang=""DE""> </span></p><p class=""MsoNormal""><span lang=""DE""> </span></p><div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt..."


2021-04-26 01:49:11,998 : XmlGeneration_Y : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 01:49:12,327 : XmlGeneration_Y : Writing to File:emea-combined-h-2494-de_ANHANG III.xml


Created XML File For :- emea-combined-h-2494-de_ANHANG III.json


In [304]:
a

Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased,StringLength
0,False,,"<br clear=""all"" style=""page-break-before:always""/>",False,58493305-8cb3-467b-bc6f-a6e9910b4878,False,,False,False,False,4c50a9da-49c0-475f-801d-948e6db0ee50,page-break-before:always,,False,False,0
1,False,['MsoNormal'],"<p class=""MsoNormal""><span lang=""DE""> </span></p>",False,11e05ac0-51f4-4f07-bc53-19eb35ebbd65,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,,,False,False,0
2,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,9bf3240d-e647-4eb4-89fc-2d134e17bb35,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
3,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,a5905654-f89f-44ab-865d-50acc8c803b1,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
4,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,873e51b0-ff1e-4b36-8fe0-d1c5c6121d17,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
5,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,de4b3d14-3786-494a-836a-9b818ee1d718,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
6,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,a453470b-8279-47d2-a841-d7e77d31531e,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
7,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,2c992047-cd71-4825-9e13-a2347f74bd55,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
8,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,f7ecf8d0-1f96-4f64-9940-89dbe6a710d6,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0
9,False,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><span lang=""DE""> </span></p>",False,f17398ed-da90-4c0a-9a2a-463341877a3c,False,,False,False,False,10be69e9-3b63-42d1-a4d7-5923f53ece56,text-align:center,,False,False,0


### Package Leaflet

In [314]:
documentNumber = 3
docFilter = "PACKUNGSBEILAGE.json"
stopWordFilterLen = 100
stopWordlanguage = 'german'
start=0
end=1
isPackageLeaflet = True

In [315]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end,
               isPackageLeaflet,
               "Kalydeco"
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de
['emea-combined-h-2494-de_ PACKUNGSBEILAGE.json', 'emea-combined-h-2494-de_ANHANG II.json', 'emea-combined-h-2494-de_ANHANG III.json', 'emea-combined-h-2494-de_SmPC.json']
['emea-combined-h-2494-de_ PACKUNGSBEILAGE.json']
Starting Heading Extraction For File :- emea-combined-h-2494-de_ PACKUNGSBEILAGE.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ PACKUNGSBEILAGE.json
--------------------------------------------
Packungsbeilage
True  ||  <=1|16.67|(91, 100, 95)|0.91|  ||  B. PACKUNGSBEILAGE  ||  PACKUNGSBEILAGE
True  ||    ||  Was in dieser Packungsbeilage steht  ||  Was in dieser Packungsbeilage steht
----------------------------------
RemovedByStyle  ||    ||  1.       Was ist Kalydeco und wofür wird es angewendet?  ||  1. Was ist Kalydeco und wofür wird es angewendet?
----------------------------------
----------------------

True  ||  Contains<>|46.88|(81, 97, 95)|0.96|  ||  3.       Wie ist Kalydeco einzunehmen?  ||  3. Wie ist Kalydeco <einzunehmen> <anzuwenden>?
True  ||  Contains<>|38.16|(81, 74, 80)|0.92|  ||  Wenn Ihr Kind eine größere Menge von Kalydeco eingenommen hat, als es sollte  ||  Wenn Sie eine größere Menge von Kalydeco <eingenommen> <angewendet> haben, als Sie sollten
True  ||    ||  4.       Welche Nebenwirkungen sind möglich?  ||  4. Welche Nebenwirkungen sind möglich?
True  ||  Contains<>|3.64|(98, 98, 99)|1.0|  ||  Zusätzliche Nebenwirkungen bei Kindern und Jugendlichen  ||  Zusätzliche Nebenwirkungen bei Kindern <und Jugendlichen>
True  ||    ||  Meldung von Nebenwirkungen  ||  Meldung von Nebenwirkungen
True  ||    ||  5.       Wie ist Kalydeco aufzubewahren?  ||  5. Wie ist Kalydeco aufzubewahren?
True  ||    ||  6.       Inhalt der Packung und weitere Informationen  ||  6. Inhalt der Packung und weitere Informationen
True  ||    ||  Was Kalydeco enthält  ||  Was Kalydeco enthält
Tr

2021-04-26 02:00:23,873 : ExtractContentBetween_4 : Cleaning Match Results
2021-04-26 02:00:23,879 : ExtractContentBetween_4 : Finished Cleaning Match Results
2021-04-26 02:00:23,902 : ExtractContentBetween_4 : Extracting Content Between Headings
2021-04-26 02:00:23,925 : ExtractContentBetween_4 : Finished Extracting Content Between Headings




Heading Not Found 
 ['q Dieses Arzneimittel unterliegt einer zusätzlichen Überwachung. Dies ermöglicht eine schnelle Identifizierung neuer Erkenntnisse über die Sicherheit. Sie können dabei helfen, indem Sie jede auftretende Nebenwirkung melden. Hinweise zur Meldung von Nebenwirkungen, siehe Ende Abschnitt 4', 'X enthält {Bezeichnung <des> <der> sonstigen Bestandteil(s)(e)}', 'Pharmazeutischer Unternehmer und Hersteller', 'Falls Sie weitere Informationen über das Arzneimittel wünschen, setzen Sie sich bitte mit dem örtlichen Vertreter des pharmazeutischen Unternehmers in Verbindung:', 'Die folgenden Informationen sind für medizinisches Fachpersonal bestimmt:']


dict_keys(['1. Was ist Kalydeco und wofür wird es angewendet?', '2. Was sollten Sie vor der <Einnahme> <Anwendung> von Kalydeco beachten?', '3. Wie ist Kalydeco <einzunehmen> <anzuwenden>?', '4. Welche Nebenwirkungen sind möglich?', '5. Wie ist Kalydeco aufzubewahren?', '6. Inhalt der Packung und weitere Informationen', 'Kaly

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,514,15001,CAP,,PACKUNGSBEILAGE,,B. PACKUNGSBEILAGE,24,0c8427e9-8cb3-403e-a4b1-07e240181c7e,0,,\nB. PACKUNGSBEILAGE\n\n \n\nGebrauchsinformation: Information für Patienten\n \nKalydeco 75 mg Filmtabletten\nKalydeco 150 mg Filmtabletten\nIvacaftor\n \nLesen Sie die gesamte Packungsbeilage so...,"<p class=""TitleA""><span lang=""DE"">B. PACKUNGSBEILAGE</span></p><b><span lang=""DE"" style='font-size:11.0pt;font-family:""Times New Roman"",serif'><br clear=""all"" style=""page-break-before:always""/> </..."
1,516,15003,CAP,,Was in dieser Packungsbeilage steht,15001.0,Was in dieser Packungsbeilage steht,41,5d1873a3-99b5-4325-9157-42068cf44ea3,0,15001.0,\nWas in dieser Packungsbeilage steht\n \n1. Was ist Kalydeco und wofür wird es angewendet?\n2. Was sollten Sie vor der Einnahme von Kalydeco beachten?\n3. Wie ist Kalydeco einzu...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""DE"">Was in dieser Packungsbeilage steht</span></b></p><p class=""MsoNormal"" style=""margin-right:-.1pt;page-brea..."
2,517,15004,CAP,1.0,Was ist X und wofür wird es angewendet?,15001.0,1. Was ist Kalydeco und wofür wird es angewendet?,51,c0cc6cc4-9be5-484a-86b4-62e2cfc22fdd,0,15001.0,\n1. Was ist Kalydeco und wofür wird es angewendet?\n \nKalydeco enthält den Wirkstoff Ivacaftor. Ivacaftor wirkt auf der Ebene des Cystic Fibrosis Transmembrane Conductance Regulators (CFTR...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.35pt;margin-bottom:.0001pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">1. ..."
3,518,15005,CAP,2.0,Was sollten Sie vor der <Einnahme> <Anwendung> von X beachten?,15001.0,2. Was sollten Sie vor der Einnahme von Kalydeco beachten?,63,80cf8fe9-b7a4-42a9-976d-e2ee52f4a180,0,15001.0,\n2. Was sollten Sie vor der Einnahme von Kalydeco beachten?\n,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.35pt;margin-bottom:.0001pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">2. ..."
4,519,15006,CAP,,"X darf nicht <eingenommen> <angewendet> werden<,>",15005.0,"Kalydeco darf nicht eingenommen werden,",65,6af8394d-0cb9-4864-80a8-a256369eef78,0,15005.0,"\nKalydeco darf nicht eingenommen werden,\n \n· wenn Sie allergisch gegen Ivacaftor oder einen der in Abschnitt 6. genannten sonstigen Bestandteile dieses Arzneimittels sind.\n","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE"">Kalydeco darf nicht eingenommen werden,</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE"">..."
5,520,15007,CAP,,Warnhinweise und Vorsichtsmaßnahmen,15005.0,Warnhinweise und Vorsichtsmaßnahmen,69,95a7456e-3dc2-47eb-b1e3-96ca9f6f7aa8,0,15005.0,"\nWarnhinweise und Vorsichtsmaßnahmen\n \n· Bitte sprechen Sie mit Ihrem Arzt, wenn Sie Leberprobleme haben oder in der Vergangenheit hatten. Ihr Arzt muss möglicherweise Ihre Dosis anpass...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE"">Warnhinweise und Vorsichtsmaßnahmen</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE""> </s..."
6,521,15008,CAP,,Kinder <und Jugendliche>,15005.0,Kinder und Jugendliche,91,bd8e2ef6-ec5a-4d4e-838c-2f72cb6f680f,0,15005.0,"\nKinder und Jugendliche\n \nWenden Sie dieses Arzneimittel nicht bei Kindern unter 4 Monaten an, da nicht bekannt ist, ob Ivacaftor bei diesen Kindern sicher und wirksam ist.\n \nWenden Sie diese...","<p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE"">Kinder und Jugendliche</span></b></p><p class=""MsoNormal"" style=""page-break-after:avoid""><b><span lang=""DE""> </span></b></p><..."
7,522,15009,CAP,,<Einnahme> <Anwendung> von X zusammen mit anderen Arzneimitteln,15005.0,Einnahme von Kalydeco zusammen mit anderen Arzneimitteln,97,1c44afe3-9ab4-40ca-968a-15211732d200,0,15005.0,"\nEinnahme von Kalydeco zusammen mit anderen Arzneimitteln\n \nInformieren Sie Ihren Arzt oder Apotheker, wenn Sie andere Arzneimittel einnehmen/anwenden, kürzlich andere Arzneimittel eingenommen/...","<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""DE"">Einnahme von Kalydeco zusammen mit anderen Arzneimitteln</span></b></p><p class=""MsoNormal"" style=""margin-..."
8,523,15010,CAP,,"<Einnahme> <Anwendung> von X zusammen mit <Nahrungsmitteln> <und> <,> <Getränken> <und> <Alkohol>",15005.0,Einnahme von Kalydeco zusammen mit Nahrungsmitteln und Getränken,111,6e138aea-1361-488f-9bf3-882e650e9820,0,15005.0,"\nEinnahme von Kalydeco zusammen mit Nahrungsmitteln und Getränken\n \nWährend der Behandlung mit Kalydeco ist auf Speisen oder Getränke, die Grapefruit enthalten, zu verzichten, da sie die Nebenw...","<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""DE"">Einnahme von Kalydeco zusammen mit Nahrungsmitteln und Getränken</span></b></p><p class=""MsoNormal"" style=..."
9,524,15011,CAP,,"Schwangerschaft <und> <,> Stillzeit <und Fortpflanzungsfähigkeit>",15005.0,Schwangerschaft und Stillzeit,115,e3707a19-a5be-46b1-9635-240ee89d320b,0,15005.0,"\nSchwangerschaft und Stillzeit\n \nWenn Sie schwanger sind oder stillen, oder wenn Sie vermuten, schwanger zu sein, oder beabsichtigen, schwanger zu werden, fragen Sie vor der Einnahme dieses Arz...","<p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-after:avoid""><b><span lang=""DE"">Schwangerschaft und Stillzeit</span></b></p><p class=""MsoNormal"" style=""margin-right:-.1pt;page-break-afte..."


2021-04-26 02:00:24,010 : XmlGeneration_W : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 02:00:24,362 : XmlGeneration_W : Writing to File:emea-combined-h-2494-de_ PACKUNGSBEILAGE.xml


Created XML File For :- emea-combined-h-2494-de_ PACKUNGSBEILAGE.json


## English

In [322]:
languageCode = 'en'

### Kalydeco



### SmPC

In [323]:
documentNumber = 0
docFilter = "SmPC.json"
stopWordFilterLen = 6
stopWordlanguage = 'english'
start=0
end=1

In [324]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en
['Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json', 'Kalydeco II-86-PI-clean_ANNEX II.json', 'Kalydeco II-86-PI-clean_ANNEX III.json', 'Kalydeco II-86-PI-clean_SmPC.json']
['Kalydeco II-86-PI-clean_SmPC.json']
Starting Heading Extraction For File :- Kalydeco II-86-PI-clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_SmPC.json
--------------------------------------------
SmPC
----------------------------------
RemovedByStyle  ||    ||  SUMMARY OF PRODUCT CHARACTERISTICS  ||  SUMMARY OF PRODUCT CHARACTERISTICS
----------------------------------
True  ||    ||  1.       NAME OF THE MEDICINAL PRODUCT  ||  1. NAME OF THE MEDICINAL PRODUCT
True  ||    ||  2.       QUALITATIVE AND QUANTITATIVE COMPOSITION  ||  2. QUALITATIVE AND QUANTITATIVE COMPOSITION
True  ||  <=4|11.11|(95, 89, 95)|0.99|  ||  Excipient with known effect  ||  

True  ||    ||  6.2     Incompatibilities  ||  6.2 Incompatibilities
True  ||    ||  6.3       Shelf life  ||  6.3 Shelf life
True  ||    ||  6.4     Special precautions for storage  ||  6.4 Special precautions for storage
True  ||  SpecialCase1|158.33|(48, 81, 86)|0.69|  ||  6.5     Nature and contents of container  ||  6.5 Nature and contents of container <and special equipment for use administration or implantation>
True  ||  SpecialCase2|14.63|(93, 88, 95)|0.98|  ||  6.6     Special precautions for disposal and other handling  ||  6.6 Special precautions for disposal <and other handling>
True  ||    ||  7.       MARKETING AUTHORISATION HOLDER  ||  7. MARKETING AUTHORISATION HOLDER
True  ||    ||  8.       MARKETING AUTHORISATION NUMBER(S)  ||  8. MARKETING AUTHORISATION NUMBER(S)
True  ||    ||  9.       DATE OF FIRST AUTHORISATION/RENEWAL OF THE AUTHORISATION  ||  9. DATE OF FIRST AUTHORISATION/RENEWAL OF THE AUTHORISATION
True  ||    ||  10.     DATE OF REVISION OF THE TEXT  ||  

2021-04-26 02:19:51,614 : ExtractContentBetween_9 : Cleaning Match Results
2021-04-26 02:19:51,617 : ExtractContentBetween_9 : Finished Cleaning Match Results
2021-04-26 02:19:51,645 : ExtractContentBetween_9 : Extracting Content Between Headings
2021-04-26 02:19:51,665 : ExtractContentBetween_9 : Finished Extracting Content Between Headings


Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- Kalydeco II-86-PI-clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_SmPC.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,682,20003,CAP,1.0,NAME OF THE MEDICINAL PRODUCT,20001,1. NAME OF THE MEDICINAL PRODUCT,32,f20bd308-1923-4f21-8683-66eb29755c2e,0,,\n1. NAME OF THE MEDICINAL PRODUCT\n \nKalydeco 150 mg film‑coated tablets\n \n,"<h1 style=""margin:0in""><span lang=""EN-GB"" style='font-size:11.0pt;font-family: ""Times New Roman"",serif'>1. NAME OF THE MEDICINAL PRODUCT</span></h1><p class=""MsoNormal"" style=""text-align:jus..."
1,683,20004,CAP,2.0,QUALITATIVE AND QUANTITATIVE COMPOSITION,20001,2. QUALITATIVE AND QUANTITATIVE COMPOSITION,37,a50bfad6-1567-401b-b2c3-99171b61af4e,0,20003.0,\n2. QUALITATIVE AND QUANTITATIVE COMPOSITION\n \nEach film‑coated tablet contains 150 mg of ivacaftor.\n,"<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif;color:black'>2. QUALITATIVE AND QUANTITATIVE COMPOSITION</span></h1>..."
2,686,20007,CAP,,Excipient(s) with known effect,20006,Excipient with known effect,41,d3ef64e9-4b77-42dc-b527-9b87608c11d6,0,20004.0,"\nExcipient with known effect\n \nEach film‑coated tablet contains 167.2 mg of lactose monohydrate.\n \nFor the full list of excipients, see section 6.1.\n \n","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span lang=""EN-GB"">Excipient with known effect</span></u></p><p class=""MsoNormal"" style=""line-height:normal;page-break-aft..."
3,687,20008,CAP,3.0,PHARMACEUTICAL FORM,20001,3. PHARMACEUTICAL FORM,48,0ba3a956-8595-4184-954a-f349954c864f,0,20004.0,"\n3. PHARMACEUTICAL FORM\n \nFilm‑coated tablet (tablet)\n \nLight blue, capsule‑shaped film‑coated tablets, printed with “V 150” in black ink on one side and plain on the other (16.5 mm x 8...","<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif'>3. PHARMACEUTICAL FORM</span></h1><p class=""MsoNormal"" style=""text-..."
4,688,20009,CAP,4.0,CLINICAL PARTICULARS,20001,4. CLINICAL PARTICULARS,55,e85b46ff-3005-4ed8-b065-000d07a0e856,0,20008.0,\n4. CLINICAL PARTICULARS\n,"<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif;text-transform:uppercase'>4. </span><span lang=""EN-GB"" style='font-s..."
5,689,20010,CAP,4.1,Therapeutic indications,20009,4.1 Therapeutic indications,57,2872c9dd-124d-4fd9-91d7-894db0aad587,0,20009.0,"\n4.1 Therapeutic indications\n \nKalydeco tablets are indicated: \n· As monotherapy for the treatment of adults, adolescents, and children aged 6 years and older and weighing 25 kg or...","<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;line-height: normal;page-break-after:avoid""><b><span lang=""EN-GB"">4.1 Therapeutic indications</span></b></p><p class=""MsoNo..."
6,690,20011,CAP,4.2,Posology and method of administration,20009,4.2 Posology and method of administration,66,680152bb-85ba-4116-9c32-6865ff420d79,0,20009.0,"\n4.2 Posology and method of administration\n \nKalydeco should only be prescribed by physicians with experience in the treatment of cystic fibrosis. If the patient's genotype is unknown, an a...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">4.2 Posology and method of administration</span></b></p><p class=""MsoNormal"" style=""text-align:just..."
7,691,20012,CAP,,Posology,20011,Posology,70,1ecae279-28ff-4045-8365-26ba75c85240,0,20011.0,"\nPosology\n \nAdults, adolescents and children aged 6 years and older should be dosed according to Table 1. \n \nTable 1: Dosing recommendations\n Morning Evening Ivacaftor as monothe...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span lang=""EN-GB"">Posology</span></u></p><p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span ..."
8,692,20013,CAP,,Paediatric population,20012,Paediatric population,116,81df52ce-40ad-4778-bbf7-92040468e6c9,0,20012.0,\nPaediatric population\n \nThe safety and efficacy of ivacaftor monotherapy in children aged less than 4 months have not been established. No data are available.\n \nAn appropriate dose for child...,"<p class=""MsoNormal"" style=""line-height:normal""><u><span lang=""EN-GB"">Paediatric population</span></u></p><p class=""MsoNormal"" style=""line-height:normal""><u><span lang=""EN-GB""><span style=""text-de..."
9,693,20014,CAP,,Method of administration,20011,Method of administration,126,7e1bdf6f-b7da-4d35-8937-ebc12daa1b3e,0,20011.0,"\nMethod of administration\n \nFor oral use. \n \nPatients should be instructed to swallow the tablets whole. The tablets should not be chewed, crushed, or broken before swallowing because there a...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span lang=""EN-GB"">Method of administration</span></u></p><p class=""MsoNormal"" style=""line-height:normal;page-break-after:..."


2021-04-26 02:19:51,721 : XmlGeneration_Y : Initiating XML Generation
2021-04-26 02:19:51,721 : XmlGeneration_Y : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 02:19:51,911 : XmlGeneration_Y : Writing to File:Kalydeco II-86-PI-clean_SmPC.xml
2021-04-26 02:19:51,911 : XmlGeneration_Y : Writing to File:Kalydeco II-86-PI-clean_SmPC.xml


Created XML File For :- Kalydeco II-86-PI-clean_SmPC.json


### Annex II

In [326]:
documentNumber = 1
docFilter = "ANNEX II.json"
stopWordFilterLen = 6
stopWordlanguage = 'english'
start=0
end=1

In [327]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en
['Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json', 'Kalydeco II-86-PI-clean_ANNEX II.json', 'Kalydeco II-86-PI-clean_ANNEX III.json', 'Kalydeco II-86-PI-clean_SmPC.json']
['Kalydeco II-86-PI-clean_ANNEX II.json']
Starting Heading Extraction For File :- Kalydeco II-86-PI-clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ANNEX II.json
--------------------------------------------
AnnexII

OriginalCheck
('<=4|50.0|(50, 50, 100)|0.67|', 'Annex II', 'ANNEX II')

True  ||    ||  Annex II  ||  ANNEX II
True  ||  Contains<>|118.75|(61, 91, 86)|0.81|  ||  A.      MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE   ||  A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE
True  ||    ||  B.      CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE  ||  B. CONDITIONS OR RESTRICTIO

2021-04-26 02:33:05,999 : ExtractContentBetween_K : Cleaning Match Results
2021-04-26 02:33:05,999 : ExtractContentBetween_K : Cleaning Match Results
2021-04-26 02:33:06,002 : ExtractContentBetween_K : Finished Cleaning Match Results
2021-04-26 02:33:06,002 : ExtractContentBetween_K : Finished Cleaning Match Results
2021-04-26 02:33:06,009 : ExtractContentBetween_K : Extracting Content Between Headings
2021-04-26 02:33:06,009 : ExtractContentBetween_K : Extracting Content Between Headings
2021-04-26 02:33:06,016 : ExtractContentBetween_K : Finished Extracting Content Between Headings
2021-04-26 02:33:06,016 : ExtractContentBetween_K : Finished Extracting Content Between Headings


True  ||    ||  ·         Obligation to conduct post-authorisation measures  ||  Obligation to conduct post-authorisation measures

OriginalCheck
('Contains<>|274.07|(8, 13, 86)|0.33|', 'The MAH shall complete, within the stated timeframe, the below measures:', 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR\r\n<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>')



Heading Not Found 
 ['Name and address of the manufacturer(s) of the biological active substance(s)', 'Official batch release', 'Additional risk minimisation measures', 'SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR\r\n<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>']


dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- Kalydeco II-86-PI-clean_ANNEX II.json
Error Found
Completed Document Annotation For File
Starting Extracting Content Bet

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,740,21001,CAP,,ANNEX II,,Annex II,25,b0a1fce6-7582-487f-8cbb-a7efef4cc182,0,,\nAnnex II\n,"<p align=""center"" class=""No-numheading3Agency"" style=""margin:0in;text-align:center; page-break-after:auto""><span lang=""EN-GB"" style='font-family:""Times New Roman"",serif; text-transform:uppercase'>..."
1,741,21002,CAP,A,<MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE,21001.0,A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE,27,99923573-32ab-42ab-aa98-d1db7066e587,0,21001.0,\nA. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE \n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE </span></b></p><p class=""BodytextAgency"" style=""ma..."
2,744,21005,CAP,B,CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE,21001.0,B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE,29,d465c62a-6915-4da0-9595-1f42c9856a1f,0,21001.0,\nB. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE</span></b></p><p class=""TitleB""><span lang=""..."
3,746,21007,CAP,C,OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION,21001.0,C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION,31,937ca248-82d8-4ac0-b996-117811048fec,0,21001.0,\nC. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION\n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION</span></b></p><p class=""TitleB""..."
4,748,21009,CAP,D,CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,21001.0,D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,33,5d69d7fc-7a44-4f19-a081-ceb163b73627,0,21001.0,\nD. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT\n \n\n \n,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt""><b><span lang=""EN-GB"">D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT</s..."
5,741,21002,CAP,A,<MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE,21001.0,A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE,38,99342e5a-8f88-4a23-8760-1d6ce03a61ea,1,,\nA. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE\n,"<p class=""TitleB""><span lang=""EN-GB"">A. MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE</span></p><p class=""BodytextAgency"" style=""margin-bottom:0in;line-height:normal""><span lang=""EN-GB"" style..."
6,743,21004,CAP,,Name and address of the manufacturer(s) responsible for batch release,21002.0,Name and address of the manufacturer(s) responsible for batch release,40,a88dc3cb-75da-44ef-a21c-7a9f37d7cc66,1,21002.0,\nName and address of the manufacturer(s) responsible for batch release\n \nAlmac Pharma Services (Ireland) Limited\nFinnabair Industrial Estate\nDundalk\nCo. Louth\nA91 P9KD\nIreland\n \nAlmac Ph...,"<p class=""MsoNormal"" style=""line-height:normal""><u><span lang=""EN-GB"">Name and address of the manufacturer(s) responsible for batch release</span></u></p><p class=""MsoNormal"" style=""line-height:no..."
7,744,21005,CAP,B,CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE,21001.0,B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE,58,0d7e0ccc-c60f-49e7-9cf8-143b0bd2aa6f,1,21004.0,"\nB. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE\n \nMedicinal product subject to restricted medical prescription (see Annex I: Summary of Product Characteristics, section 4.2).\n","<p class=""TitleB""><span lang=""EN-GB"">B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE</span></p><p class=""NormalAgency""><span lang=""EN-GB"" style='font-size:11.0pt;font-family: ""Times Ne..."
8,746,21007,CAP,C,OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION,21001.0,C. Other conditions and requirements of the Marketing Authorisation,62,9e002f12-a442-41eb-8317-584dabb6f953,1,21005.0,\nC. Other conditions and requirements of the Marketing Authorisation\n,"<p class=""TitleB""><span lang=""EN-GB"">C. Other conditions and requirements of the Marketing Authorisation</span></p><p class=""MsoNormal""><span lang=""EN-GB""> </span></p>"
9,747,21008,CAP,,Periodic safety update reports (PSURs),21007.0,· Periodic safety update reports (PSURs),64,b28cbddd-a05b-43e7-85bd-804d0c1faf4d,1,21007.0,\n· Periodic safety update reports (PSURs)\n \nThe requirements for submission of PSURs for this medicinal product are set out in the list of Union reference dates (EURD list) provided for...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.05pt;margin-bottom: 0in;margin-left:.25in;margin-bottom:.0001pt;text-indent:-.25in;line-height: normal""><span lang=""EN-GB"" style=""font-fam..."


2021-04-26 02:33:06,033 : XmlGeneration_e : Initiating XML Generation
2021-04-26 02:33:06,057 : XmlGeneration_e : Writing to File:Kalydeco II-86-PI-clean_ANNEX II.xml


Completed Extracting Content Between Heading
Created XML File For :- Kalydeco II-86-PI-clean_ANNEX II.json


### Labeling

In [328]:
documentNumber = 2
docFilter = "ANNEX III.json"
stopWordFilterLen = 6
stopWordlanguage = 'english'
start=0
end=1

In [329]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en
['Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json', 'Kalydeco II-86-PI-clean_ANNEX II.json', 'Kalydeco II-86-PI-clean_ANNEX III.json', 'Kalydeco II-86-PI-clean_SmPC.json']
['Kalydeco II-86-PI-clean_ANNEX III.json']
Starting Heading Extraction For File :- Kalydeco II-86-PI-clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ANNEX III.json
--------------------------------------------
Labelling
True  ||  <=1|25.0|(86, 100, 95)|0.92|  ||  A. LABELLING  ||  LABELLING 
True  ||  Contains<>|111.76|(64, 85, 86)|0.88|  ||  PARTICULARS TO APPEAR ON THE OUTER PACKAGING  ||  PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>
True  ||    ||  1.         NAME OF THE MEDICINAL PRODUCT  ||  1. NAME OF THE MEDICINAL PRODUCT
True  ||    ||  2.         STATEMENT OF ACTIVE SUBSTANCE(S)  ||  2. STATEMENT OF ACTIVE

True  ||    ||  15.       INSTRUCTIONS ON USE  ||  15. INSTRUCTIONS ON USE
True  ||    ||  16.       INFORMATION IN BRAILLE  ||  16. INFORMATION IN BRAILLE
True  ||    ||  17.       UNIQUE IDENTIFIER – 2D BARCODE  ||  17. UNIQUE IDENTIFIER – 2D BARCODE
True  ||  <=7|2.33|(98, 98, 100)|0.99|  ||  18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA  ||  18. UNIQUE IDENTIFIER - HUMAN READABLE DATA
True  ||    ||  MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS  ||  MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS
True  ||    ||  1.         NAME OF THE MEDICINAL PRODUCT  ||  1. NAME OF THE MEDICINAL PRODUCT

OriginalCheck
('<=7|26.32|(87, 87, 92)|0.87|', '2.         NAME OF THE MARKETING AUTHORISATION HOLDER', '11. NAME AND ADDRESS OF THE MARKETING AUTHORISATION HOLDER')

True  ||    ||  2.         NAME OF THE MARKETING AUTHORISATION HOLDER  ||  2. NAME OF THE MARKETING AUTHORISATION HOLDER
True  ||    ||  3.         EXPIRY DATE  ||  3. EXPIRY DATE
True  ||  Contains<>|173.33|(54, 100

True  ||    ||  15.       INSTRUCTIONS ON USE  ||  15. INSTRUCTIONS ON USE
True  ||    ||  16.       INFORMATION IN BRAILLE  ||  16. INFORMATION IN BRAILLE
True  ||    ||  17.       UNIQUE IDENTIFIER – 2D BARCODE  ||  17. UNIQUE IDENTIFIER – 2D BARCODE
True  ||  <=7|2.33|(98, 98, 100)|0.99|  ||  18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA  ||  18. UNIQUE IDENTIFIER - HUMAN READABLE DATA
True  ||    ||  1.         NAME OF THE MEDICINAL PRODUCT  ||  1. NAME OF THE MEDICINAL PRODUCT
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  <=7|9.38|(96, 100, 97)|0.99|  ||  2.         STATEMENT OF ACTIVE SUBSTANCE  ||  2. STATEMENT OF ACTIVE SUBSTANCE(S)
True  ||    ||  3.         LIST OF EXCIPIENTS  ||  3. LIST OF EXCIPIENTS
True  ||    ||  4.         PHARMACEUTICAL FORM AND CONTENTS  ||  4. PHARMACEUTICAL FORM AND CONTENTS
True  ||    ||  5.         METHOD AND ROUTE(S) OF ADMINISTRATION  ||  5. METHOD AND ROUT

True  ||    ||  MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS  ||  MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS
True  ||    ||  1.       NAME OF THE MEDICINAL PRODUCT AND ROUTE(S) OF ADMINISTRATION  ||  1. NAME OF THE MEDICINAL PRODUCT AND ROUTE(S) OF ADMINISTRATION

OriginalCheck
('<=7|51.85|(78, 74, 91)|0.85|', '2.       METHOD OF ADMINISTRATION', '5. METHOD AND ROUTE(S) OF ADMINISTRATION')

True  ||    ||  2.       METHOD OF ADMINISTRATION  ||  2. METHOD OF ADMINISTRATION
True  ||    ||  3.       EXPIRY DATE  ||  3. EXPIRY DATE
True  ||  Contains<>|173.33|(54, 100, 90)|0.87|  ||  4.       BATCH NUMBER  ||  4. BATCH NUMBER<, DONATION AND PRODUCT CODES>
True  ||    ||  5.       CONTENTS BY WEIGHT, BY VOLUME OR BY UNIT  ||  5. CONTENTS BY WEIGHT, BY VOLUME OR BY UNIT
True  ||    ||  6.       OTHER  ||  6. OTHER
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
True  ||  Contain

2021-04-26 02:34:42,462 : ExtractContentBetween_R : Cleaning Match Results
2021-04-26 02:34:42,467 : ExtractContentBetween_R : Finished Cleaning Match Results
2021-04-26 02:34:42,498 : ExtractContentBetween_R : Extracting Content Between Headings



All mandatory headings have been found !!!

dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- Kalydeco II-86-PI-clean_ANNEX III.json
Error Found
Completed Document Annotation For File
Starting Extracting Content Between Heading For File :- Kalydeco II-86-PI-clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ANNEX III.json
--------------------------------------------


2021-04-26 02:34:43,043 : ExtractContentBetween_R : Finished Extracting Content Between Headings


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,753,22001,CAP,,LABELLING,,A. LABELLING,66,d3dc3b68-fa13-4e32-888e-68f5482bacc0,0,,\nA. LABELLING\n \n\n \n,"<p class=""TitleA""><span lang=""EN-GB"">A. LABELLING</span></p><span lang=""EN-GB"" style='font-size:11.0pt;font-family:""Times New Roman"",serif; color:black'><br clear=""all"" style=""page-break-before:al..."
1,754,22002,CAP,,PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>,22001.0,PARTICULARS TO APPEAR ON THE OUTER PACKAGING,71,6561322c-ce30-4c5f-abb7-c190b4f2f183,0,22001.0,\nPARTICULARS TO APPEAR ON THE OUTER PACKAGING\n \nOuter carton FOR BLISTER – 56-TABLET PACK\n \n \n,"<p class=""MsoNormal"" style=""line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""line-height:normal""><span lang=""EN-GB""> </span></p><div style=""border:solid windowtext 1...."
2,755,22003,CAP,1.0,NAME OF THE MEDICINAL PRODUCT,22002.0,1. NAME OF THE MEDICINAL PRODUCT,77,77797b48-9992-403a-9aa0-fff3ee0ba850,0,22002.0,\n1. NAME OF THE MEDICINAL PRODUCT\n \nKalydeco 150 mg film‑coated tablets\nivacaftor\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
3,756,22004,CAP,2.0,STATEMENT OF ACTIVE SUBSTANCE(S),22002.0,2. STATEMENT OF ACTIVE SUBSTANCE(S),84,2728ad67-c144-44f2-a67e-d923de282215,0,22002.0,\n2. STATEMENT OF ACTIVE SUBSTANCE(S)\n \nEach tablet contains 150 mg of ivacaftor.\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
4,757,22005,CAP,3.0,LIST OF EXCIPIENTS,22002.0,3. LIST OF EXCIPIENTS,90,830c5f59-3587-47b0-88c2-f6f2decfe585,0,22002.0,\n3. LIST OF EXCIPIENTS\n \nContains lactose.\n \nSee leaflet for further information.\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
5,758,22006,CAP,4.0,PHARMACEUTICAL FORM AND CONTENTS,22002.0,4. PHARMACEUTICAL FORM AND CONTENTS,98,60742f20-10f1-44d5-80a3-43da7c266804,0,22002.0,\n4. PHARMACEUTICAL FORM AND CONTENTS\n \n56 tablets\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
6,759,22007,CAP,5.0,METHOD AND ROUTE(S) OF ADMINISTRATION,22002.0,5. METHOD AND ROUTE(S) OF ADMINISTRATION,104,172ffc1d-7746-4cb4-88e9-6ef873c1e25c,0,22002.0,"\n5. METHOD AND ROUTE(S) OF ADMINISTRATION\n \nRead the package leaflet before use.\n \nOral use \n \nInstructions for use\n \nTake with fat-containing food. \n \nDo not break, chew or dis...","<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
7,760,22008,CAP,6.0,SPECIAL WARNING THAT THE MEDICINAL PRODUCT MUST BE STORED OUT OF THE SIGHT AND REACH OF CHILDREN,22002.0,6. SPECIAL WARNING THAT THE MEDICINAL PRODUCT MUST BE STORED OUT OF THE SIGHT AND REACH OF CHILDREN,118,882e37f6-a2ce-4612-a44b-93dfcc51c148,0,22002.0,\n6. SPECIAL WARNING THAT THE MEDICINAL PRODUCT MUST BE STORED OUT OF THE SIGHT AND REACH OF CHILDREN\n \nKeep out of the sight and reach of children.\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
8,761,22009,CAP,7.0,"OTHER SPECIAL WARNING(S), IF NECESSARY",22002.0,"7. OTHER SPECIAL WARNING(S), IF NECESSARY",124,67ef58b7-02b3-4f51-932e-eaeb8d0bfbd8,0,22002.0,"\n7. OTHER SPECIAL WARNING(S), IF NECESSARY\n \n \n","<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal""..."
9,762,22010,CAP,8.0,EXPIRY DATE,22002.0,8. EXPIRY DATE,128,1d219060-671c-46ab-a24c-d779b86de48a,0,22002.0,\n8. EXPIRY DATE\n \nEXP\n \n \n,"<p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-.5in;line-height:normal; page-break-after:avoid""><span lang=""EN-GB""> </span></p><p class=""MsoNormal"" style=""margin-left:.5in;text-indent:-..."


2021-04-26 02:34:43,172 : XmlGeneration_O : Initiating XML Generation


Completed Extracting Content Between Heading


2021-04-26 02:34:43,435 : XmlGeneration_O : Writing to File:Kalydeco II-86-PI-clean_ANNEX III.xml


Created XML File For :- Kalydeco II-86-PI-clean_ANNEX III.json


### Package Leaflet

In [330]:
documentNumber = 3
docFilter = "LEAFLET.json"
stopWordFilterLen = 100
stopWordlanguage = 'english'
start=0
end=1
isPackageLeaflet = True

In [331]:
a,b,c = parseDocuments(procedureType,
               languageCode,
               documentNumber,
               docFilter,
               fileNameQrd,
               fileNameMatchRuleBook,
               fileNameDocumentTypeNames,
               stopWordFilterLen,
               start,
               end,
               isPackageLeaflet,
               "Kalydeco"
              )

F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en
['Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json', 'Kalydeco II-86-PI-clean_ANNEX II.json', 'Kalydeco II-86-PI-clean_ANNEX III.json', 'Kalydeco II-86-PI-clean_SmPC.json']
['Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json']
Starting Heading Extraction For File :- Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet
True  ||  <=4|16.67|(91, 100, 95)|0.91|  ||  B. PACKAGE LEAFLET  ||  PACKAGE LEAFLET
True  ||    ||  What is in this leaflet  ||  What is in this leaflet
----------------------------------
RemovedByStyle  ||    ||  1.   What Kalydeco is and what it is used for  ||  1. What Kalydeco is and what it is used for
----------------------------------
----------------------------------
RemovedByStyle  ||  Contains<

True  ||    ||  6.              Contents of the pack and other information  ||  6. Contents of the pack and other information
True  ||    ||  What Kalydeco contains  ||  What Kalydeco contains 
True  ||    ||  What Kalydeco looks like and contents of the pack  ||  What Kalydeco looks like and contents of the pack
True  ||  Contains<>|63.33|(76, 100, 90)|0.95|  ||  Marketing Authorisation Holder  ||  Marketing Authorisation Holder <and Manufacturer>
True  ||  SpecialCase1|84.38|(70, 100, 90)|0.94|  ||  This leaflet was last revised in   ||  This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.
True  ||    ||  Other sources of information  ||  Other sources of information


2021-04-26 02:37:43,224 : ExtractContentBetween_C : Cleaning Match Results
2021-04-26 02:37:43,224 : ExtractContentBetween_C : Cleaning Match Results
2021-04-26 02:37:43,230 : ExtractContentBetween_C : Finished Cleaning Match Results
2021-04-26 02:37:43,230 : ExtractContentBetween_C : Finished Cleaning Match Results
2021-04-26 02:37:43,244 : ExtractContentBetween_C : Extracting Content Between Headings
2021-04-26 02:37:43,244 : ExtractContentBetween_C : Extracting Content Between Headings
2021-04-26 02:37:43,258 : ExtractContentBetween_C : Finished Extracting Content Between Headings
2021-04-26 02:37:43,258 : ExtractContentBetween_C : Finished Extracting Content Between Headings




Heading Not Found 
 ['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', 'Do not <take> <use> X', 'X contains {name the excipient(s)}', 'For any information about this medicine, please contact the local representative of the Marketing Authorisation Holder:', 'The following information is intended for healthcare professionals only:']


dict_keys(['1. What Kalydeco is and what it is used for', '2. What you need to know before you <take> <use> Kalydeco ', '3. How to <take> <use> Kalydeco ', '4. Possible side effects', '5. How to store Kalydeco', '6. Contents of the pack and other information', 'Kalydeco contains {name the excipient(s)}'])
Completed Heading Extraction For File
Starting Document Annotation For File :- Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json
Error Found
Completed Document Annotation For Fi

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,786,23001,CAP,,PACKAGE LEAFLET,,B. PACKAGE LEAFLET,26,2e155332-ec5e-4fcd-9322-ad35c12af6d6,0,,\nB. PACKAGE LEAFLET\n \n \n\n \nPackage leaflet: Information for the patient\n \nKalydeco 150 mg film-coated tablets\nivacaftor\n \n \nRead all of this leaflet carefully before you start taking t...,"<p class=""TitleA""><span lang=""EN-GB"">B. PACKAGE LEAFLET</span></p><p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:normal""><span lang=""EN-GB""> </span></p><span lang=""EN-GB""..."
1,788,23003,CAP,,What is in this leaflet,23001.0,What is in this leaflet,43,499708a1-a643-4dba-a3b5-e575a206dd18,0,23001.0,\nWhat is in this leaflet\n \n1. What Kalydeco is and what it is used for\n2. What you need to know before you take Kalydeco\n3. How to take Kalydeco\n4. Possible side effects\n5. How to...,"<p class=""MsoNormal"" style=""margin-right:-.1pt;line-height:normal;page-break-after: avoid""><b><span lang=""EN-GB"">What is in this leaflet</span></b></p><p class=""MsoNormal"" style=""margin-right:-.1p..."
2,789,23004,CAP,1.0,What X is and what it is used for,23001.0,1. What Kalydeco is and what it is used for,53,a6c8c684-a127-43f5-b083-60c91d6790ea,0,23001.0,\n1. What Kalydeco is and what it is used for\n \nKalydeco contains the active ingredient ivacaftor. Ivacaftor acts at the level of the cystic fibrosis transmembrane conductance regul...,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in; margin-left:28.5pt;margin-bottom:.0001pt;text-indent:-28.5pt;line-height:normal; page-break-after:avoid""><b><span l..."
3,790,23005,CAP,2.0,What you need to know before you <take> <use> X,23001.0,2. What you need to know before you take Kalydeco,64,b354febc-e886-41ad-a001-cb7532e21bdc,0,23001.0,\n2. What you need to know before you take Kalydeco\n \nDo not take Kalydeco\n \n· if you are allergic to ivacaftor or any of the other ingredients of this medicine (listed in...,"<p class=""MsoNormal"" style=""margin-left:0in;text-indent:0in;line-height:normal; page-break-after:avoid""><b><span lang=""EN-GB"" style=""text-transform:uppercase"">2.<span style='font:7.0pt ""Times New ..."
4,792,23007,CAP,,Warnings and precautions,23005.0,Warnings and precautions,70,32f0eed3-bc20-4f5b-b1b5-86db70e60310,0,23005.0,\nWarnings and precautions\n \n· Talk to your doctor if you have liver problems or have previously had them. Your doctor may need to adjust your dose.\n· Increased liver enzymes in...,"<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">Warnings and precautions</span></b></p><p class=""MsoNormal"" style=""line-height:normal;page-break-after:..."
5,793,23008,CAP,,Children <and adolescents>,23005.0,Children and adolescents,91,71e06e6e-ed18-4c42-865f-1f226ff01497,0,23005.0,\nChildren and adolescents\n \nDo not give this medicine to children under 4 months of age as it is not known if ivacaftor is safe and effective in these children.\n \nDo not give this medicine in...,"<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">Children and adolescents</span></b></p><p class=""MsoNormal"" style=""line-height:normal;page-break-after:..."
6,794,23009,CAP,,Other medicines and X,23005.0,Other medicines and Kalydeco,97,dbd563f8-92b1-49d1-9543-130039345ac7,0,23005.0,"\nOther medicines and Kalydeco\n \nTell your doctor or pharmacist if you are using, have recently used or might use any other medicines. Some medicines can affect how Kalydeco works or make side e...","<p class=""MsoNormal"" style=""margin-right:-.1pt;line-height:normal;page-break-after: avoid""><b><span lang=""EN-GB"">Other medicines and Kalydeco</span></b></p><p class=""MsoNormal"" style=""margin-right..."
7,795,23010,CAP,,"X with <food> <and> <,> <drink> <and> <alcohol>",23005.0,Kalydeco with food and drink,112,39f99c0d-9929-48f0-ac62-4c1fc04123c8,0,23005.0,\nKalydeco with food and drink\n \nAvoid food or drink containing grapefruit during treatment with Kalydeco as they may increase the side effects of Kalydeco by increasing the amount of ivacaftor ...,"<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">Kalydeco with food and drink</span></b></p><p class=""MsoNormal"" style=""line-height:normal;page-break-af..."
8,796,23011,CAP,,"Pregnancy <and> <,> breast-feeding <and fertility>",23005.0,Pregnancy and breast-feeding,116,a5081b49-0eba-4090-af3c-80877f7cd69f,0,23005.0,"\nPregnancy and breast-feeding\n \nIf you are pregnant or breast-feeding, think you may be pregnant or are planning to have a baby, ask your doctor for advice before taking this medicine. It may b...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">Pregnancy and breast-feeding</span></b></p><p class=""MsoNormal"" style=""line-height:normal;page-break-af..."
9,797,23012,CAP,,Driving and using machines,23005.0,Driving and using machines,122,cf1dcbfc-c215-49f9-92cf-0f5449b98103,0,23005.0,"\nDriving and using machines\n \nKalydeco can make you dizzy. If you feel dizzy, do not drive, cycle or use machines.\n \nImportant information about the contents of Kalydeco\n \nKalydeco contains...","<p class=""MsoNormal"" style=""margin-right:-.1pt;line-height:normal;page-break-after: avoid""><b><span lang=""EN-GB"">Driving and using machines</span></b></p><p class=""MsoNormal"" style=""margin-right:-..."


2021-04-26 02:37:43,300 : XmlGeneration_d : Initiating XML Generation
2021-04-26 02:37:43,404 : XmlGeneration_d : Writing to File:Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.xml


Completed Extracting Content Between Heading
Created XML File For :- Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json


In [288]:
from match.rulebook.matchRulebook import MatchRuleBook

rules= MatchRuleBook(
            fileNameRuleBook= fileNameMatchRuleBook,
            procedureType= procedureType,
            languageCode= languageCode,
            documentNumber= 0).ruleDict



In [289]:
from match.matchStrings.matchStrings import MatchStrings

In [290]:
matchString = MatchStrings(0, rules, 6, 'german')


In [291]:
o,t = '6.5     Art und Inhalt des Behältnisses', '6.5 Art und Inhalt des Behältnisses <und spezielles Zubehör für den Gebrauch, die Anwendung oder die Implantation>'

#o = o.encode()
#print(o.decode())
matchString.matchStrings(o,t,52)

(False, 'SpecialCase1|180.0|(45, 77, 86)|0.67|')

In [337]:
fileNameDoc

NameError: name 'fileNameDoc' is not defined

In [353]:
from utils.logger.matchLogger import MatchLogger

In [354]:
 os.path.join(os.path.abspath(os.path.join('..')), 'code','utils','matchLog.txt')

'F:\\Projects\\EMA\\Repository\\EMA EPI PoC\\function_code\\code\\utils\\matchLog.txt'

In [355]:
m = MatchLogger("MatchLogger",'Kalydeco II-86-PI-clean_ PACKAGE LEAFLET.json', 'CAP', 'en', 'SmPC', fileNameLog = os.path.join(os.path.abspath(os.path.join('..')), 'code','utils','matchLog.txt'))

In [356]:
extra = {"custom_dimensions": m.customDimension}

In [357]:
m.logger.info("Match Found:",extra=extra)


2021-04-26 03:26:25,972 : MatchLogger : Match Found:
2021-04-26 03:26:25,972 : MatchLogger : Match Found:
2021-04-26 03:26:25,972 : MatchLogger : Match Found:
2021-04-26 03:26:25,972 : MatchLogger : Match Found:
