In [1]:
import sys, os
# Put the sibling ``scripts`` directory (one level above the notebook's working
# directory) on the import path; presumably this is where the project packages
# imported in the next cell live.
module_path = os.path.join(os.path.abspath('..'), 'scripts')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string

from utils.config import config
from utils.logger.logger import loggerCreator
from requests.exceptions import HTTPError

## ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService

%load_ext autoreload

%autoreload 2

In [5]:
def getRandomString(N):
    """Return a random string made of ``N`` characters.

    Every character is drawn independently with ``random.choice`` from the
    upper-case ASCII letters, the digits and the lower-case ASCII letters.
    ``N <= 0`` yields the empty string.

    The ``random`` module is not cryptographically secure, so the result is
    only suitable for labels (the notebook uses it to suffix logger names).
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    picked = []
    for _ in range(N):
        picked.append(random.choice(alphabet))
    return ''.join(picked)



# Set Required Fields for the Parsing and Partition Modules

### Please ensure that your `converted_to_html` folder contains the HTML files inside their language-specific subfolders

Example: if your language code is `en`, all HTML files must reside in the `converted_to_html/en` folder. If that folder does not exist, the parsing stage raises a `FolderNotFoundError`.

### English

In [6]:
# English run. Execute only one of the language cells: each one rebinds the
# same three names, so the last cell executed wins.
procedureType = 'CAP'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'  # handed to StyleRulesDictionary as fileName
ePILanguage = 'en'                              # also the name of the language sub-folder

### German

In [7]:
# German run. Execute only one of the language cells: each one rebinds the
# same three names, so the last cell executed wins.
procedureType = 'CAP'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'  # handed to StyleRulesDictionary as fileName
ePILanguage = 'de'                              # also the name of the language sub-folder

### Spanish

In [9]:
# Spanish run. Execute only one of the language cells: each one rebinds the
# same three names, so the last cell executed wins.
procedureType = 'CAP'
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'  # handed to StyleRulesDictionary as fileName
ePILanguage = 'es'                              # also the name of the language sub-folder

# Html Parsing Stage

In [10]:
class FolderNotFoundError(Exception):
    """Raised by the parsing stage when the language-specific input folder is missing."""

## Generate input folder path: ../data/converted_to_html/<language>
data_root = os.path.join(os.path.abspath('..'), 'data')
module_path = os.path.join(data_root, 'converted_to_html', ePILanguage)

## Generate output folder path: ../data/outputJSON/<language>
## Built with os.path.join (the same way the partition stage locates it) instead
## of replacing 'converted_to_html' inside the absolute path, which would also
## rewrite any parent directory that happens to contain that text.
output_json_path = os.path.join(data_root, 'outputJSON', ePILanguage)

## Check that the input folder exists, else raise
if not os.path.isdir(module_path):
    raise FolderNotFoundError(module_path + " not found")

filenames = glob.glob(os.path.join(module_path, '*.html'))
filenames.extend(glob.glob(os.path.join(module_path, '*.htm')))

## Create the language specific folder in outputJSON if it doesn't exist.
## makedirs also creates a missing parent 'outputJSON' folder, where os.mkdir
## would fail, and exist_ok avoids the check-then-create race.
os.makedirs(output_json_path, exist_ok=True)

logger = loggerCreator('Parser_' + getRandomString(1))

styleRulesObj = StyleRulesDictionary(
    loggerCreator('Style Dictionary_' + getRandomString(1)),
    language=ePILanguage,
    fileName=fileNameQrd,
    procedureType=procedureType,
)

parserObj = parserExtractor(
    config,
    logger,
    styleRulesObj.styleRuleDict,
    styleRulesObj.styleFeatureKeyList,
    styleRulesObj.qrd_section_headings,
)

for input_filename in filenames:
    # Swap only the real extension for '.json'. The previous chained
    # str.replace('.html') / str.replace('.htm') rewrote every occurrence in
    # the whole path and missed upper-case extensions such as '.HTM'.
    base_name = os.path.splitext(os.path.basename(input_filename))[0]
    output_filename = os.path.join(output_json_path, base_name + '.json')
    print(input_filename, output_filename)
    parserObj.createPIJsonFromHTML(
        input_filepath=input_filename,
        output_filepath=output_filename,
        img_base64_dict=parserObj.convertImgToBase64(input_filename),
    )

2021-04-28 14:05:39,012 : Style Dictionary_l : Creating default style dictionary in file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\styleRules\rule_dictionary_es.json
2021-04-28 14:05:39,049 : Style Dictionary_l : Qrd Section Keys Generated: ANEXO I, ANEXO II, ANEXO III, B. PROSPECTO


C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\converted_to_html\es\emea-combined-h-2494-es.htm C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\outputJSON\es\emea-combined-h-2494-es.json


2021-04-28 14:05:44,023 : Parser_T : Writing to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\outputJSON\es\emea-combined-h-2494-es.json


# Partition Stage

In [6]:
# Folder holding the parser's JSON output for the selected language.
path_json = os.path.join(os.path.abspath('..'), 'data', 'outputJSON', ePILanguage)

# Style rules for the selected language / QRD template; only their
# qrd_section_headings are used by this cell.
styleRulesObj = StyleRulesDictionary(
    loggerCreator('Style Dictionary_' + getRandomString(1)),
    language=ePILanguage,
    fileName=fileNameQrd,
    procedureType=procedureType,
)

# Partition every JSON document found under path_json by the QRD section headings.
partitionlogger = loggerCreator('Partition_' + getRandomString(1))
partitioner = DocTypePartitioner(partitionlogger)
partitioner.partitionHtmls(styleRulesObj.qrd_section_headings, path_json)

2021-04-26 14:47:47,706 : Style Dictionary_K : Reading style dictionary in file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\styleRules\rule_dictionary_de.json
2021-04-26 14:47:47,742 : Style Dictionary_K : Qrd Section Keys Generated: ANHANG I, ANHANG II, ANHANG III, B. PACKUNGSBEILAGE
2021-04-26 14:47:47,744 : Partition_g : Partitioning Json: emea-combined-h-2494-de.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
0,"<div class=""WordSection1""> <p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE""> </span></b></p> <p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span ...",c2fa6683-2938-4ca2-90fe-b6f8c775a015,,['WordSection1'],False,False,False,False,False,False,False,False,,...,3eb2e153-bc1e-4848-b7db-42b79c750fa7
24,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;line-height:13.0pt""><b><span lang=""DE"">ANHANG I</span></b></p>",b08f51ee-2061-4632-b2b8-90cbc4d6ad39,text-align:center;line-height:13.0pt,['MsoNormal'],True,False,True,False,False,False,False,True,,ANHANG I,c2fa6683-2938-4ca2-90fe-b6f8c775a015
26,"<p class=""TitleA""><span lang=""DE"">ZUSAMMENFASSUNG DER MERKMALE DES ARZNEIMITTELS</span></p>",02271640-d5b1-47a5-99b5-22a5f3f876ea,,['TitleA'],True,False,True,False,False,False,False,False,,ZUSAMMENFASSUNG DER MERKMALE DES ARZNEIMITTELS,c2fa6683-2938-4ca2-90fe-b6f8c775a015
33,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;page-break-after: avoid""><b><span lang=""DE"">1. BEZEICHNUNG DES ARZNEIMITTELS</span></b></p>",973c38f0-80b6-4d2c-94dd-d904a8941a74,margin-left:28.35pt;text-indent:-28.35pt;page-break-after:\navoid,['MsoNormal'],True,False,True,False,True,False,False,True,L1,1. BEZEICHNUNG DES ARZNEIMITTELS,c2fa6683-2938-4ca2-90fe-b6f8c775a015
35,"<p class=""MsoNormal""><span lang=""DE"">Kalydeco 75 mg Filmtabletten</span></p>",c6d7d7ff-c831-4eaf-83df-0b6617b839d8,,['MsoNormal'],False,False,False,False,False,False,False,False,,Kalydeco 75 mg Filmtabletten,c2fa6683-2938-4ca2-90fe-b6f8c775a015


2021-04-26 14:47:47,929 : Partition_g : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_SmPC.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
1246,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE"" style=""text-transform:uppercase"">ANhang II</span></b></p>",a368e379-aa0d-40c9-87b9-d28db934159e,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,,ANhang II,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1248,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">A. HE...",cab7b27a-7d40-404a-aa64-9af8779b0e62,margin-top:0in;margin-right:70.8pt;margin-bottom:\n0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height:\n13.0pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,"A. HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST",c2fa6683-2938-4ca2-90fe-b6f8c775a015
1250,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">B. BE...",d90aa509-eb21-463c-9b0a-6753dd0a3d80,margin-top:0in;margin-right:70.8pt;margin-bottom:\n0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height:\n13.0pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1252,"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom: 0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height: 13.0pt""><b><span lang=""DE"">C. </...",49d0759e-f742-4b48-a165-8aafe5ef8caa,margin-top:0in;margin-right:70.8pt;margin-bottom:\n0in;margin-left:85.05pt;margin-bottom:.0001pt;text-indent:-35.4pt;line-height:\n13.0pt,['MsoNormal'],True,False,True,False,True,False,False,True,L1,C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1254,"<p class=""NormalAgency"" style=""margin-left:85.05pt;text-indent:-35.4pt""><b><span lang=""DE"" style='font-size:11.0pt;font-family:""Times New Roman"",serif'>D.</span></b><span lang=""DE"" style='font-siz...",cac700fc-c655-4cac-a82b-ecc4334f65f6,margin-left:85.05pt;text-indent:-35.4pt,['NormalAgency'],True,False,True,False,True,False,False,True,L1,D. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE SICHERE UND WIRKSAME ANWENDUNG DES ARZNEIMITTELS,c2fa6683-2938-4ca2-90fe-b6f8c775a015


2021-04-26 14:47:48,025 : Partition_g : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG II.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
1331,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE"">ANHANG III</span></b></p>",e955a9a7-a525-46ea-849a-d4757b825ce9,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,L1,ANHANG III,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1333,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE"">ETIKETTIERUNG UND PACKUNGSBEILAGE</span></b></p>",ca906ac9-5429-44ad-851d-fb8fec3165b5,text-align:center,['MsoNormal'],True,False,True,False,False,False,False,True,L1,ETIKETTIERUNG UND PACKUNGSBEILAGE,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1359,"<p class=""TitleA""><span lang=""DE"">A. ETIKETTIERUNG</span></p>",d1e1a849-437a-4d31-bc5f-2fd1a26c031c,,['TitleA'],True,False,True,False,True,False,False,True,L2,A. ETIKETTIERUNG,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1363,"<div style=""border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt""> <p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;border:none; padding:0in""><b><span lang=""DE"">ANGABE...",e90f9bbd-c944-40a9-b219-85a19b9c13a6,border:solid windowtext 1.0pt;padding:1.0pt 4.0pt 1.0pt 4.0pt,,False,False,False,False,False,False,True,False,,,c2fa6683-2938-4ca2-90fe-b6f8c775a015
1364,"<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;border:none; padding:0in""><b><span lang=""DE"">ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG</span></b></p>",a7e27c18-266b-4ad4-a142-45361d45123d,margin-left:28.35pt;text-indent:-28.35pt;border:none;\npadding:0in,['MsoNormal'],True,False,True,False,False,False,False,True,L1,ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG,e90f9bbd-c944-40a9-b219-85a19b9c13a6


2021-04-26 14:47:48,104 : Partition_g : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ANHANG III.json


*************************** Texts with more than 2 characters**************************************


Unnamed: 0,Element,ID,Styles,Classes,Bold,Italics,Uppercased,Underlined,Indexed,IsListItem,HasBorder,IsPossibleHeading,IsHeadingType,Text,ParentId
3213,"<p class=""TitleA""><span lang=""DE"">B. PACKUNGSBEILAGE</span></p>",c3972417-d66d-4cc5-88fa-c647e224fc9e,,['TitleA'],True,False,True,False,True,False,False,True,,B. PACKUNGSBEILAGE,c2fa6683-2938-4ca2-90fe-b6f8c775a015
3217,"<p align=""center"" class=""MsoNormal"" style=""text-align:center""><b><span lang=""DE"">Gebrauchsinformation: Information für Patienten</span></b></p>",9d399792-f8ff-42f1-889c-ab23e7bfd23d,text-align:center,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Gebrauchsinformation: Information für Patienten,c2fa6683-2938-4ca2-90fe-b6f8c775a015
3219,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><b><span lang=""DE"">Kalydeco 75 mg Filmtabletten</span></b></p>",11415641-b57c-46e4-872f-c3ce9a3b2c81,text-align:center;page-break-after:avoid,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Kalydeco 75 mg Filmtabletten,c2fa6683-2938-4ca2-90fe-b6f8c775a015
3220,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><b><span lang=""DE"">Kalydeco 150 mg Filmtabletten</span></b></p>",e9fd8074-4c9e-4a82-848d-360d49e10c2f,text-align:center;page-break-after:avoid,['MsoNormal'],True,False,False,False,False,False,False,True,L2,Kalydeco 150 mg Filmtabletten,c2fa6683-2938-4ca2-90fe-b6f8c775a015
3221,"<p align=""center"" class=""MsoNormal"" style=""text-align:center;page-break-after:avoid""><span lang=""DE"">Ivacaftor</span></p>",67477362-c7bc-4474-88e3-35322973a8ec,text-align:center;page-break-after:avoid,['MsoNormal'],False,False,False,False,False,False,False,False,,Ivacaftor,c2fa6683-2938-4ca2-90fe-b6f8c775a015


2021-04-26 14:47:48,143 : Partition_g : Writing partition to file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\de\emea-combined-h-2494-de_ PACKUNGSBEILAGE.json


# Matching Stage

In [27]:
# Matching configuration: SmPC partition of the Kalydeco document.
# Run exactly one matching-configuration cell before building MatchDocument;
# every name below except previousHeadingRowFound and docFilter is passed to it.

# Document to match
fileNameDoc = 'Kalydeco II-86-PI-clean_SmPC.json'
documentType = 'SmPC'
docFilter = 'SmPC.json'
isPackageLeaflet = False

# Template, rule book and language
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'
procedureType = 'CAP'
languageCode = 'en'
stopWordlanguage = 'english'

# Matching parameters
stopWordFilterLen = 6
topHeadingsConsidered = 4
bottomHeadingsConsidered = 6

previousHeadingRowFound = None

In [52]:
# Matching configuration: Annex II partition of the Abilify document.
# Run exactly one matching-configuration cell before building MatchDocument;
# every name below except previousHeadingRowFound and docFilter is passed to it.

# Document to match
fileNameDoc = 'Abilify-h-471-e_ANNEX II.json'
documentType = 'AnnexII'
docFilter = 'ANNEX II.json'
isPackageLeaflet = False

# Template, rule book and language
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'
procedureType = 'CAP'
languageCode = 'en'
stopWordlanguage = 'english'

# Matching parameters
stopWordFilterLen = 6
topHeadingsConsidered = 4
bottomHeadingsConsidered = 6

previousHeadingRowFound = None


In [54]:

# Matching configuration: package leaflet partition of the Abilify document.
# Run exactly one matching-configuration cell before building MatchDocument;
# every name below except previousHeadingRowFound and docFilter is passed to it.

# Document to match
fileNameDoc = 'Abilify-h-471-e_ PACKAGE LEAFLET.json'
documentType = 'Package leaflet'
docFilter = 'LEAFLET.json'
isPackageLeaflet = True

# Template, rule book and language
fileNameQrd = 'qrd_canonical_mode_CAP_NAP.csv'
fileNameMatchRuleBook = 'ruleDict.json'
procedureType = 'CAP'
languageCode = 'en'
stopWordlanguage = 'english'

# Matching parameters (different from the SmPC / Annex II cells)
stopWordFilterLen = 100
topHeadingsConsidered = 5
bottomHeadingsConsidered = 10

previousHeadingRowFound = None

In [28]:
# Build the matcher from whichever matching-configuration cell was executed
# last. The arguments are positional, so their order must stay in step with
# the MatchDocument constructor (its signature is not visible in this notebook).
matchDocObj = MatchDocument(procedureType,
                 languageCode,
                 documentType,
                 fileNameDoc,
                 fileNameQrd,
                 fileNameMatchRuleBook,
                 topHeadingsConsidered,
                 bottomHeadingsConsidered,
                 stopWordFilterLen,
                 stopWordlanguage,
                 isPackageLeaflet)

File being processed: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_SmPC.json
--------------------------------------------


In [29]:
    # Run the heading match. The call returns two values: `coll` is consumed by
    # the content-extraction cell, and both `df` and `coll` by the
    # document-annotation cell further down.
    df, coll = matchDocObj.matchHtmlHeaddingsWithQrd()

True  ||  <=7|3.12|(98, 97, 98)|0.99|  ||  1.       NAME OF THE MEDICINAL PRODUCT  ||  1 NAME OF THE MEDICINAL PRODUCT
True  ||  <=7|2.33|(99, 98, 99)|0.99|  ||  2.       QUALITATIVE AND QUANTITATIVE COMPOSITION  ||  2 QUALITATIVE AND QUANTITATIVE COMPOSITION
True  ||  <=4|11.11|(95, 89, 95)|0.99|  ||  Excipient with known effect  ||  Excipient(s) with known effect
True  ||  <=4|4.55|(98, 95, 98)|0.98|  ||  3.       PHARMACEUTICAL FORM  ||  3 PHARMACEUTICAL FORM
True  ||  <=4|4.35|(98, 95, 98)|0.99|  ||  4.       CLINICAL PARTICULARS  ||  4 CLINICAL PARTICULARS
True  ||    ||  4.1     Therapeutic indications  ||  4.1 Therapeutic indications
True  ||    ||  4.2     Posology and method of administration  ||  4.2 Posology and method of administration
True  ||    ||  Posology  ||  Posology
True  ||    ||  Paediatric population  ||  Paediatric population
True  ||    ||  4.3       Contraindications  ||  4.3 Contraindications
True  ||    ||  Paediatric population  ||  Paediatric population
Tr

True  ||  Contains<>|14.63|(93, 90, 95)|0.93|  ||  6.6     Special precautions for disposal and other handling  ||  6.6 Special precautions for disposal <and other handling>
True  ||  <=4|3.03|(98, 97, 98)|0.99|  ||  7.       MARKETING AUTHORISATION HOLDER  ||  7 MARKETING AUTHORISATION HOLDER
True  ||  <=4|2.78|(99, 97, 99)|0.99|  ||  8.       MARKETING AUTHORISATION NUMBER(S)  ||  8 MARKETING AUTHORISATION NUMBER(S)
True  ||  <=7|2.04|(99, 98, 99)|0.99|  ||  9.       DATE OF FIRST AUTHORISATION/RENEWAL OF THE AUTHORISATION  ||  9 DATE OF FIRST AUTHORISATION/RENEWAL OF THE AUTHORISATION
True  ||  <=4|4.55|(98, 95, 98)|0.98|  ||  10.     DATE OF REVISION OF THE TEXT  ||  10 DATE OF REVISION OF THE TEXT


Heading Not Found 
 ['SUMMARY OF PRODUCT CHARACTERISTICS', 'Method of administration ']




# Content Extraction Stage

In [32]:
extractContentlogger = loggerCreator('ExtractContentBetween_'+ getRandomString(1))
extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, coll)

# `coll` was produced by the matcher that was built with languageCode and
# fileNameDoc, so the content must be extracted from that same document. The
# previous hard-coded 'Kalydeco II-86-PI-clean_SmPC.json' / ePILanguage pair
# only agreed with `coll` when the Kalydeco SmPC configuration was selected.
dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(languageCode, fileNameDoc)

2021-04-26 20:47:47,647 : ExtractContentBetween_R : Cleaning Match Results
2021-04-26 20:47:47,657 : ExtractContentBetween_R : Finished Cleaning Match Results
2021-04-26 20:47:47,747 : ExtractContentBetween_R : Extracting Content Between Headings
2021-04-26 20:47:47,812 : ExtractContentBetween_R : Finished Extracting Content Between Headings


File being processed: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\partitionedJSONs\en\Kalydeco II-86-PI-clean_SmPC.json
--------------------------------------------


Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,Text,Html_betw
0,682,20003,CAP,1.0,NAME OF THE MEDICINAL PRODUCT,20001,1. NAME OF THE MEDICINAL PRODUCT,32,ac7e3cae-7fd6-41b8-94cf-305b41a00e91,0,,\n1. NAME OF THE MEDICINAL PRODUCT\n \nKalydeco 150 mg film‑coated tablets\n \n,"<h1 style=""margin:0in""><span lang=""EN-GB"" style='font-size:11.0pt;font-family: ""Times New Roman"",serif'>1. NAME OF THE MEDICINAL PRODUCT</span></h1><p class=""MsoNormal"" style=""text-align:jus..."
1,683,20004,CAP,2.0,QUALITATIVE AND QUANTITATIVE COMPOSITION,20001,2. QUALITATIVE AND QUANTITATIVE COMPOSITION,37,262cf6cd-c84d-4a28-9a78-0e8b33e28b92,0,20003.0,\n2. QUALITATIVE AND QUANTITATIVE COMPOSITION\n \nEach film‑coated tablet contains 150 mg of ivacaftor.\n,"<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif;color:black'>2. QUALITATIVE AND QUANTITATIVE COMPOSITION</span></h1>..."
2,686,20007,CAP,,Excipient(s) with known effect,20006,Excipient with known effect,41,28158d1e-d09f-4439-ac89-83421a3e2a01,0,20004.0,"\nExcipient with known effect\n \nEach film‑coated tablet contains 167.2 mg of lactose monohydrate.\n \nFor the full list of excipients, see section 6.1.\n \n","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span lang=""EN-GB"">Excipient with known effect</span></u></p><p class=""MsoNormal"" style=""line-height:normal;page-break-aft..."
3,687,20008,CAP,3.0,PHARMACEUTICAL FORM,20001,3. PHARMACEUTICAL FORM,48,0ca8dd99-daba-438a-af65-4eff08b3dee2,0,20004.0,"\n3. PHARMACEUTICAL FORM\n \nFilm‑coated tablet (tablet)\n \nLight blue, capsule‑shaped film‑coated tablets, printed with “V 150” in black ink on one side and plain on the other (16.5 mm x 8...","<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif'>3. PHARMACEUTICAL FORM</span></h1><p class=""MsoNormal"" style=""text-..."
4,688,20009,CAP,4.0,CLINICAL PARTICULARS,20001,4. CLINICAL PARTICULARS,55,2f49d824-a6d1-44e3-b63d-ced3e2e38206,0,20008.0,\n4. CLINICAL PARTICULARS\n,"<h1 style=""margin:0in;line-height:normal""><span lang=""EN-GB"" style='font-size: 11.0pt;font-family:""Times New Roman"",serif;text-transform:uppercase'>4. </span><span lang=""EN-GB"" style='font-s..."
5,689,20010,CAP,4.1,Therapeutic indications,20009,4.1 Therapeutic indications,57,f53350f9-dd9f-43aa-8fce-3bb31ea4e162,0,20009.0,"\n4.1 Therapeutic indications\n \nKalydeco tablets are indicated: \n· As monotherapy for the treatment of adults, adolescents, and children aged 6 years and older and weighing 25 kg or...","<p class=""MsoNormal"" style=""margin-left:28.35pt;text-indent:-28.35pt;line-height: normal;page-break-after:avoid""><b><span lang=""EN-GB"">4.1 Therapeutic indications</span></b></p><p class=""MsoNo..."
6,690,20011,CAP,4.2,Posology and method of administration,20009,4.2 Posology and method of administration,66,56c56365-b661-49cc-b5d6-4f5d7ecc3926,0,20009.0,"\n4.2 Posology and method of administration\n \nKalydeco should only be prescribed by physicians with experience in the treatment of cystic fibrosis. If the patient's genotype is unknown, an a...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><b><span lang=""EN-GB"">4.2 Posology and method of administration</span></b></p><p class=""MsoNormal"" style=""text-align:just..."
7,691,20012,CAP,,Posology,20011,Posology,70,60d0bd00-3cc5-4a1d-9c94-06e664136333,0,20011.0,"\nPosology\n \nAdults, adolescents and children aged 6 years and older should be dosed according to Table 1. \n \nTable 1: Dosing recommendations\n Morning Evening Ivacaftor as monothe...","<p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span lang=""EN-GB"">Posology</span></u></p><p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid""><u><span ..."
8,692,20013,CAP,,Paediatric population,20012,Paediatric population,116,6388a71b-ea71-4fe4-adc5-40c00ed05119,0,20012.0,\nPaediatric population\n \nThe safety and efficacy of ivacaftor monotherapy in children aged less than 4 months have not been established. No data are available.\n \nAn appropriate dose for child...,"<p class=""MsoNormal"" style=""line-height:normal""><u><span lang=""EN-GB"">Paediatric population</span></u></p><p class=""MsoNormal"" style=""line-height:normal""><u><span lang=""EN-GB""><span style=""text-de..."
9,695,20016,CAP,4.3,Contraindications,20009,4.3 Contraindications,136,a60cdbf4-c466-47b6-ae62-b70df6747299,0,20009.0,\n4.3 Contraindications\n \nHypersensitivity to the active substance or to any of the excipients listed in section 6.1.\n,"<h2 style=""margin:0in""><span lang=""EN-GB"" style=""font-size:11.0pt"">4.3 Contraindications</span></h2><p class=""MsoNormal"" style=""line-height:normal;page-break-after:avoid;text-autospace: none..."


In [186]:

    # The second DocumentAnnotation argument looks like an API subscription key
    # for the service at the URL that follows it. It used to be embedded here
    # as a literal; it is now read from the environment so the secret is not
    # stored in the notebook. NOTE(review): the previously committed key should
    # be treated as exposed and rotated.
    pmsSubscriptionKey = os.environ.get('PMS_API_SUBSCRIPTION_KEY')
    if not pmsSubscriptionKey:
        raise EnvironmentError('Set the PMS_API_SUBSCRIPTION_KEY environment variable before running this cell')

    # Annotate the document that was actually matched (fileNameDoc) instead of
    # a hard-coded file name, so `df` / `coll` and the document always agree.
    documentAnnotationObj = DocumentAnnotation(fileNameDoc, pmsSubscriptionKey, 'https://spor-uat.azure-api.net/pms/api/v2/', df, coll)
    pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc(['EU/2/10/106/006','EU/2/09/098/001'])


EU/2/10/106/006
Found entry with code 220000000061
['LOC-100010489', '600000033247']
Bovilis BTV8 (--) - Suspension for injection
EU/2/09/098/001
Found entry with code 220000000061
['0', '600000035065']
Melovem 5 mg/ml  - Solution for injection


# XML Generation Stage

In [189]:
xmlLogger = loggerCreator('XmlGeneration_'+ getRandomString(1))
fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, pms_oms_annotation_data)

# Name the XML after the document that was processed rather than a hard-coded
# file: 'Kalydeco II-86-PI-clean_SmPC.json' still yields
# 'Kalydeco II-86-PI-clean_SmPC.xml', while other documents no longer get
# written under the Kalydeco name.
xmlFileName = os.path.splitext(fileNameDoc)[0] + '.xml'
generated_xml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, xmlFileName)

2021-04-28 13:53:55,991 : XmlGeneration_L : Initiating XML Generation
DEBUG:XmlGeneration_L:Initiating XML Generation
2021-04-28 13:53:56,017 : XmlGeneration_L : Writing to File:Kalydeco II-86-PI-clean_SmPC.xml
INFO:XmlGeneration_L:Writing to File:Kalydeco II-86-PI-clean_SmPC.xml


In [178]:
# Hand the value returned by FhirXmlGenerator.generateXml (XML generation cell)
# to FhirService and submit it. Presumably this POSTs to a FHIR server; the
# recorded output reports a successful POST. TODO confirm the target endpoint.
fhirServiceObj =  FhirService(generated_xml)
fhirServiceObj.submitFhirXml()

POST sucessful: XML added with id abf232f9-7786-48d5-ad9f-4c581520af29
