In [2]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames


class FolderNotFoundError(Exception):
    pass


def getRandomString(N):
    str_ = ''.join(random.choice(string.ascii_uppercase + string.digits
                                 + string.ascii_lowercase) for _ in range(N))
    return str_


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):

    module_path = os.path.join(basePath)

    if "/" in basePath:
        pathSep = "/"
    else:
        pathSep = "\\"
    
    # Generate output folder path
    output_json_path = os.path.join(basePath, 'outputJSON')

    """
        Check if input folder exists, else throw exception
    """
    if(os.path.exists(module_path)):
        filenames = glob.glob(os.path.join(module_path, htmlDocName))

        # Create language specific folder in outputJSON folder if it doesn't exist
        if(not os.path.exists(output_json_path)):
            os.mkdir(output_json_path)
        logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                             domain, procedureType, languageCode, "HTML", fileNameLog)

        styleLogger = MatchLogger(
            f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

        styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                             controlBasePath=controlBasePath,
                                             language=languageCode,
                                             fileName=fileNameQrd,
                                             domain=domain,
                                             procedureType=procedureType
                                             )

        parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                    styleRulesObj.styleFeatureKeyList,
                                    styleRulesObj.qrd_section_headings)

        for input_filename in filenames:
          # if(input_filename.find('Kalydeco II-86-PI-clean')!=-1):
            output_filename = os.path.join(output_json_path, htmlDocName)
            style_filepath =  output_filename.replace('.html','.txt')
            style_filepath =  style_filepath.replace('.txtl','.txt')
            style_filepath =  style_filepath.replace('.htm','.txt')
            print("-------------",style_filepath,"-----------------")

            output_filename = output_filename.replace('.html', '.json')
            output_filename = output_filename.replace('.htm', '.json')
            print(input_filename, output_filename)
            parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                           output_filepath=output_filename,
                                           style_filepath = style_filepath,
                                           img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                           )
            
        return output_filename.split(pathSep)[-1], style_filepath
    else:
        try:    
            raise FolderNotFoundError(module_path + " not found")
        except:  
            logger.logFlowCheckpoint("Folder For Language Code Not Found In Input File")
            logger.logException("Folder For Language Code Not Found In Input File")
        raise FolderNotFoundError(module_path + " not found")
        return None


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                        controlBasePath=controlBasePath,
                                        language=languageCode,
                                        fileName=fileNameQrd,
                                        domain=domain,
                                        procedureType=procedureType
                                        )
    
    path_json = os.path.join(basePath,'outputJSON', fileNameJson)
    print("PathJson",path_json)
    partitionLogger = MatchLogger(
        f'Partition_{getRandomString(1)}', fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    partitioner = DocTypePartitioner(partitionLogger)

    partitionedJsonPaths = partitioner.partitionHtmls(
        styleRulesObj.qrd_section_headings, path_json)

    return partitionedJsonPaths


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):

    if documentNumber == 0:
        topHeadingsConsidered = 4
        bottomHeadingsConsidered = 6
    elif documentNumber == 1:
        topHeadingsConsidered = 3
        bottomHeadingsConsidered = 5
    elif documentNumber == 2:
        topHeadingsConsidered = 5
        bottomHeadingsConsidered = 15
    else:
        topHeadingsConsidered = 5
        bottomHeadingsConsidered = 10

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    stopWordlanguage = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber
        ).extractStopWordLanguage()

    matchDocObj = MatchDocument(
        logger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName)
    df, coll = matchDocObj.matchHtmlHeaddingsWithQrd()

    return df, coll


def parseDocument(controlBasePath, basePath ,htmlDocName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName = None):
    
    if "/" in basePath:
        pathSep = "/"        
    else:
        pathSep = "\\"
    
    fileNameLog = os.path.join(basePath,'FinalLog.txt')

    pathComponents = basePath.split(pathSep)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode =  pathComponents[-2]
    medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)
        
    flowLogger =  MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
    ###Convert Html to Json
    fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)
    
    print("stylePath:-",stylesFilePath)
    flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")

    flowLogger.logFlowCheckpoint("Starting Json Split")

    ###Split Uber Json to multiple Jsons for each category.
    partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)
    
    partitionedJsonPaths = [ path.split(pathSep)[-1] for path in partitionedJsonPaths]
    flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))
    
    flowLogger.logFlowCheckpoint("Completed Json Split")
    
    flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")
    
    for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
        
        flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")
        
        if index == 3:
            stopWordFilterLen = 100
            isPackageLeaflet = True
        else:
            stopWordFilterLen = 6
            isPackageLeaflet = False
            
        df, coll = extractAndValidateHeadings(controlBasePath,
                                    basePath,
                                    domain,
                                    procedureType,
                                    languageCode,
                                    index,
                                    fileNamePartitioned,
                                    fileNameQrd,
                                    fileNameMatchRuleBook,
                                    fileNameDocumentTypeNames,
                                    fileNameLog,
                                    stopWordFilterLen=stopWordFilterLen,
                                    isPackageLeaflet=isPackageLeaflet,
                                    medName=medName)
        
        
        print(f"Completed Heading Extraction For File")
        flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
        
        print(f"Starting Document Annotation For File :- {fileNamePartitioned}")        
        flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
        documentAnnotationObj = DocumentAnnotation(fileNamePartitioned,'c20835db4b1b4e108828a8537ff41506','https://spor-sit.azure-api.net/pms/api/v2/',df,coll)
        try:
            pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
            print(pms_oms_annotation_data)
        except:
            pms_oms_annotation_data = None
            print("Error Found")
            
        print(f"Completed Document Annotation")        
        flowLogger.logFlowCheckpoint("Completed Document Annotation")
        
        print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")        
        flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")
        
        extractContentlogger =  MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
        extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
        dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)
        
        print(f"Completed Extracting Content Between Heading")        
        flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
        
        xmlLogger =  MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
        fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
        fileNameXml = fileNamePartitioned.replace('.json','.xml')
        generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)
        
        fhirServiceLogger =  MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

        fhirServiceObj = FhirService(fhirServiceLogger, basePath, generatedXml)
        fhirServiceObj.submitFhirXml()
        print(f"Created XML File For :- {fileNamePartitioned}")      

        #return df,coll,dfExtractedHierRR
    
    flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vipsharm\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
inputZipFolderPath = "F:\Projects\EMA\Repository\EMA EPI PoC\\function_code\\inputblob"
inputZipFileName = "Adakveo_H_CAP_en_2021-05-12T12-47-53Z.zip"

In [5]:
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'

info = inputZipFileName.split("_")

try:
    medName = info[0]
    domain = info[1]
    procedureType = info[2]
    languageCode = info[3]
    timestamp = info[4]
    timestamp = timestamp.replace(".zip","")

except Exception:
    raise f"Missing required info in the zip file name {inputZipFileName}"

if "\\" in os.getcwd():
    localEnv = True
    inputZipFolderPath = os.path.join(os.path.abspath(os.path.join('..')),inputZipFolderPath)
    outputFolderPath = os.path.join(os.path.abspath(os.path.join('..')), 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(os.path.abspath(os.path.join('..')),'control')
else:
    localEnv = False
    inputZipFolderPath = os.path.join(f'{fsMountName}',inputZipFolderPath)
    outputFolderPath = os.path.join(f'{fsMountName}', 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(f'{fsMountName}','control')


print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

mode = 0o666

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/","\\")
    outputFolderPath = outputFolderPath.replace("/","\\")
    controlFolderPath = controlFolderPath.replace("/","\\")

try:
    os.makedirs(inputZipFolderPath, mode)
    os.makedirs(outputFolderPath, mode)
    os.makedirs(controlFolderPath, mode)

except Exception:
    print("Already Present")
    
with zipfile.ZipFile(f'{inputZipFolderPath}/{inputZipFileName}',"r") as zip_ref:
        zip_ref.extractall(outputFolderPath)
    

_,_,fileNames = next(os.walk(outputFolderPath))
htmlFileName = [fileName for fileName in fileNames if ".htm" in fileName][0]

print(htmlFileName)



Adakveo_H_CAP_en_2021-05-12T12-47-53Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Adakveo_clean.htm


In [6]:
parseDocument(controlFolderPath, outputFolderPath, htmlFileName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName)

2021-05-12 18:46:57,453 : Flow Logger HTML_O : Starting HTML Conversion To Json | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:57,459 : Style Dictionary_N : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:57,524 : Style Dictionary_N : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | Adakveo_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'Adakveo', 'en', '2021-05-12T12-47-53Z'] Adakveo_clean.htm
2021-05-12T12-47-53Z en Adakveo CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\Adakveo_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.json


2021-05-12 18:46:57,954 : Parser_M : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.txt | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:59,516 : Parser_M : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.json | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:59,619 : Flow Logger HTML_O : Completed HTML Conversion To Json | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:59,621 : Flow Logger HTML_O : Starting Json Split | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:46:59,625 : Style Dictionary_u : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | Json | Adakveo_clean.json
2021-05-12 18:46:59,661 : Style Dictionary_u : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP 

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.txt
PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\outputJSON\Adakveo_clean.json


2021-05-12 18:46:59,981 : Partition_W : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_SmPC.json | H | CAP |  en | Json | Adakveo_clean.json
2021-05-12 18:47:00,007 : Partition_W : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX II.json | H | CAP |  en | Json | Adakveo_clean.json
2021-05-12 18:47:00,038 : Partition_W : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX III.json | H | CAP |  en | Json | Adakveo_clean.json
2021-05-12 18:47:00,045 : Partition_W : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ PACKAGE LEAFLET.json | H | CAP |  en | Json | Adakve

Starting Heading Extraction For File :- Adakveo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_SmPC.json
--------------------------------------------
SmPC


2021-05-12 18:47:00,300 : Heading Extraction Adakveo_clean_SmPC.json_i : Started Extracting Heading | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:47:00,431 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Qrd txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Matched :- 'True'
2021-05-12 18:47:00,437 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed As This The First Heading | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20001' | currParentHeadId :- 'nan' | prevParentHeadId :- ''



OriginalCheck



 reactions.' | Qrd txt :- 'qthis medicinal product is subject to additional monitoring. this will allow quick identification of new safety information. healthcare professionals are asked to report any suspected adverse reactions. see section 4.8 for how to report adverse reactions.' | Matched :- 'True'
 reactions.' | Qrd txt :- 'qThis medicinal product is subject to additional monitoring. This will allow quick identification of new safety information. Healthcare professionals are asked to report any suspected adverse reactions. See section 4.8 for how to report adverse reactions.' | Matched :- 'True'
2021-05-12 18:47:00,958 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20002' | currParentHeadId :- '20001.0' | prevParentHeadId :- '20001'
2021-05-12 18:47:01,028 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- '1.       NAME OF THE ME




 QUANTITATIVE COMPOSITION' | Qrd txt :- '2. QUALITATIVE AND QUANTITATIVE COMPOSITION' | Matched :- 'True' | Adakveo_clean_SmPC.json | Doc txt :- '2.       QUALITATIVE AND
2021-05-12 18:47:01,785 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20004' | currParentHeadId :- '20001.0' | prevParentHeadId :- '20003'
 concentrate for solution for infusion contains 10 mg crizanlizumab.' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'
 concentrate for solution for infusion contains 10 mg crizanlizumab.' | Qrd txt :- '6.6 Special precautions for disposal <and other handling>' | Matched :- 'False'
 10 ml contains 100 mg crizanlizumab.' | Qrd txt :- '6.6 Special precautions for disposal <and other handling>' | Matched :- 'False'| Doc txt :- 'One vial of
 recombinant DNA technology.' | Qrd txt :- '6.5 Nature and contents of contain

2021-05-12 18:47:10,109 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20018' | currParentHeadId :- '20017.0' | prevParentHeadId :- '20017'
 batch number of the administered product should be clearly recorded.' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'
2021-05-12 18:47:10,989 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Failed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'Laboratory test interference: automated platelet counts' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'
2021-05-12 18:47:11,297 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'Excipients with known effect' | Qrd txt :- 'Excipient(s) with known effect' | Matched

2021-05-12 18:47:15,026 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20028' | currParentHeadId :- '20027.0' | prevParentHeadId :- '20027'
2021-05-12 18:47:15,346 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'Reporting of suspected adverse reactions' | Qrd txt :- 'Reporting of suspected adverse reactions' | Matched :- 'True'
2021-05-12 18:47:15,358 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20029' | currParentHeadId :- '20027.0' | prevParentHeadId :- '20027'
2021-05-12 18:47:15,438 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- '4.9     Overdose' | Qrd txt :- '4.9 Overdose' | Matched :- 'True'
2021-05-12 18:47:15,448 : Heading Extraction Adakveo_clean_SmPC.json_i : Valid

2021-05-12 18:47:18,961 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20039' | currParentHeadId :- '20038.0' | prevParentHeadId :- '20038'
2021-05-12 18:47:19,295 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'Distribution' | Qrd txt :- 'Distribution' | Matched :- 'True'
2021-05-12 18:47:19,323 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20040' | currParentHeadId :- '20038.0' | prevParentHeadId :- '20038'
2021-05-12 18:47:19,781 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- 'Biotransformation' | Qrd txt :- 'Biotransformation' | Matched :- 'True'
2021-05-12 18:47:19,796 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_S

2021-05-12 18:47:25,363 : Heading Extraction Adakveo_clean_SmPC.json_i : Match Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- '6.3     Shelf life' | Qrd txt :- '6.3 Shelf life' | Matched :- 'True'
2021-05-12 18:47:25,374 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20050' | currParentHeadId :- '20047.0' | prevParentHeadId :- '20049'
 for storage' | Qrd txt :- '6.4 Special precautions for storage' | Matched :- 'True'd | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | Doc txt :- '6.4     Special precautions
2021-05-12 18:47:26,129 : Heading Extraction Adakveo_clean_SmPC.json_i : Validation Passed | H | CAP |  en | 0 | Adakveo_clean_SmPC.json | currHeadId :- '20051' | currParentHeadId :- '20047.0' | prevParentHeadId :- '20050'
 the outer carton in order to protect from light.' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>



Heading Not Found 
 ['General description', 'Qualitative and quantitative composition', 'Excipient(s) with known effect', 'Precautions to be taken before handling or administering the medicinal product', 'Pharmacokinetic/pharmacodynamic relationship(s)', 'Environmental risk assessment (ERA)', 'Use in the paediatric population', 'DOSIMETRY', 'INSTRUCTIONS FOR PREPARATION OF RADIOPHARMACEUTICALS']


dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- Adakveo_clean_SmPC.json
EU/1/20/1476/001


2021-05-12 18:47:40,029 : Flow Logger HTML_O : Completed Document Annotation | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:47:40,030 : Flow Logger HTML_O : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:47:40,034 : ExtractContentBetween_0_w : Cleaning Match Results | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:47:40,040 : ExtractContentBetween_0_w : Finished Cleaning Match Results | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:47:40,076 : Flow Logger HTML_O : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:47:40,115 : XmlGeneration_0_M : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:47:40,116 : XmlGeneration_0_M : Initiating XML Generation | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:47:40,212 : XmlGeneration_0_M : Writing to File:Adakveo_clean_SmPC.xml | H | CAP |  en 

Error Found
Completed Document Annotation
Starting Extracting Content Between Heading For File :- Adakveo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_SmPC.json
--------------------------------------------
Completed Extracting Content Between Heading


Request time out. Ingestion may be backed up. Retrying.
Request time out. Ingestion may be backed up. Retrying.
Request time out. Ingestion may be backed up. Retrying.
2021-05-12 18:49:02,453 : XML Submission Logger_0_0 : Initiating Submission To FHIR Server | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:49:02,455 : XML Submission Logger_0_0 : Response{"resourceType":"Bundle","id":"57acd765-429b-471f-b613-19cddd08f8ae","meta":{"versionId":"1","lastUpdated":"2021-05-12T13:18:25.593+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:aec4f6e4-da38-404e-9cfd-42fa58f77e5c","resource":{"resourceType":"Bundle","id":"aa5ff8a1-0285-43ee-a7e0-a6ac60069cc6","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-12T13:17:40+00:00","entry":[{"fullUr | H | CAP |  en | 0 | Adakveo_clean_SmPC.json
2021-05-12 18:49:02,459 : XML Submission Logger_0_0 : POST sucessful: XML added with i

POST sucessful: XML added with id 57acd765-429b-471f-b613-19cddd08f8ae
Created XML File For :- Adakveo_clean_SmPC.json
Starting Heading Extraction For File :- Adakveo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX II.json
--------------------------------------------
AnnexII


 product' | Qrd txt :- 'd. conditions or restrictions with regard to the safe and effective use of the medicinal product' | Matched :- 'True' Doc txt :- 'd.      conditions
 PRODUCT' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'True'NEX II.json | Doc txt :- 'D.      conditions
2021-05-12 18:49:02,696 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Validation Flow Is Broken | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | currHeadId :- '21009' | currParentHeadId :- '21001.0' | prevParentHeadId :- '21007'
2021-05-12 18:49:02,701 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Validation Passed | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | currHeadId :- '21009' | currParentHeadId :- '21001.0' | prevParentHeadId :- '21007'
 POST-AUTHORISATION MEASURES FOR THE CONDITIONAL MARKETING AUTHORISATION' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESP


OriginalCheck


OriginalCheck


OriginalCheck


OriginalCheck



 SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'False'     MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE
 SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'CTIVE
2021-05-12 18:49:02,955 : Heading Extraction Adakveo_clean_ANNEX II.json_P : End Of Sub Section | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
 SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'
2021-05-12 18:49:02,972 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Validation Passed As This The First Heading | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | currHeadId :- '21002' | currParentHeadId :- '21001.0'

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck


OriginalCheck



 release' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False' Doc txt :- 'Name and address of the manufacturer(s) responsible for batch
 release' | Qrd txt :- 'Name and address of the manufacturer(s) of the biological active substance(s)' | Matched :- 'False'an_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible for batch
 release' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'True'akveo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible for batch
2021-05-12 18:49:03,173 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Validation Passed | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | currHeadId :- '21004' | currParentHeadId :- '21002.0' | prevParentHeadId :- '21003'
2021-05-12 18:49:03,326 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Match Passed | H | CAP |  en | 


OriginalCheck



 6 months following authorisation.' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'False'ANNEX II.json | Doc txt :- 'The marketing
 6 months following authorisation.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'he marketing



OriginalCheck


OriginalCheck



 RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'
 RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-12 18:49:03,933 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Validation Passed | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | currHeadId :- '21009' | currParentHeadId :- '21001.0' | prevParentHeadId :- '21008'
2021-05-12 18:49:03,994 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Match Passed | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json | Doc txt :- '·Risk management plan (RMP)' | Qrd txt :- 'Risk management plan (RMP)' | Matched :- 'True'
2021-05-12 18:49:04,003 : Heading Extraction Adakveo_clean_ANNEX II.json_P : Valida


OriginalCheck



Heading Not Found 
 ['Official batch release', 'Additional risk minimisation measures', 'Obligation to conduct post-authorisation measures']


dict_keys([])
Completed Heading Extraction For File


2021-05-12 18:49:04,553 : Flow Logger HTML_O : Starting Document Annotation For File | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:49:04,558 : Flow Logger HTML_O : Completed Document Annotation | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:49:04,561 : Flow Logger HTML_O : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:49:04,567 : ExtractContentBetween_1_P : Cleaning Match Results | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:04,571 : ExtractContentBetween_1_P : Finished Cleaning Match Results | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:04,579 : Flow Logger HTML_O : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Adakveo_clean.htm
2021-05-12 18:49:04,595 : XmlGeneration_1_2 : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:04,596 : XmlGeneration_1_2 : Initiating XML Generation | H | C

Starting Document Annotation For File :- Adakveo_clean_ANNEX II.json
Error Found
Completed Document Annotation
Starting Extracting Content Between Heading For File :- Adakveo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX II.json
--------------------------------------------
Completed Extracting Content Between Heading
Already Exists


2021-05-12 18:49:05,971 : XML Submission Logger_1_c : Initiating Submission To FHIR Server | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:05,973 : XML Submission Logger_1_c : Response{"resourceType":"Bundle","id":"318c6ede-b204-451b-8c81-795af0c7f60e","meta":{"versionId":"1","lastUpdated":"2021-05-12T13:19:05.652+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:9c006163-a468-468b-b48d-4b61c01e745f","resource":{"resourceType":"Bundle","id":"d0e69c2f-07c3-48a8-af40-9fb3c7a30b2f","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-12T13:19:04+00:00","entry":[{"fullUr | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:05,974 : XML Submission Logger_1_c : POST sucessful: XML added with id: 318c6ede-b204-451b-8c81-795af0c7f60e | H | CAP |  en | 1 | Adakveo_clean_ANNEX II.json
2021-05-12 18:49:05,976 : Flow Logger HTML_O : 



|||||||||||||||||||

POST sucessful: XML added with id 318c6ede-b204-451b-8c81-795af0c7f60e
Created XML File For :- Adakveo_clean_ANNEX II.json
Starting Heading Extraction For File :- Adakveo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX III.json
--------------------------------------------
Labelling


2021-05-12 18:49:06,236 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Match Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- 'A. LABELLING' | Qrd txt :- 'LABELLING ' | Matched :- 'True'
2021-05-12 18:49:06,240 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Passed As This The First Heading | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22001' | currParentHeadId :- 'nan' | prevParentHeadId :- ''
2021-05-12 18:49:06,247 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Match Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- 'PARTICULARS TO APPEAR ON THE OUTER PACKAGING' | Qrd txt :- 'PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>' | Matched :- 'True'
2021-05-12 18:49:06,251 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22002' | currParentHeadId :- '22001.0' | prevParentHea

2021-05-12 18:49:08,829 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Match Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- '17.     UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-12 18:49:08,836 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22019' | currParentHeadId :- '22002.0' | prevParentHeadId :- '22018'
 READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True' | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- '18.     UNIQUE IDENTIFIER - HUMAN
2021-05-12 18:49:09,060 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22020' | currParentHeadId :- '22002.0' | prevParentHeadId :- '22019'
2021-05-12 18:49:09,167 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : 


OriginalCheck



2021-05-12 18:49:09,725 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Match Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- '4.       BATCH NUMBER' | Qrd txt :- '13. BATCH NUMBER<, DONATION AND PRODUCT CODES>' | Matched :- 'True'
2021-05-12 18:49:09,731 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22015' | currParentHeadId :- '22002.0' | prevParentHeadId :- '22030'
2021-05-12 18:49:09,754 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Match Passed | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | Doc txt :- '4.       BATCH NUMBER' | Qrd txt :- '4. BATCH NUMBER<, DONATION AND PRODUCT CODES>' | Matched :- 'True'
2021-05-12 18:49:09,762 : Heading Extraction Adakveo_clean_ANNEX III.json_6 : Validation Failed As Wrong Heading Found | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json | currHeadId :- '22025' | currPare



Heading Not Found 
 ['MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS', 'NAME OF THE MARKETING AUTHORISATION HOLDER']


dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- Adakveo_clean_ANNEX III.json
Error Found
Completed Document Annotation
Starting Extracting Content Between Heading For File :- Adakveo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ANNEX III.json
--------------------------------------------
Completed Extracting Content Between Heading
Already Exists


2021-05-12 18:49:14,630 : XML Submission Logger_2_V : Initiating Submission To FHIR Server | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json
2021-05-12 18:49:14,631 : XML Submission Logger_2_V : Response{"resourceType":"Bundle","id":"447cec56-1c2d-4ef7-a9a2-b25964d12d1a","meta":{"versionId":"1","lastUpdated":"2021-05-12T13:19:12.853+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:4ca10805-b187-4808-8435-92a3a2f839ad","resource":{"resourceType":"Bundle","id":"43a7bea4-2042-4c18-918b-c9c483a340fa","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-12T13:19:10+00:00","entry":[{"fullUr | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json
2021-05-12 18:49:14,632 : XML Submission Logger_2_V : POST sucessful: XML added with id: 447cec56-1c2d-4ef7-a9a2-b25964d12d1a | H | CAP |  en | 2 | Adakveo_clean_ANNEX III.json
2021-05-12 18:49:14,635 : Flow Logger HTML_O : 



||||||||||||||||

POST sucessful: XML added with id 447cec56-1c2d-4ef7-a9a2-b25964d12d1a
Created XML File For :- Adakveo_clean_ANNEX III.json
Starting Heading Extraction For File :- Adakveo_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet


2021-05-12 18:49:14,890 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Package leaflet: Information for the patient' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-12 18:49:14,970 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo 10 mg/ml concentrate for solution for infusion' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:15,072 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo 10 mg/ml concentrate for solution for infusion' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-12 18:49:15,117 : Heading Extraction Adakveo_




 important information for you.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'clean_ PACKAGE LEAFLET.json | Doc txt :- 'Read all of
 again.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-Keep this leaflet. You may need to read it
 again.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-Keep this leaflet. You may need to read it
 or nurse.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'| H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-If you have any further questions, ask your doctor
 or nurse.' | Qrd txt :- 'If you <take> <use> more Adakveo than you should' | Matched :- 'False'd | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :-





 side effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'T.json_G : Match Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.       Possible
2021-05-12 18:49:16,664 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Flow Is Broken | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | currParentHeadId :- '23001.0' | prevParentHeadId :- '23005'
2021-05-12 18:49:16,668 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | currParentHeadId :- '23001.0' | prevParentHeadId :- '23005'
 store Adakveo' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'| CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '5.       How to
 store Adakveo' | Qrd txt :- '3. How to <take> <use> Adakveo ' | Matched :- 'False': Match Failed | H | CAP |  en | 3 | Adak





 of the pack and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '6.       Contents
2021-05-12 18:49:16,873 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | currParentHeadId :- '23001.0' | prevParentHeadId :- '23022'
2021-05-12 18:49:16,893 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '1.       What Adakveo is and what it is used for' | Qrd txt :- '1. What Adakveo is and what it is used for' | Matched :- 'True'
2021-05-12 18:49:16,901 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23004' | currParentHeadId :- '23001.0' | prevPa


oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-12 18:49:17,122 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What Adakveo is' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
 called monoclonal antibodies (mAbs).' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'eo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo contains
 called monoclonal antibodies (mAbs).' | Qrd txt :- 'Adakveo contains {name the excipient(s)}' | Matched :- 'True'| 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo contains
2021-05-12 18:49:17,322 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Flow Is Broken | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23013' | currParentHeadId :- '23005.0' | prevParentHeadId :- '23004'
2021-05-12 18:49:17,325 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Val




 hydroxyurea/hydroxycarbamide, although it may also be used alone.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'n | Doc txt :- 'Adakveo is used to prevent recurrent
 hydroxyurea/hydroxycarbamide, although it may also be used alone.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'xt :- 'Adakveo is used to prevent recurrent
 works' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'led | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'How Adakveo
 works' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False' CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'How Adakveo
 painful crises.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Patients with sickle cell disease
 painful crises.' 

2021-05-12 18:49:22,087 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Children and adolescents' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:22,096 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Children and adolescents' | Qrd txt :- 'Use in children <and adolescents>' | Matched :- 'True'
2021-05-12 18:49:22,103 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Flow Is Broken | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23015' | currParentHeadId :- '23014.0' | prevParentHeadId :- '23013'
2021-05-12 18:49:22,108 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23015' | currParent

2021-05-12 18:49:26,077 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.       How Adakveo is given' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:26,131 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.       How Adakveo is given' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-12 18:49:26,205 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo will be given to you by a doctor or nurse.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:26,300 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CA

 further questions on the use of this medicine, ask your doctor or nurse.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False' txt :- 'If you have any
 further questions on the use of this medicine, ask your doctor or nurse.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'If you have any
 effects' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'd | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.       Possible side
 effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'EAFLET.json_G : Match Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.       Possible side
2021-05-12 18:49:32,042 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Flow Is Broken | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | currParentHeadId :- '23001.0' | prevParentHeadI

2021-05-12 18:49:38,630 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-pain in the muscles or bones of the chest (musculoskeletal chest pain)' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-12 18:49:38,719 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-sore throat (oropharyngeal pain)' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:38,802 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- '-sore throat (oropharyngeal pain)' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-12 18:49:38,907 : Heading Extraction Adakveo_clean_ PACKAGE 

 liquid.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'd | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo
 liquid.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'AP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo
 available in packs containing 1 vial.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'o_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Adakveo is
 available in packs containing 1 vial.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'PACKAGE LEAFLET.json | Doc txt :- 'Adakveo is
2021-05-12 18:49:42,895 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Marketing Authorisation Holder' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- '

 site: http://www.ema.europa.eu' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'clean_ PACKAGE LEAFLET.json | Doc txt :- 'Detailed
 information is intended for healthcare professionals only:' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'LET.json | Doc txt :- 'The following
 information is intended for healthcare professionals only:' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'| Doc txt :- 'The following
 information is intended for healthcare professionals only:' | Qrd txt :- 'The following information is intended for healthcare professionals only:' | Matched :- 'True' 'The following
2021-05-12 18:49:46,695 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Validation Passed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23030' | currParentHeadId :- '23001.0' | prevParentHeadId :- '23029'
 are for single use on

 concentration in the infusion bag within 1 mg/ml to 9.6 mg/ml.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'c txt :- '-Keep the volume of Adakveo added to the infusion
 diluted solution by gently inverting the infusion bag. DO NOT SHAKE.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False' Doc txt :- '5.       Mix the
 diluted solution by gently inverting the infusion bag. DO NOT SHAKE.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False':- '5.       Mix the
2021-05-12 18:49:54,521 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Storage of the diluted solution' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-12 18:49:54,630 : Heading Extraction Adakveo_clean_ PACKAGE LEAFLET.json_G : Match Failed | H | CAP |  en | 



Heading Not Found 


dict_keys([])
Completed Heading Extraction For File
Starting Document Annotation For File :- Adakveo_clean_ PACKAGE LEAFLET.json
Error Found
Completed Document Annotation
Starting Extracting Content Between Heading For File :- Adakveo_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Adakveo\en\2021-05-12T12-47-53Z\partitionedJSONs\Adakveo_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Completed Extracting Content Between Heading
Already Exists


2021-05-12 18:49:59,158 : XML Submission Logger_3_B : Initiating Submission To FHIR Server | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json
2021-05-12 18:49:59,160 : XML Submission Logger_3_B : Response{"resourceType":"Bundle","id":"f8b853d7-f0bd-4da1-a3a3-9cd3c76b3676","meta":{"versionId":"1","lastUpdated":"2021-05-12T13:19:58.798+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:5d41c199-6ffb-496d-9898-d36033690c67","resource":{"resourceType":"Bundle","id":"f6f56ae2-2e3f-4e71-a187-2bf0a37077a3","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-12T13:19:57+00:00","entry":[{"fullUr | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json
2021-05-12 18:49:59,163 : XML Submission Logger_3_B : POST sucessful: XML added with id: f8b853d7-f0bd-4da1-a3a3-9cd3c76b3676 | H | CAP |  en | 3 | Adakveo_clean_ PACKAGE LEAFLET.json
2021-05-12 18:49:59,165 : Flow Logger HTML_O :

POST sucessful: XML added with id f8b853d7-f0bd-4da1-a3a3-9cd3c76b3676
Created XML File For :- Adakveo_clean_ PACKAGE LEAFLET.json
