In [27]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames
from listBundle.addAndUpdateListBundle.addAndUpdateListBundle import ListBundleHandler

class FolderNotFoundError(Exception):
    """Signals that a folder the pipeline needs to read from does not exist."""

class Metrics:
    """Record per-step wall-clock time and memory usage and append them to a CSV file.

    Every call to ``getMetric`` writes one CSV row for the step that just
    finished and restarts the timer and ``tracemalloc`` for the next step.
    ``end`` writes a summary row and releases the file handle.

    Args:
        logFileName: Path of the CSV file the rows are appended to.
        logger: Object exposing ``logFlowCheckpoint(str)``; every row is also
            sent to it.
    """

    # Column header of the metrics CSV file.
    _HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so only write the header when the
        # file is new or empty; otherwise every run would inject another
        # header row in the middle of the CSV.
        needsHeader = (not os.path.exists(self.logFileName)
                       or os.path.getsize(self.logFileName) == 0)
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self._HEADER)

        # Start measuring only once the file is open, so a failing open()
        # does not leave tracemalloc running.
        self.start()

    def start(self):
        """Start the step timer and memory tracing."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Write the metrics of the step that just finished and start a new step.

        Args:
            msg: Step name written in the first CSV column.
        """
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; the rows are written in (decimal) MB.
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        # Index 2 of psutil.virtual_memory() is the "percent" field.
        usedRamPerc = psutil.virtual_memory()[2]

        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(outputString)
        print(f"Metrics : {outputString}")
        self.writer.write(outputString)

        # Restart tracing so the peak of the next step is measured on its own.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the summary row, close the CSV file and stop memory tracing."""
        try:
            current, _ = tracemalloc.get_traced_memory()
            current = current / 10**6
            outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
            print(f"Metrics : {outputString}")
            self.logger.logFlowCheckpoint(outputString)
            self.writer.write(outputString)
        finally:
            # Always release the file handle and stop tracing, even when
            # logging or writing the summary row fails.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string form of its integer value, or ``x`` unchanged.

    Args:
        x: Any value. Values accepted by ``int()`` (e.g. ``3.0`` or ``"7"``)
            are converted.

    Returns:
        ``str(int(x))`` when the conversion succeeds; otherwise ``x`` itself
        (e.g. for ``None``, non-numeric strings, NaN or infinity).
    """
    try:
        return str(int(x))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "not an integer"; a bare
        # except would also swallow KeyboardInterrupt / SystemExit.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised id columns.

    The ``parent_id`` and ``id`` columns are passed element-wise through
    ``convertToInt``; all other columns are left as pandas created them.

    Args:
        collection: Data accepted by ``pd.DataFrame`` that yields the columns
            ``parent_id`` and ``id``.

    Returns:
        The resulting DataFrame.
    """
    frame = pd.DataFrame(collection)

    # parent_id is converted first, then id.
    for column in ('parent_id', 'id'):
        frame[column] = frame[column].apply(convertToInt)

    return frame

def getRandomString(N):
    """Return a string of ``N`` random characters.

    Each character is drawn independently with ``random.choice`` from the
    upper-case ASCII letters, the digits and the lower-case ASCII letters.
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    picked = []
    for _ in range(N):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML file(s) matching ``htmlDocName`` in ``basePath`` to JSON.

    The JSON file and a ``.txt`` style file are written to the ``outputJSON``
    sub-folder of ``basePath`` (created when missing) by
    ``parserExtractor.createPIJsonFromHTML``.

    Args:
        controlBasePath: Passed to ``StyleRulesDictionary``.
        basePath: Folder containing the input HTML document.
        domain, procedureType, languageCode: Passed to the loggers and to
            ``StyleRulesDictionary``.
        htmlDocName: File name (glob pattern) of the HTML document.
        fileNameQrd: Passed to ``StyleRulesDictionary`` as ``fileName``.
        fileNameLog: Log file handed to the ``MatchLogger`` instances.

    Returns:
        Tuple ``(json file name without folder, style file path)``.

    Raises:
        FolderNotFoundError: ``basePath`` does not exist.
        FileNotFoundError: No file in ``basePath`` matches ``htmlDocName``.
    """
    module_path = os.path.join(basePath)

    # Guard clause. No logger exists yet at this point, so the problem is
    # reported on stdout and through the exception (the previous version
    # tried to use an unassigned logger here and failed with
    # UnboundLocalError instead of FolderNotFoundError).
    if not os.path.exists(module_path):
        print("Folder For Language Code Not Found In Input File")
        raise FolderNotFoundError(module_path + " not found")

    filenames = glob.glob(os.path.join(module_path, htmlDocName))
    if not filenames:
        # Without any input file there is nothing to return below.
        raise FileNotFoundError(
            f"No file matching {htmlDocName} found in {module_path}")

    # Create the outputJSON folder if it doesn't exist
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    for input_filename in filenames:
        # The output names are derived from htmlDocName, not from the matched
        # input file, so every matched file writes to the same output paths.
        output_filename = os.path.join(output_json_path, htmlDocName)
        style_filepath = output_filename.replace('.html', '.txt')
        style_filepath = style_filepath.replace('.txtl', '.txt')
        style_filepath = style_filepath.replace('.htm', '.txt')
        print("-------------", style_filepath, "-----------------")

        output_filename = output_filename.replace('.html', '.json')
        output_filename = output_filename.replace('.htm', '.json')
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # os.path.basename copes with whatever separator os.path.join produced,
    # unlike splitting on a separator guessed from basePath.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition the JSON file ``fileNameJson`` with ``DocTypePartitioner``.

    The file is read from the ``outputJSON`` sub-folder of ``basePath`` and
    split using the QRD section headings of a ``StyleRulesDictionary``.

    Returns:
        Whatever ``DocTypePartitioner.partitionHtmls`` returns (the caller
        treats it as a list of partition file paths).
    """
    # Both loggers share every argument except their name.
    loggerContext = (fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    rulesLogger = MatchLogger(f'Style Dictionary_{getRandomString(1)}', *loggerContext)
    rules = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    path_json = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", path_json)

    splitLogger = MatchLogger(f'Partition_{getRandomString(1)}', *loggerContext)
    return DocTypePartitioner(splitLogger).partitionHtmls(
        rules.qrd_section_headings, path_json)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    ``documentNumber`` selects how many top and bottom headings are handed to
    ``MatchDocument``; numbers other than 0, 1 and 2 use (5, 10).

    Returns:
        The three values returned by
        ``MatchDocument.matchHtmlHeaddingsWithQrd`` as a tuple
        ``(df, coll, documentType)``.
    """
    # (topHeadingsConsidered, bottomHeadingsConsidered) per document number.
    headingWindows = {
        0: (4, 6),
        1: (3, 5),
        2: (5, 15),
    }
    topHeadingsConsidered, bottomHeadingsConsidered = headingWindows.get(
        documentNumber, (5, 10))

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(
        f"Heading Extraction {fileNameDoc}_{getRandomString(1)}",
        fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber,
    )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        logger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName,
    )
    matchedDf, matchedColl, matchedDocumentType = matcher.matchHtmlHeaddingsWithQrd()

    return matchedDf, matchedColl, matchedDocumentType


def parseDocument(controlBasePath,
                  basePath,
                  htmlDocName,
                  fileNameQrd,
                  fileNameMatchRuleBook,
                  fileNameDocumentTypeNames,
                  jsonTempFileName,
                  listBundleDocumentTypeCodesFileName,
                  apiMmgtBaseUrl,
                  getListApiEndPointUrlSuffix,
                  addUpdateListApiEndPointUrlSuffix,
                  addBundleApiEndPointUrlSuffix,
                  apiMmgtSubsKey,
                  medName = None):
    """Run the whole conversion pipeline for one HTML document.

    Steps, in order: HTML -> JSON, JSON split into partitions and, for every
    partition: heading extraction, document annotation, content extraction,
    FHIR XML generation, FHIR XML submission and list bundle add/update.
    Timing and memory figures of every step are appended to ``Metrics.csv``
    and the flow is logged to ``FinalLog.txt``, both inside ``basePath``.

    Args:
        controlBasePath: Folder with the control files; passed on to the
            individual pipeline steps.
        basePath: Working folder of the document. Its last five path
            components are read as
            ``<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``.
        htmlDocName: File name of the HTML document inside ``basePath``.
        fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames,
        jsonTempFileName, listBundleDocumentTypeCodesFileName: Control file
            names passed on to the pipeline steps.
        apiMmgtBaseUrl, getListApiEndPointUrlSuffix,
        addUpdateListApiEndPointUrlSuffix, addBundleApiEndPointUrlSuffix,
        apiMmgtSubsKey: API settings passed to ``FhirService`` and
            ``ListBundleHandler``.
        medName: Kept for interface compatibility; the value is replaced by
            the medicine name taken from ``basePath``.

    Returns:
        None.
    """
    listRegulatedAuthCodesAccrossePI = []

    pathSep = "/" if "/" in basePath else "\\"

    fileNameLog = os.path.join(basePath,'FinalLog.txt')

    pathComponents = basePath.split(pathSep)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode =  pathComponents[-2]
    # NOTE(review): this overrides the medName argument with the value taken
    # from the folder structure; kept as-is to preserve existing behaviour.
    medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    flowLogger =  MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath,'Metrics.csv'),flowLogger)

    # try/finally guarantees metrics.end() runs, so the metrics file is closed
    # and tracemalloc is stopped even when a pipeline step raises.
    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        ###Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-",stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        ###Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Only the file names are needed from here on.
        partitionedJsonPaths = [os.path.basename(path) for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The partition at index 3 is handled as the package leaflet.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            # NOTE(review): the key and URL passed here are hard-coded; they
            # should come from configuration instead of source code.
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned,'c20835db4b1b4e108828a8537ff41506','https://spor-sit.azure-api.net/pms/api/v2/',df,coll, index)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as e:
                # Annotation is best effort: continue without annotation data.
                pms_oms_annotation_data = None
                print("Error Found", str(e))

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger =  MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger =  MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json','.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger =  MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, apiMmgtBaseUrl, addBundleApiEndPointUrlSuffix, apiMmgtSubsKey, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

            flowLogger.logFlowCheckpoint("Starting list bundle update/addition")
            # The identifiers accumulate across all partitions of the ePI.
            regulatedAuthIds = documentAnnotationObj.listRegulatedAuthorizationIdentifiers
            if regulatedAuthIds is not None:
                listRegulatedAuthCodesAccrossePI.extend(regulatedAuthIds)
            listBundleLogger =  MatchLogger(f'List Bundle Creation Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            print("\nlistRegulatedAuthCodesAccrossePI",listRegulatedAuthCodesAccrossePI)
            try:
                listBundleHandler = ListBundleHandler(listBundleLogger,
                         domain,
                         procedureType,
                         index,
                         documentType,
                         languageCode,
                         medName,
                         controlBasePath,
                         jsonTempFileName,
                         listBundleDocumentTypeCodesFileName,
                         fileNameDocumentTypeNames,
                         listRegulatedAuthCodesAccrossePI,
                         apiMmgtBaseUrl,
                         getListApiEndPointUrlSuffix,
                         addUpdateListApiEndPointUrlSuffix,
                         apiMmgtSubsKey)

                listBundleXml = listBundleHandler.addOrUpdateDocumentItem(str(fhirServiceObj.SubmittedFhirMsgRefId))
                listBundleHandler.submitListXmLToServer(listBundleXml)

                flowLogger.logFlowCheckpoint("Completed list bundle update/addition")
                metrics.getMetric(f"{index}: Update/Add List Bundle")
            except Exception as e:
                # The list bundle step is best effort, but a failure must at
                # least show up in the flow log and not only on stdout.
                print(str(e))
                if 'No MAN Code found' in str(e):
                    flowLogger.logFlowCheckpoint("Skipping list bundle addtion/update as no MAN found")
                else:
                    flowLogger.logFlowCheckpoint(f"List bundle update/addition failed: {e}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        # "index" only exists when at least one partition was processed.
        completedStep = f"{index}: Completed" if partitionedJsonPaths else "Completed"
        metrics.getMetric(completedStep)
    finally:
        metrics.end()

In [9]:
# Convert the Word input to HTML using the project's WordToHtmlConvertor.
# The converter is created without arguments, so where it reads from and
# writes to is decided inside WordToHtmlConvertor itself.
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'F:\\Projects\\EMA\\Repository\\EMA EPI PoC\\function_code\\data\\Ingest'

In [36]:
# Name of the zip file to process and the folder it is read from: the
# 'inputblob' folder inside the parent of the current working directory.
inputZipFileName = "ELOCTA~H~CAP~en~2021-05-21T09-52-29Z.zip"
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')

In [37]:
# Control file names and API settings handed to parseDocument.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'
jsonTempFileName = 'listBundleJsonTemplate.json'
listBundleDocumentTypeCodesFileName = 'listBundleDocumentTypeCodes.json'
apiMmgtBaseUrl = "https://ema-dap-epi-dev-fhir-apim.azure-api.net"
getListApiEndPointUrlSuffix = "/epi/v1/List"
addUpdateListApiEndPointUrlSuffix = "/epi-w/v1/List"
addBundleApiEndPointUrlSuffix = "/epi-w/v1/Bundle"
# NOTE(review): the subscription key is hard-coded; it should be read from
# configuration or the environment and not be committed with the notebook.
apiMmgtSubsKey = "ba6d7e9a73ed4facaa58fc983bf6db50"

# The zip file name carries the document metadata:
# <medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip
info = inputZipFileName.split("~")

if len(info) < 5:
    # Raising a plain string is a TypeError in Python 3; raise a real exception.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}")

medName, domain, procedureType, languageCode, timestamp = info[:5]
timestamp = timestamp.replace(".zip","")

# A backslash in the working directory is taken as "running locally on Windows".
if "\\" in os.getcwd():
    localEnv = True
    inputZipFolderPath = os.path.join(os.path.abspath('..'), inputZipFolderPath)
    outputFolderPath = os.path.join(os.path.abspath('..'), 'work', domain, procedureType, medName, languageCode, timestamp)
    controlFolderPath = os.path.join(os.path.abspath('..'), 'control')
else:
    localEnv = False
    # NOTE(review): os.path.join drops fsMountName when inputZipFolderPath is
    # already absolute - confirm this is the intended input location.
    inputZipFolderPath = os.path.join(fsMountName, inputZipFolderPath)
    outputFolderPath = os.path.join(fsMountName, 'work', domain, procedureType, medName, languageCode, timestamp)
    controlFolderPath = os.path.join(fsMountName, 'control')


print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the execute bit to be entered on POSIX systems, so 0o666
# would create unusable folders there; 0o777 is the os.makedirs default and is
# still reduced by the process umask.
mode = 0o777

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/","\\")
    outputFolderPath = outputFolderPath.replace("/","\\")
    controlFolderPath = controlFolderPath.replace("/","\\")

# Create every folder independently: with a single try block the first
# already-existing folder skipped the creation of the remaining ones.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)


# Only the files directly inside the output folder are considered.
_,_,fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if ".htm" in fileName]
if not htmlFileNames:
    raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



ELOCTA~H~CAP~en~2021-05-21T09-52-29Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
ELOCTA_clean.htm


In [38]:
# Run the complete pipeline for the extracted HTML file. outputFolderPath is
# passed as basePath, so parseDocument reads domain, procedure type, medicine
# name, language code and timestamp from its last five path components.
parseDocument(controlFolderPath,
              outputFolderPath,
              htmlFileName,
              fileNameQrd,
              fileNameMatchRuleBook,
              fileNameDocumentTypeNames,
              jsonTempFileName,
              listBundleDocumentTypeCodesFileName,
              apiMmgtBaseUrl,
              getListApiEndPointUrlSuffix,
              addUpdateListApiEndPointUrlSuffix,
              addBundleApiEndPointUrlSuffix,
              apiMmgtSubsKey,
              medName)

2021-06-07 19:45:49,721 : Flow Logger HTML_b : Starting HTML Conversion To Json | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:45:49,735 : Style Dictionary_5 : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:45:49,816 : Style Dictionary_5 : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | ELOCTA_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'ELOCTA', 'en', '2021-05-21T09-52-29Z'] ELOCTA_clean.htm
2021-05-21T09-52-29Z en ELOCTA CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\ELOCTA_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.json


2021-06-07 19:45:51,811 : Parser_5 : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.txt | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:46:01,882 : Parser_5 : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.json | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:46:02,536 : Flow Logger HTML_b : Completed HTML Conversion To Json | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:46:02,539 : Flow Logger HTML_b : HTML Conversion To Json,0.2137 Min,8.362819 MB,18.086029 MB,48.2%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:46:02,567 : Flow Logger HTML_b : Starting Json Split | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:46:02,578 : Style Dictionary_s : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | Json | ELOCTA_clean.json
2021-06-

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.txt
Metrics : HTML Conversion To Json,0.2137 Min,8.362819 MB,18.086029 MB,48.2%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\outputJSON\ELOCTA_clean.json


2021-06-07 19:46:03,138 : Partition_R : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_SmPC.json | H | CAP |  en | Json | ELOCTA_clean.json
2021-06-07 19:46:03,321 : Partition_R : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX II.json | H | CAP |  en | Json | ELOCTA_clean.json
2021-06-07 19:46:03,453 : Partition_R : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX III.json | H | CAP |  en | Json | ELOCTA_clean.json
2021-06-07 19:46:03,477 : Partition_R : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ PACKAGE LEAFLET.json | H | CAP |  en | Json | ELOCTA_clean.json

Metrics : Split Json,0.0157 Min,0.281448 MB,17.653005 MB,48.2%

Starting Heading Extraction For File :- ELOCTA_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_SmPC.json
--------------------------------------------
SmPC


2021-06-07 19:46:04,330 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Started Extracting Heading | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:46:04,570 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Qrd txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Matched :- 'True'
2021-06-07 19:46:04,579 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Passed As This The First Heading | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-07 19:46:04,648 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:46:04,666 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | EL

2021-06-07 19:46:37,883 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-06-07 19:46:37,929 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-06-07 19:46:37,993 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-06-07 19:46:38,027 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20037' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-06-07 19:46:38,047 : Heading 

2021-06-07 19:46:51,454 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20020' | prevHeadingCurrId :- '20017' | prevHeadingFoundId :- '20017'
2021-06-07 19:46:52,467 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- '4.6 Fertility, pregnancy and lactation' | Qrd txt :- '4.6 Fertility, pregnancy and lactation' | Matched :- 'True'
2021-06-07 19:46:52,487 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20020'
2021-06-07 19:46:52,494 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20022' | prevHeadingCurrId :- '20020' | prevHeadingFoundId :- '20020'
2021-06-07 19:46:52,720 : Heading Extraction ELOCTA_clean_SmPC.json_3 : M

2021-06-07 19:47:05,557 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20034' | prevHeadingCurrId :- '20033' | prevHeadingFoundId :- '20033'
2021-06-07 19:47:07,095 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- 'Clinical efficacy and safety' | Qrd txt :- 'Clinical efficacy and safety' | Matched :- 'True'
2021-06-07 19:47:07,114 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20036' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20033'
2021-06-07 19:47:07,132 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20036' | prevHeadingCur

2021-06-07 19:47:20,400 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- '6. PHARMACEUTICAL PARTICULARS' | Qrd txt :- '6. PHARMACEUTICAL PARTICULARS' | Matched :- 'True'
2021-06-07 19:47:20,421 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Flow Is Broken | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20047' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20045'
2021-06-07 19:47:20,434 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | currHeadId :- '20047' | prevHeadingCurrId :- '20045' | prevHeadingFoundId :- '20045'
2021-06-07 19:47:21,042 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Match Passed | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json | Doc txt :- '6.1 List of excipients' | Qrd txt :- '6.1 List of excipients' | Matched :- 'True'
2021-06-07 19:47:21,057 : Heading Extraction ELOCTA_clean_SmPC.json_3 : Validation Pass



Heading Not Found 
 ['qThis medicinal product is subject to additional monitoring. This will allow quick identification of new safety information. Healthcare professionals are asked to report any suspected adverse reactions. See section 4.8 for how to report adverse reactions.', 'General description', 'Qualitative and quantitative composition', 'Excipient(s) with known effect', 'Posology', 'Paediatric population', 'Method of administration ', 'Precautions to be taken before handling or administering the medicinal product', 'Traceability', 'Paediatric population', 'Paediatric population', 'Pregnancy', 'Breast-feeding', 'Fertility', 'Paediatric population', 'Reporting of suspected adverse reactions', 'Paediatric population', 'Mechanism of action', 'Pharmacodynamic effects', 'Clinical efficacy and safety', 'Paediatric population', 'Absorption', 'Distribution', 'Biotransformation', 'Elimination', 'Linearity/non-linearity', 'Pharmacokinetic/pharmacodynamic relationship(s)', 'Environmental

2021-06-07 19:47:58,982 : Flow Logger HTML_b : Completed Document Annotation | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:47:58,984 : Flow Logger HTML_b : 0: Document Annotation,0.0634 Min,0.169756 MB,0.204108 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:47:58,996 : Flow Logger HTML_b : Starting Extracting Content Between Heading | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:47:59,005 : ExtractContentBetween_0_l : Cleaning Match Results | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:47:59,017 : ExtractContentBetween_0_l : Finished Cleaning Match Results | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json


Found entry with code 220000000061
Found Packaged Product Definition
Error Found Missing Key 'holder' in entry key value pair
Completed Document Annotation
Metrics : 0: Document Annotation,0.0634 Min,0.169756 MB,0.204108 MB,49.1%

Starting Extracting Content Between Heading For File :- ELOCTA_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_SmPC.json
--------------------------------------------


2021-06-07 19:47:59,192 : Flow Logger HTML_b : Completed Extracting Content Between Heading | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:47:59,192 : Flow Logger HTML_b : 0: Content Extraction,0.0033 Min,0.660822 MB,7.499021 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:47:59,284 : XmlGeneration_0_u : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:47:59,292 : XmlGeneration_0_u : Initiating XML Generation | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json


Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.0033 Min,0.660822 MB,7.499021 MB,49.1%

Already Exists


2021-06-07 19:47:59,884 : XmlGeneration_0_u : Writing to File:ELOCTA_clean_SmPC.xml | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:47:59,900 : Flow Logger HTML_b : 0: Generate XML,0.0117 Min,2.591086 MB,5.772164 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm


Metrics : 0: Generate XML,0.0117 Min,2.591086 MB,5.772164 MB,49.1%



2021-06-07 19:48:09,429 : XML Submission Logger_0_7 : Initiating Submission To FHIR Server | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:09,434 : XML Submission Logger_0_7 : Response{"resourceType":"Bundle","id":"197a6e0e-e617-41f1-8fc4-e8ffe86e81c9","meta":{"versionId":"1","lastUpdated":"2021-06-07T14:18:07.632+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:43b48f7d-875b-45ea-91d1-739ab2c93489","resource":{"resourceType":"Bundle","id":"43ad2fb6-3d3e-4f7d-93ca-5265bc7b2c54","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-07T14:17:59+00:00","entry":[{"fullUr | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:09,457 : XML Submission Logger_0_7 : POST sucessful: XML added with id: 197a6e0e-e617-41f1-8fc4-e8ffe86e81c9 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:09,462 : Flow Logger HTML_b : 0: Submit FHIR Msg,0.1592 Min,0.215475

POST sucessful: XML added with id 197a6e0e-e617-41f1-8fc4-e8ffe86e81c9
Metrics : 0: Submit FHIR Msg,0.1592 Min,0.215475 MB,2.343505 MB,49.1%

Created XML File For :- ELOCTA_clean_SmPC.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-07 19:48:10,545 : List Bundle Creation Logger_0_z : No list bundle found for man EU/1/15/1046/001 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:10,545 : List Bundle Creation Logger_0_z : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:11,569 : List Bundle Creation Logger_0_z : No list bundle found for man EU/1/15/1046/002 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:11,569 : List Bundle Creation Logger_0_z : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:12,561 : List Bundle Creation Logger_0_z : No list bundle found for man EU/1/15/1046/003 | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:12,563 : List Bundle Creation Logger_0_z : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  en | 0 | ELOCTA_clean_SmPC.json
2021-06-07 19:48:13,357 : List Bundle Creation Logger_0_z : No list bundle found for man EU/1/

Metrics : 0: Update/Add List Bundle,0.1954 Min,0.299718 MB,0.352495 MB,49.3%

Starting Heading Extraction For File :- ELOCTA_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX II.json
--------------------------------------------
AnnexII


2021-06-07 19:48:21,788 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Started Extracting Heading | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:21,807 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Match Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | Doc txt :- 'ANNEX II' | Qrd txt :- 'ANNEX II' | Matched :- 'True'
2021-06-07 19:48:21,820 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed As This The First Heading | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 SUBSTANCE AND MANUFACTURER RESPONSIBLE FOR BATCH RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'ANUFACTURERS OF THE BIOLOGICAL ACTIVE
2021-06-07 19:48:21,923 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId 


OriginalCheck



2021-06-07 19:48:22,930 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Match Failed In Lowercase : <=7|92.77|(7, 11, 86)|0.339| | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURERS OF THE BIOLOGICAL ACTIVE SUBSTANCE AND MANUFACTURER RESPONSIBLE FOR BATCH RELEASE' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'False'



OriginalCheck



2021-06-07 19:48:23,380 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : End Of Sub Section | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
 BATCH RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'ean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURERS OF THE BIOLOGICAL ACTIVE SUBSTANCE AND MANUFACTURER RESPONSIBLE FOR
2021-06-07 19:48:23,458 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed As This The First Heading | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



 address of the manufacturers of the biological active substance' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'nd
 address of the manufacturers of the biological active substance' | Qrd txt :- 'Name and address of the manufacturer(s) of the biological active substance(s)' | Matched :- 'True' :- 'Name and
2021-06-07 19:48:23,662 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21003' | prevHeadingCurrId :- '21002' | prevHeadingFoundId :- '21002'
 address of the manufacturer responsible for batch release' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'Name and
 address of the manufacturer responsible for batch release' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Match


OriginalCheck



2021-06-07 19:48:26,580 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Match Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | Doc txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Qrd txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Matched :- 'True'
2021-06-07 19:48:26,595 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21005' | prevHeadingCurrId :- '21004' | prevHeadingFoundId :- '21004'
 Product Characteristics, section 4.2).' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'II.json | Doc txt :- 'Medicinal



OriginalCheck



2021-06-07 19:48:27,692 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Match Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | Doc txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'True'
2021-06-07 19:48:27,713 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Flow Is Broken | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21005'
2021-06-07 19:48:27,724 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Validation Passed | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '21005' | prevHeadingFoundId :- '21005'
2021-06-07 19:48:27,976 : Heading Extraction ELOCTA_clean_ANNEX II.json_U : Match Passed : <=7|2.56|(99, 100, 100)|0.995| | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json | Doc txt :- '·Periodic safety update repo



Heading Not Found 
 ['Official batch release', 'Additional risk minimisation measures', 'Obligation to conduct post-authorisation measures', 'SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR\r\n<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.1424 Min,0.233238 MB,2.960005 MB,49.0%

Starting Document Annotation For File :- ELOCTA_clean_ANNEX II.json
Error Found No Authorization Code Found In The Document ELOCTA_clean_ANNEX II.json
Completed Document Annotation
Metrics : 1: Document Annotation,0.0005 Min,0.00657 MB,0.163982 MB,49.0%

Starting Extracting Content Between Heading For File :- ELOCTA_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX II.json
--------------------------------------------
Completed E

2021-06-07 19:48:29,936 : XmlGeneration_1_T : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:29,941 : XmlGeneration_1_T : Initiating XML Generation | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:29,941 : XmlGeneration_1_T : Initiating XML Generation | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:30,069 : XmlGeneration_1_T : Writing to File:ELOCTA_clean_ANNEX II.xml | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:30,069 : XmlGeneration_1_T : Writing to File:ELOCTA_clean_ANNEX II.xml | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:30,083 : Flow Logger HTML_b : 1: Generate XML,0.0038 Min,0.406907 MB,0.637258 MB,49.0%
 | H | CAP |  en | HTML | ELOCTA_clean.htm


Metrics : 1: Generate XML,0.0038 Min,0.406907 MB,0.637258 MB,49.0%

b'<?xml version="1.0" encoding="UTF-8"?>\n<!-- This is a template for a FHIR resource, and needs items (marked with "${}") replacing to make a real instance -->\n<!-- The resulting instance is a Bundle of Bundles, each of which is a document (having a Composition, and supporting resources) -->\n<!-- 2020-02-22 -->\n<!-- This is for FHIR version R5 Preview 2 (May 2020) -->\n<Bundle xmlns="http://hl7.org/fhir">\n\t<type value="collection"/>\n\t<!-- Repeat at this level per document -->\n\t<entry>\n\t\t<fullUrl value="urn:uuid:eb987a8a-38e9-407c-a010-98f0afbbfe13"/>\n\t\t<!-- Top level of each document is a also FHIR Bundle, of type "document"\n\t\t see http://hl7.org/fhir/documents.html, http://hl7.org/fhir/bundle.html\n\t \t All the other resources for this document are within this. -->\n\t\t<resource>\n            <Bundle>\n            \t<!-- When PUTing, some servers mandate an id here to match the existing id -->\n  

2021-06-07 19:48:34,113 : XML Submission Logger_1_o : Initiating Submission To FHIR Server | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:34,118 : XML Submission Logger_1_o : Response{"resourceType":"Bundle","id":"3084d30a-7f45-45cb-a349-ea2897f7b5d2","meta":{"versionId":"1","lastUpdated":"2021-06-07T14:18:33.505+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:eb987a8a-38e9-407c-a010-98f0afbbfe13","resource":{"resourceType":"Bundle","id":"bf607ec5-b63c-4996-ba60-d1db9927b9ef","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-07T14:18:29+00:00","entry":[{"fullUr | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:34,124 : XML Submission Logger_1_o : POST sucessful: XML added with id: 3084d30a-7f45-45cb-a349-ea2897f7b5d2 | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:34,127 : Flow Logger HTML_b : 1: Submit FHIR Msg,0.0673 

POST sucessful: XML added with id 3084d30a-7f45-45cb-a349-ea2897f7b5d2
Metrics : 1: Submit FHIR Msg,0.0673 Min,0.049401 MB,0.299189 MB,49.0%

Created XML File For :- ELOCTA_clean_ANNEX II.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-07 19:48:35,169 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:37,246 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:38,165 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:39,429 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/005  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:41,169 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/006  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:44,540 : List Bundle Creation Logger_1_0 : Getting list bundle for MAN EU/1/15/1046/007  | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:46,453 : List Bundle Creation Logger_1_0 : Getting li

Updating


2021-06-07 19:48:56,725 : List Bundle Creation Logger_1_0 : List update successfully completed 5829d349-155f-4f0a-ac97-6a29dd532996 | H | CAP |  en | 1 | ELOCTA_clean_ANNEX II.json
2021-06-07 19:48:56,726 : Flow Logger HTML_b : Completed list bundle update/addition | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:48:56,733 : Flow Logger HTML_b : 1: Update/Add List Bundle,0.3766 Min,0.303822 MB,0.645687 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:48:56,740 : Flow Logger HTML_b : 



||||||||||||||||||||||||||||||||2 ||||| ELOCTA_clean_ANNEX III.json||||||||||||||||||||||||||||||||



 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:48:56,748 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Starting Heading Extraction | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json


Metrics : 1: Update/Add List Bundle,0.3766 Min,0.303822 MB,0.645687 MB,49.1%

Starting Heading Extraction For File :- ELOCTA_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX III.json
--------------------------------------------
Labelling


2021-06-07 19:48:57,485 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Started Extracting Heading | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:48:58,542 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed : <=1|25.0|(86, 100, 95)|0.921| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'A. LABELLING' | Qrd txt :- 'LABELLING ' | Matched :- 'True'
2021-06-07 19:48:58,555 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed As This The First Heading | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-07 19:48:58,586 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed : Contains<>|111.76|(64, 85, 86)|0.877| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'PARTICULARS TO APPEAR ON THE OUTER PACKAGING' | Qrd txt :- 'PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>' | Matched :- 'True

2021-06-07 19:49:15,942 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '15. INSTRUCTIONS ON USE' | Qrd txt :- '15. INSTRUCTIONS ON USE' | Matched :- 'True'
2021-06-07 19:49:15,961 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22017' | prevHeadingCurrId :- '22016' | prevHeadingFoundId :- '22016'
2021-06-07 19:49:16,331 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '16. INFORMATION IN BRAILLE' | Qrd txt :- '16. INFORMATION IN BRAILLE' | Matched :- 'True'
2021-06-07 19:49:16,352 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22018' | prevHeadingCurrId :- '22017' | prevHeadingFoundId :- '22017'
2021-06-07 19:49:17,265 : Heading Extraction ELOCTA_cl


OriginalCheck



2021-06-07 19:49:22,516 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:49:22,532 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:49:22,701 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:49:22,912 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:49:25,448 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:49:25,467 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:49:27,577 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:49:27,598 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:49:2

2021-06-07 19:49:43,203 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:49:44,413 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:49:44,435 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:49:45,204 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:49:48,523 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:49:48,549 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:49:48,742 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:49:48,943 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:49:51,755 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:49:51,778 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:49:53,811 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:49:53,833 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:49:5

2021-06-07 19:50:08,550 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:50:09,710 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:50:09,732 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:50:10,386 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:50:13,734 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:50:13,761 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:50:13,923 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:50:14,156 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:50:17,132 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:50:17,148 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:50:19,237 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:50:19,259 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:50:1

2021-06-07 19:50:34,716 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:50:35,862 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:50:35,880 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:50:36,531 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:50:39,845 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:50:39,863 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:50:40,040 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:50:40,174 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:50:42,898 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:50:42,923 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:50:45,054 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:50:45,075 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:50:4

2021-06-07 19:50:59,803 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:51:00,836 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:51:00,851 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:51:01,103 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:51:04,135 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:51:04,157 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:51:04,368 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:51:04,612 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:51:06,789 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:51:06,811 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:51:08,393 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:51:08,404 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:51:0

2021-06-07 19:51:20,612 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:51:21,551 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:51:21,570 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:51:22,101 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:51:24,491 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:51:24,507 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:51:24,642 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:51:24,801 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:51:26,925 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:51:26,943 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:51:28,407 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:51:28,432 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:51:2

2021-06-07 19:51:39,953 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:51:40,917 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:51:40,933 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:51:41,486 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:51:43,883 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:51:43,900 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:51:44,020 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:51:44,299 : Heading Extraction EL

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-07 19:51:46,999 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-06-07 19:51:47,014 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-06-07 19:51:48,483 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-06-07 19:51:48,499 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-06-07 19:51:4

2021-06-07 19:52:00,205 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-06-07 19:52:01,092 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-06-07 19:52:01,117 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-06-07 19:52:01,646 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO A


OriginalCheck



2021-06-07 19:52:04,090 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:52:04,098 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:52:04,245 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:52:04,417 : Heading Extraction EL


OriginalCheck



2021-06-07 19:52:09,056 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Match Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-06-07 19:52:09,072 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Passed | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
 DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'n_ANNEX III.json_R : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY
2021-06-07 19:52:09,209 : Heading Extraction ELOCTA_clean_ANNEX III.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-06-07 19:52:09,389 : Heading Extraction EL



Heading Not Found 
 ['MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS', 'NAME OF THE MARKETING AUTHORISATION HOLDER']


dict_keys([])
Completed Heading Extraction For File
Metrics : 2: Heading Extraction,3.2316 Min,1.390068 MB,3.838104 MB,49.1%

Starting Document Annotation For File :- ELOCTA_clean_ANNEX III.json
 ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']



EU/1/15/1046/001


2021-06-07 19:52:11,595 : Flow Logger HTML_b : Completed Document Annotation | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:11,604 : Flow Logger HTML_b : 2: Document Annotation,0.016 Min,0.158243 MB,0.207923 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:11,609 : Flow Logger HTML_b : Starting Extracting Content Between Heading | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:11,615 : ExtractContentBetween_2_N : Cleaning Match Results | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:11,626 : ExtractContentBetween_2_N : Finished Cleaning Match Results | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json


Found entry with code 220000000061
Found Packaged Product Definition
Error Found Missing Key 'holder' in entry key value pair
Completed Document Annotation
Metrics : 2: Document Annotation,0.016 Min,0.158243 MB,0.207923 MB,49.1%

Starting Extracting Content Between Heading For File :- ELOCTA_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ANNEX III.json
--------------------------------------------


2021-06-07 19:52:13,253 : Flow Logger HTML_b : Completed Extracting Content Between Heading | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:13,255 : Flow Logger HTML_b : 2: Content Extraction,0.0274 Min,0.599588 MB,2.843106 MB,49.0%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:13,303 : XmlGeneration_2_v : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:13,304 : XmlGeneration_2_v : Initiating XML Generation | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json


Completed Extracting Content Between Heading
Metrics : 2: Content Extraction,0.0274 Min,0.599588 MB,2.843106 MB,49.0%

Already Exists


2021-06-07 19:52:13,962 : XmlGeneration_2_v : Writing to File:ELOCTA_clean_ANNEX III.xml | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:13,974 : Flow Logger HTML_b : 2: Generate XML,0.0119 Min,2.456483 MB,4.065918 MB,49.0%
 | H | CAP |  en | HTML | ELOCTA_clean.htm


Metrics : 2: Generate XML,0.0119 Min,2.456483 MB,4.065918 MB,49.0%



2021-06-07 19:52:20,761 : XML Submission Logger_2_x : Initiating Submission To FHIR Server | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:20,768 : XML Submission Logger_2_x : Response{"resourceType":"Bundle","id":"c4d61119-79cd-4bb9-a332-acd946f2042a","meta":{"versionId":"1","lastUpdated":"2021-06-07T14:22:19.684+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:ec09c316-3161-4028-89d2-1f9bd57bb37e","resource":{"resourceType":"Bundle","id":"e142008d-4fbc-4721-89ef-46db27ca67c5","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-07T14:22:13+00:00","entry":[{"fullUr | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:20,789 : XML Submission Logger_2_x : POST sucessful: XML added with id: c4d61119-79cd-4bb9-a332-acd946f2042a | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:20,792 : Flow Logger HTML_b : 2: Submit FHIR Msg,0.11

POST sucessful: XML added with id c4d61119-79cd-4bb9-a332-acd946f2042a
Metrics : 2: Submit FHIR Msg,0.1135 Min,0.069039 MB,1.275014 MB,49.0%

Created XML File For :- ELOCTA_clean_ANNEX III.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008', 'EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-07 19:52:21,864 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:22,961 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:23,976 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:27,791 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/005  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:28,821 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/006  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:29,780 : List Bundle Creation Logger_2_W : Getting list bundle for MAN EU/1/15/1046/007  | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:30,947 : List Bundle Creation Logger_2_W : Gett

Updating


2021-06-07 19:52:42,517 : List Bundle Creation Logger_2_W : List update successfully completed 5829d349-155f-4f0a-ac97-6a29dd532996 | H | CAP |  en | 2 | ELOCTA_clean_ANNEX III.json
2021-06-07 19:52:42,517 : Flow Logger HTML_b : Completed list bundle update/addition | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:42,529 : Flow Logger HTML_b : 2: Update/Add List Bundle,0.3622 Min,0.291785 MB,0.445833 MB,49.1%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:42,533 : Flow Logger HTML_b : 



||||||||||||||||||||||||||||||||3 ||||| ELOCTA_clean_ PACKAGE LEAFLET.json||||||||||||||||||||||||||||||||



 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:52:42,550 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Starting Heading Extraction | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json


Metrics : 2: Update/Add List Bundle,0.3622 Min,0.291785 MB,0.445833 MB,49.1%

Starting Heading Extraction For File :- ELOCTA_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\en\2021-05-21T09-52-29Z\partitionedJSONs\ELOCTA_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet


2021-06-07 19:52:43,048 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Started Extracting Heading | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:52:43,079 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed : <=4|16.67|(91, 100, 95)|0.913| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- 'B. PACKAGE LEAFLET' | Qrd txt :- 'PACKAGE LEAFLET' | Matched :- 'True'
2021-06-07 19:52:43,089 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed As This The First Heading | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-07 19:53:00,736 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What is in this leaflet' | Qrd txt :- 'What is in this leaflet' | Matched :- 'True'
2021-06-07 19:53:00,749 : Heading Extraction E

----------------------------------
RemovedByStyle
----------------------------------


 before you use ELOCTA' | Qrd txt :- '2. What you need to know before you <take> <use> ELOCTA ' | Matched :- 'True'|(90, 80, 95)|0.911| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.What you need to know
2021-06-07 19:53:01,514 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:01,524 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:01,532 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Failed By Style | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-07 19:53:02,237 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed : Contains<>|52.63|(79, 63, 86)|0.894| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.How to use ELOCTA ' | Qrd txt :- '3. How to <take> <use> ELOCTA ' | Matched :- 'True'
2021-06-07 19:53:02,248 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:02,264 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:02,274 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Failed By Style | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrI

----------------------------------
RemovedByStyle
----------------------------------


2021-06-07 19:53:02,775 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed : <=4|4.35|(98, 96, 98)|0.955| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.Possible side effects ' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'
2021-06-07 19:53:02,785 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:02,801 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:02,807 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Failed By Style | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | 

----------------------------------
RemovedByStyle
----------------------------------


2021-06-07 19:53:03,306 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed : <=7|4.76|(98, 95, 98)|0.95| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '5.How to store ELOCTA' | Qrd txt :- '5. How to store ELOCTA' | Matched :- 'True'
2021-06-07 19:53:03,323 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:03,331 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:03,340 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Failed By Style | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHe

----------------------------------
RemovedByStyle
----------------------------------


 and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'27|(99, 98, 99)|0.949| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '6.Contents of the pack
2021-06-07 19:53:04,031 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:04,041 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-06-07 19:53:04,051 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Failed By Style | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-07 19:53:04,334 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '1. What ELOCTA is and what it is used for' | Qrd txt :- '1. What ELOCTA is and what it is used for' | Matched :- 'True'
2021-06-07 19:53:04,359 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23004' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
 you need to know before you use ELOCTA' | Qrd txt :- '2. What you need to know before you <take> <use> ELOCTA ' | Matched :- 'True'76| | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.       What
2021-06-07 19:53:15,593 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23004' | prevHeadingFoundId :- '23004'

2021-06-07 19:53:55,902 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '23018' | prevHeadingFoundId :- '23018'
2021-06-07 19:53:59,457 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Match Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Reporting of side effects' | Qrd txt :- 'Reporting of side effects' | Matched :- 'True'
2021-06-07 19:53:59,473 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Flow Is Broken | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23021' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23019'
2021-06-07 19:53:59,490 : Heading Extraction ELOCTA_clean_ PACKAGE LEAFLET.json_M : Validation Passed | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json | currHeadId :- '23021' | prevHeadingCurrId :- '23019' | prevHeadingFoundId :- '23019'
202



Heading Not Found 
 ['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', 'Children <and adolescents>', 'X with <food> <and> <,> <drink> <and> <alcohol>', 'Additional side effects in children <and adolescents>', 'For any information about this medicine, please contact the local representative of the Marketing Authorisation Holder:', 'Other sources of information', 'The following information is intended for healthcare professionals only:']


dict_keys(['1. What ELOCTA is and what it is used for', '2. What you need to know before you <take> <use> ELOCTA ', '3. How to <take> <use> ELOCTA ', '4. Possible side effects', '5. How to store ELOCTA', '6. Contents of the pack and other information'])
Completed Heading Extraction For File
Metrics : 3: Heading Extraction,1.8818 Min,0.952633 MB,3.545449 MB,49.4%

Startin

2021-06-07 19:54:35,642 : XmlGeneration_3_n : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:35,649 : XmlGeneration_3_n : Initiating XML Generation | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:35,925 : XmlGeneration_3_n : Writing to File:ELOCTA_clean_ PACKAGE LEAFLET.xml | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:35,941 : Flow Logger HTML_b : 3: Generate XML,0.0064 Min,1.374486 MB,5.232301 MB,49.2%
 | H | CAP |  en | HTML | ELOCTA_clean.htm


Metrics : 3: Generate XML,0.0064 Min,1.374486 MB,5.232301 MB,49.2%



2021-06-07 19:54:41,471 : XML Submission Logger_3_L : Initiating Submission To FHIR Server | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:41,480 : XML Submission Logger_3_L : Response{"resourceType":"Bundle","id":"544d7f49-94f1-43a5-bef9-0b3861b414a8","meta":{"versionId":"1","lastUpdated":"2021-06-07T14:24:39.737+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:ca966948-9805-4bd5-a514-7e449d51a6a1","resource":{"resourceType":"Bundle","id":"4fc2189a-5506-4aac-a690-17c4f81acf19","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-07T14:24:35+00:00","entry":[{"fullUr | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:41,492 : XML Submission Logger_3_L : POST sucessful: XML added with id: 544d7f49-94f1-43a5-bef9-0b3861b414a8 | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:41,507 : Flow Logger HTML_b : 3:

POST sucessful: XML added with id 544d7f49-94f1-43a5-bef9-0b3861b414a8
Metrics : 3: Submit FHIR Msg,0.0927 Min,0.209287 MB,2.451432 MB,49.2%

Created XML File For :- ELOCTA_clean_ PACKAGE LEAFLET.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008', 'EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-07 19:54:42,425 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:43,557 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:48,999 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:50,336 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/005  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:51,196 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/006  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:52,251 : List Bundle Creation Logger_3_7 : Getting list bundle for MAN EU/1/15/1046/007  | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:54:53,38

Updating


2021-06-07 19:55:05,708 : List Bundle Creation Logger_3_7 : List update successfully completed 5829d349-155f-4f0a-ac97-6a29dd532996 | H | CAP |  en | 3 | ELOCTA_clean_ PACKAGE LEAFLET.json
2021-06-07 19:55:05,712 : Flow Logger HTML_b : Completed list bundle update/addition | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:55:05,716 : Flow Logger HTML_b : 3: Update/Add List Bundle,0.4034 Min,0.293688 MB,0.394328 MB,49.2%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:55:05,722 : Flow Logger HTML_b : Completed Processing Partitioned Jsons | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:55:05,726 : Flow Logger HTML_b : 3: Completed,0.0001 Min,0.149088 MB,0.151427 MB,49.2%
 | H | CAP |  en | HTML | ELOCTA_clean.htm
2021-06-07 19:55:05,730 : Flow Logger HTML_b : Final Metrics,9.2637 Min,0.0 MB,18.086029 MB,49.5%
 | H | CAP |  en | HTML | ELOCTA_clean.htm


Metrics : 3: Update/Add List Bundle,0.4034 Min,0.293688 MB,0.394328 MB,49.2%

Metrics : 3: Completed,0.0001 Min,0.149088 MB,0.151427 MB,49.2%

Metrics : Final Metrics,9.2637 Min,0.0 MB,18.086029 MB,49.5%

