In [1]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

In [2]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames
from listBundle.addAndUpdateListBundle.addAndUpdateListBundle import ListBundleHandler

class FolderNotFoundError(Exception):
    """Signals that a folder the pipeline expects on disk does not exist."""

class Metrics:
    """Record per-step wall time and memory usage to a CSV file and a logger.

    Each call to ``getMetric`` closes the current measurement window, writes
    one CSV row for it and opens a new window.  ``end`` writes a summary row
    and releases the file handle and the tracemalloc tracer.

    The object can also be used as a context manager, in which case ``end``
    is invoked automatically when the ``with`` block exits.

    Args:
        logFileName: Path of the CSV file; rows are appended to it.
        logger: Object exposing ``logFlowCheckpoint(str)``; every row is
            mirrored to it.
    """

    CSV_HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so only emit the header when the
        # file is new or empty; otherwise repeated runs interleave header
        # lines with data rows.
        needsHeader = not (os.path.exists(self.logFileName)
                           and os.path.getsize(self.logFileName) > 0)
        self.writer = open(self.logFileName, 'a', encoding='utf-8')
        if needsHeader:
            self.writer.write(self.CSV_HEADER)
            self.writer.flush()

        # Start measuring only once the output file is known to be usable.
        self.start()

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, excTraceback):
        self.end()
        return False

    def start(self):
        """Open a measurement window: remember the time and start tracing."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Write one row for the step named ``msg`` and start a new window.

        Args:
            msg: Step name, written as the first CSV column.
        """
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; the CSV stores megabytes.
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        # Index 2 of psutil.virtual_memory() is the used-memory percentage.
        usedRamPerc = psutil.virtual_memory()[2]

        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(f"{outputString}")

        print(f"Metrics : {outputString}")
        self.writer.write(outputString)
        # Flush per row so already-measured steps survive a later crash.
        self.writer.flush()

        # Restarting the tracer clears its statistics, so the next step's
        # current/peak figures are measured from scratch.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the summary row, then close the file and stop tracing."""
        try:
            current, _ = tracemalloc.get_traced_memory()
            current = current / 10**6
            outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
            print(f"Metrics : {outputString}")
            self.logger.logFlowCheckpoint(f"{outputString}")
            self.writer.write(outputString)
        finally:
            # Always release the handle and the tracer, even if logging or
            # writing the summary row failed.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string form of its integer value when possible.

    Args:
        x: Any value.

    Returns:
        ``str(int(x))`` when ``int(x)`` succeeds; otherwise ``x`` unchanged
        (for example ``None``, non-numeric strings, NaN or infinity).
    """
    try:
        return str(int(x))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "leave the value alone"; a bare
        # except would also swallow KeyboardInterrupt / SystemExit.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised id columns.

    The ``parent_id`` and ``id`` columns are passed value-by-value through
    ``convertToInt``; all other columns are left as pandas created them.

    Args:
        collection: Data accepted by ``pd.DataFrame`` that yields
            ``parent_id`` and ``id`` columns.

    Returns:
        The resulting DataFrame.
    """
    hierarchyFrame = pd.DataFrame(collection)

    # Same order as before: parent_id first, then id.
    for idColumn in ('parent_id', 'id'):
        hierarchyFrame[idColumn] = hierarchyFrame[idColumn].apply(
            lambda value: convertToInt(value))

    return hierarchyFrame

def getRandomString(N):
    """Return ``N`` random characters drawn from A-Z, 0-9 and a-z.

    Characters are picked one at a time with ``random.choice``; a
    non-positive ``N`` yields the empty string.
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    pickedChars = []
    for _ in range(N):
        pickedChars.append(random.choice(alphabet))
    return ''.join(pickedChars)


def _swapHtmlExtension(fileName, newExtension):
    """Return ``fileName`` with a trailing .htm/.html replaced by ``newExtension``."""
    root, extension = os.path.splitext(fileName)
    if extension.lower() in ('.htm', '.html'):
        return root + newExtension
    return fileName


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML document(s) matching ``htmlDocName`` into a JSON file.

    The output is written to ``<basePath>/outputJSON`` under the name of
    ``htmlDocName`` with its .htm/.html extension replaced by ``.json``; a
    companion style file with a ``.txt`` extension is written next to it.

    Args:
        controlBasePath: Passed to ``StyleRulesDictionary``.
        basePath: Folder that contains the input HTML; must exist.
        domain, procedureType, languageCode: Passed to the loggers and to
            ``StyleRulesDictionary``.
        htmlDocName: File name (or glob pattern) of the input HTML, relative
            to ``basePath``.
        fileNameQrd: Passed to ``StyleRulesDictionary`` as ``fileName``.
        fileNameLog: Log file handed to every ``MatchLogger``.

    Returns:
        Tuple ``(json file name without directory, full style file path)``.

    Raises:
        FolderNotFoundError: If ``basePath`` does not exist.
        FileNotFoundError: If nothing in ``basePath`` matches ``htmlDocName``.
    """
    # Guard clause: previously this branch tried to log through a logger that
    # had not been created yet, so callers saw UnboundLocalError instead of
    # the intended FolderNotFoundError.
    if not os.path.exists(basePath):
        raise FolderNotFoundError(basePath + " not found")

    filenames = glob.glob(os.path.join(basePath, htmlDocName))
    if not filenames:
        # Without any input there is no output name to return.
        raise FileNotFoundError(
            f"No file matching {htmlDocName} found in {basePath}")

    # Create the outputJSON folder if it doesn't exist.
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Output names depend only on htmlDocName, so compute them once.  Only the
    # file name's extension is swapped; directory components are untouched.
    output_filename = os.path.join(
        output_json_path, _swapHtmlExtension(htmlDocName, '.json'))
    style_filepath = os.path.join(
        output_json_path, _swapHtmlExtension(htmlDocName, '.txt'))

    for input_filename in filenames:
        print("-------------", style_filepath, "-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition the combined JSON document into one JSON per section.

    Loads the style rules to obtain the QRD section headings, then hands
    ``<basePath>/outputJSON/<fileNameJson>`` to ``DocTypePartitioner``.

    Returns:
        Whatever ``DocTypePartitioner.partitionHtmls`` returns (the caller
        treats it as an iterable of partition file paths).
    """
    rulesLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}',
        fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)
    styleRules = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    combinedJsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", combinedJsonPath)

    splitLogger = MatchLogger(
        f'Partition_{getRandomString(1)}',
        fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)
    docPartitioner = DocTypePartitioner(splitLogger, domain, procedureType)

    return docPartitioner.partitionHtmls(
        styleRules.qrd_section_headings, combinedJsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    The number of top and bottom headings considered depends on
    ``documentNumber`` (0, 1, 2, or any other value as a fallback).  The
    actual matching is delegated to ``MatchDocument``.

    Returns:
        The three values produced by
        ``MatchDocument.matchHtmlHeaddingsWithQrd``, as a tuple
        ``(df, coll, documentType)``.
    """
    # (documentNumber, topHeadingsConsidered, bottomHeadingsConsidered),
    # checked in order; anything else falls back to (5, 10).
    headingWindows = ((0, 4, 6), (1, 3, 5), (2, 5, 15))
    topHeadingsConsidered, bottomHeadingsConsidered = next(
        ((top, bottom) for number, top, bottom in headingWindows
         if documentNumber == number),
        (5, 10))

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    headingLogger = MatchLogger(
        f"Heading Extraction {fileNameDoc}_{getRandomString(1)}",
        fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    headingLogger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber)
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        headingLogger, controlBasePath, basePath, domain, procedureType,
        languageCode, documentNumber, fileNameDoc, fileNameQrd,
        fileNameMatchRuleBook, fileNameDocumentTypeNames,
        topHeadingsConsidered, bottomHeadingsConsidered,
        stopWordFilterLen, stopWordlanguage, isPackageLeaflet, medName)

    matchedDf, matchedColl, matchedDocType = matcher.matchHtmlHeaddingsWithQrd()
    return matchedDf, matchedColl, matchedDocType


def parseDocument(controlBasePath,
                  basePath,
                  htmlDocName,
                  fileNameQrd,
                  fileNameMatchRuleBook,
                  fileNameDocumentTypeNames,
                  jsonTempFileName,
                  listBundleDocumentTypeCodesFileName,
                  apiMmgtBaseUrl,
                  getListApiEndPointUrlSuffix,
                  addUpdateListApiEndPointUrlSuffix,
                  addBundleApiEndPointUrlSuffix,
                  apiMmgtSubsKey,
                  medName = None,
                  pmsApiSubsKey='c20835db4b1b4e108828a8537ff41506',
                  pmsApiBaseUrl='https://spor-sit.azure-api.net/pms/api/v2/'):
    """Run the whole pipeline for one HTML document.

    Steps, each followed by a metrics checkpoint:
      1. Convert the HTML to a single JSON (``convertHtmlToJson``).
      2. Split that JSON into per-section JSONs (``splitJson``).
      3. For every partition: extract/validate headings, annotate the
         document, extract the content between headings, generate FHIR XML,
         submit it, and add/update the list bundle.

    ``domain``, ``procedureType``, ``medName``, ``languageCode`` and the
    timestamp are taken from the last five components of ``basePath``
    (``.../<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``).

    Args:
        controlBasePath: Folder with the control files.
        basePath: Working folder of this document (see layout above).
        htmlDocName: Name of the input HTML file inside ``basePath``.
        fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames,
        jsonTempFileName, listBundleDocumentTypeCodesFileName: Control file
            names forwarded to the individual steps.
        apiMmgtBaseUrl, getListApiEndPointUrlSuffix,
        addUpdateListApiEndPointUrlSuffix, addBundleApiEndPointUrlSuffix,
        apiMmgtSubsKey: Endpoint settings forwarded to ``FhirService`` and
            ``ListBundleHandler``.
        medName: NOTE: currently ignored; the value is always re-derived
            from ``basePath``.
        pmsApiSubsKey: Subscription key handed to ``DocumentAnnotation``.
        pmsApiBaseUrl: Base URL handed to ``DocumentAnnotation``.

    Returns:
        None.
    """
    # NOTE(security): the pmsApiSubsKey default is a credential that used to
    # be hard-coded inline; callers should pass their own key and the default
    # should eventually be removed from source control.

    listRegulatedAuthCodesAccrossePI = []

    pathSep = "/" if "/" in basePath else "\\"

    fileNameLog = os.path.join(basePath, 'FinalLog.txt')

    pathComponents = basePath.split(pathSep)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode = pathComponents[-2]
    medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    flowLogger = MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath, 'Metrics.csv'), flowLogger)

    # try/finally guarantees the metrics file is closed and memory tracing is
    # stopped even when one of the steps below raises.
    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        ###Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-", stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        ###Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Keep only the file names; later steps resolve folders themselves.
        partitionedJsonPaths = [path.split(pathSep)[-1] for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        # Label of the closing checkpoint; stays index-free when there are no
        # partitions (previously that case crashed on an unbound ``index``).
        completedLabel = "Completed"

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            completedLabel = f"{index}: Completed"

            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The fourth partition (index 3) is handled as the package leaflet.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned, pmsApiSubsKey, pmsApiBaseUrl, df, coll, index)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as e:
                # Annotation is best-effort: continue without it, but leave a
                # trace in the flow log rather than only on stdout.
                pms_oms_annotation_data = None
                print("Error Found", str(e))
                flowLogger.logFlowCheckpoint(f"Document Annotation Failed: {e}")

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger = MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger = MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json', '.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger = MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, apiMmgtBaseUrl, addBundleApiEndPointUrlSuffix, apiMmgtSubsKey, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

            flowLogger.logFlowCheckpoint("Starting list bundle update/addition")
            # Identifiers accumulate across partitions, so later partitions
            # see the codes found in earlier ones.
            regulatedAuthIds = documentAnnotationObj.listRegulatedAuthorizationIdentifiers
            if regulatedAuthIds is not None:
                listRegulatedAuthCodesAccrossePI.extend(regulatedAuthIds)
            listBundleLogger = MatchLogger(f'List Bundle Creation Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            print("\nlistRegulatedAuthCodesAccrossePI", listRegulatedAuthCodesAccrossePI)
            try:
                listBundleHandler = ListBundleHandler(listBundleLogger,
                         domain,
                         procedureType,
                         index,
                         documentType,
                         languageCode,
                         medName,
                         controlBasePath,
                         jsonTempFileName,
                         listBundleDocumentTypeCodesFileName,
                         fileNameDocumentTypeNames,
                         listRegulatedAuthCodesAccrossePI,
                         apiMmgtBaseUrl,
                         getListApiEndPointUrlSuffix,
                         addUpdateListApiEndPointUrlSuffix,
                         apiMmgtSubsKey)

                listBundleXml = listBundleHandler.addOrUpdateDocumentItem(str(fhirServiceObj.SubmittedFhirMsgRefId))
                listBundleHandler.submitListXmLToServer(listBundleXml)

                flowLogger.logFlowCheckpoint("Completed list bundle update/addition")
                metrics.getMetric(f"{index}: Update/Add List Bundle")
            except Exception as e:
                # List bundle handling is best-effort; processing continues
                # with the next partition.
                print(str(e))
                if 'No MAN Code found' in str(e):
                    flowLogger.logFlowCheckpoint("Skipping list bundle addtion/update as no MAN found")
                else:
                    flowLogger.logFlowCheckpoint(f"List bundle update/addition failed: {e}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        metrics.getMetric(completedLabel)
    finally:
        metrics.end()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vipsharm\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Optional pre-processing cell: run the project's Word-to-HTML converter.
# NOTE(review): presumably this produces the HTML that the later cells
# consume -- inputs/outputs are defined inside WordToHtmlConvertor; confirm
# there before relying on it.
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

In [3]:
# Location of the input archive: the "inputblob" folder next to the current
# working directory (i.e. inside its parent), plus the archive's file name.
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')
inputZipFileName = "ELOCTA~H~CAP~de~2021-05-22T18-23-40Z.zip"

In [4]:
# Control file names and API endpoints handed to parseDocument.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'
jsonTempFileName = 'listBundleJsonTemplate.json'
listBundleDocumentTypeCodesFileName = 'listBundleDocumentTypeCodes.json'
apiMmgtBaseUrl = "https://ema-dap-epi-tst-fhir-apim.azure-api.net"
getListApiEndPointUrlSuffix = "/epi/v1/List"
addUpdateListApiEndPointUrlSuffix = "/epi-w/v1/List"
addBundleApiEndPointUrlSuffix = "/epi-w/v1/Bundle"
# NOTE(security): subscription key committed in source; it should be loaded
# from configuration or the environment and rotated.
apiMmgtSubsKey = "9e9d47b8a08148f9833e17462d90574a"

# Zip name layout: <medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip
info = inputZipFileName.split("~")

if len(info) < 5:
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise ValueError(
        f"Missing required info in the zip file name {inputZipFileName}")

medName, domain, procedureType, languageCode, timestamp = info[:5]
timestamp = timestamp.replace(".zip", "")

# A backslash in the working directory is taken to mean a local Windows run;
# otherwise paths are rooted at the mounted file share.
if "\\" in os.getcwd():
    localEnv = True
    rootFolderPath = os.path.abspath('..')
else:
    localEnv = False
    rootFolderPath = fsMountName

# NOTE: when inputZipFolderPath is already absolute, os.path.join returns it
# unchanged and rootFolderPath is ignored for it.
inputZipFolderPath = os.path.join(rootFolderPath, inputZipFolderPath)
outputFolderPath = os.path.join(rootFolderPath, 'work', domain, procedureType,
                                medName, languageCode, timestamp)
controlFolderPath = os.path.join(rootFolderPath, 'control')

print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the search (execute) bit to be usable on POSIX systems;
# 0o777 is also os.makedirs' own default and is still filtered by the umask.
mode = 0o777

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/", "\\")
    outputFolderPath = outputFolderPath.replace("/", "\\")
    controlFolderPath = controlFolderPath.replace("/", "\\")

# Create each folder independently: with a single try block, an already
# existing first folder prevented the remaining ones from being created.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)

# Only the top level of the extraction folder is inspected.
_, _, fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if ".htm" in fileName]
if not htmlFileNames:
    raise FileNotFoundError(
        f"No .htm/.html file found in {outputFolderPath} after extracting {inputZipFileName}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



ELOCTA~H~CAP~de~2021-05-22T18-23-40Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Elocta_clean.htm


In [None]:
# Run the full pipeline on the extracted HTML file, using the folders,
# control file names and API settings prepared in the previous cell.
parseDocument(controlFolderPath,
              outputFolderPath,
              htmlFileName,
              fileNameQrd,
              fileNameMatchRuleBook,
              fileNameDocumentTypeNames,
              jsonTempFileName,
              listBundleDocumentTypeCodesFileName,
              apiMmgtBaseUrl,
              getListApiEndPointUrlSuffix,
              addUpdateListApiEndPointUrlSuffix,
              addBundleApiEndPointUrlSuffix,
              apiMmgtSubsKey,
              medName)

2021-06-11 16:00:39,427 : Flow Logger HTML_2 : Starting HTML Conversion To Json | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:39,443 : Style Dictionary_q : Reading style dictionary in file: rule_dictionary_de.json | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:39,531 : Style Dictionary_q : Qrd Section Keys Retrieved For Style Dictionary: ZUSAMMENFASSUNG DER MERKMALE DES ARZNEIMITTELS, ANHANG II, A. ETIKETTIERUNG , B. PACKUNGSBEILAGE | H | CAP |  de | HTML | Elocta_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'ELOCTA', 'de', '2021-05-22T18-23-40Z'] Elocta_clean.htm
2021-05-22T18-23-40Z de ELOCTA CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\Elocta_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.json


2021-06-11 16:00:41,588 : Parser_A : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.txt | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:55,341 : Parser_A : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.json | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:56,480 : Flow Logger HTML_2 : Completed HTML Conversion To Json | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:56,480 : Flow Logger HTML_2 : HTML Conversion To Json,0.2842 Min,8.633074 MB,26.609983 MB,57.8%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:56,496 : Flow Logger HTML_2 : Starting Json Split | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:56,504 : Style Dictionary_o : Reading style dictionary in file: rule_dictionary_de.json | H | CAP |  de | Json | Elocta_clean.json
2021-06-

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.txt
Metrics : HTML Conversion To Json,0.2842 Min,8.633074 MB,26.609983 MB,57.8%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\outputJSON\Elocta_clean.json
Finding Heading  SmPC 


textHtml1 | ANHANGI | textQrd1 | ANHANGII | 0.981
textHtml1 | ANHANGII | textQrd1 | ANHANGII | 1
textHtml1 | ANHANGIII | textQrd1 | ANHANGII | 0.984


2021-06-11 16:00:57,883 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_SmPC.json | H | CAP |  de | Json | Elocta_clean.json


endPositions [(24, 0.981), (1125, 1), (1208, 0.984)]
startPos,endPos :  0 1125
startPos,endPos :  0 1100
Finding Heading  ANHANG II 


textHtml1 | A.ETIKETTIERUNG | textQrd1 | A.ETIKETTIERUNG | 1


2021-06-11 16:00:58,750 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_ANHANG II.json | H | CAP |  de | Json | Elocta_clean.json


endPositions [(1238, 1)]
startPos,endPos :  1100 1238
startPos,endPos :  1100 1213
Finding Heading  A. ETIKETTIERUNG  




2021-06-11 16:00:59,623 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_ ETIKETTIERUNG .json | H | CAP |  de | Json | Elocta_clean.json


textHtml1 | B.PACKUNGSBEILAGE | textQrd1 | B.PACKUNGSBEILAGE | 1
endPositions [(2689, 1)]
startPos,endPos :  1213 2689
startPos,endPos :  1213 2664


2021-06-11 16:00:59,649 : Partition_8 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_ PACKUNGSBEILAGE.json | H | CAP |  de | Json | Elocta_clean.json
2021-06-11 16:00:59,681 : Flow Logger HTML_2 : ['Elocta_clean_SmPC.json', 'Elocta_clean_ANHANG II.json', 'Elocta_clean_ ETIKETTIERUNG .json', 'Elocta_clean_ PACKUNGSBEILAGE.json'] | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:59,690 : Flow Logger HTML_2 : Completed Json Split | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:59,692 : Flow Logger HTML_2 : Split Json,0.0533 Min,0.239363 MB,34.575655 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:59,692 : Flow Logger HTML_2 : Started Processing Partitioned Jsons | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:00:59,692 : Flow Logger HTML_2 : 



||||||||||||||||||||||||||||||||0 ||||| Elocta_clean_SmPC.json|||||||||||||||||||||||||||||

Finding Heading  B. PACKUNGSBEILAGE 


startPos,endPos :  2664 2900
Metrics : Split Json,0.0533 Min,0.239363 MB,34.575655 MB,57.7%

Starting Heading Extraction For File :- Elocta_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_SmPC.json
--------------------------------------------
SmPC


2021-06-11 16:01:00,436 : Heading Extraction Elocta_clean_SmPC.json_V : Started Extracting Heading | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:01:00,809 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- 'ZUSAMMENFASSUNG DER MERKMALE DES ARZNEIMITTELS' | Qrd txt :- 'ZUSAMMENFASSUNG DER MERKMALE DES ARZNEIMITTELS' | Matched :- 'True'
2021-06-11 16:01:00,818 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Passed As This The First Heading | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-11 16:01:00,890 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- '1. BEZEICHNUNG DES ARZNEIMITTELS' | Qrd txt :- '1. BEZEICHNUNG DES ARZNEIMITTELS' | Matched :- 'True'
2021-06-11 16:01:00,906 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Flow Is Broken 

2021-06-11 16:01:55,786 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- '4.5 Wechselwirkungen mit anderen Arzneimitteln und sonstige Wechselwirkungen' | Qrd txt :- '4.5 Wechselwirkungen mit anderen Arzneimitteln und sonstige Wechselwirkungen' | Matched :- 'True'
2021-06-11 16:01:55,801 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16020' | prevHeadingCurrId :- '16017' | prevHeadingFoundId :- '16017'
2021-06-11 16:01:56,549 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- '4.6 Fertilität, Schwangerschaft und Stillzeit' | Qrd txt :- '4.6 Fertilität, Schwangerschaft und Stillzeit' | Matched :- 'True'
2021-06-11 16:01:56,565 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16022' | prevHeading

2021-06-11 16:02:55,563 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- 'Kinder und Jugendliche' | Qrd txt :- 'Kinder und Jugendliche' | Matched :- 'True'
2021-06-11 16:02:55,579 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Failed As Previous Heading Found is not matching | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16028' | prevHeadingCurrId :- '16027' | prevHeadingFoundId :- '16033'
2021-06-11 16:02:55,611 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- 'Kinder und Jugendliche' | Qrd txt :- 'Kinder und Jugendliche' | Matched :- 'True'
2021-06-11 16:02:55,619 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Failed As Previous Heading Found is not matching | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16031' | prevHeadingCurrId :- '16030' | prevHeadingFoundId :- '16033'
2021-06-11 16:02:55,

2021-06-11 16:04:22,454 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed : SpecialCase2|8.22|(96, 93, 97)|0.973| | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- '6.6     Besondere Vorsichtsmaßnahmen für die Beseitigung und sonstige Hinweise zur Handhabung' | Qrd txt :- '6.6 Besondere Vorsichtsmaßnahmen für die Beseitigung <und sonstige Hinweise zur Handhabung>' | Matched :- 'True'
2021-06-11 16:04:22,470 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currHeadId :- '16053' | prevHeadingCurrId :- '16052' | prevHeadingFoundId :- '16052'
2021-06-11 16:04:27,740 : Heading Extraction Elocta_clean_SmPC.json_V : Match Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | Doc txt :- '7. INHABER DER ZULASSUNG' | Qrd txt :- '7. INHABER DER ZULASSUNG' | Matched :- 'True'
2021-06-11 16:04:27,773 : Heading Extraction Elocta_clean_SmPC.json_V : Validation Passed | H | CAP |  de | 0 | Elocta_clean_SmPC.json | currH



Heading Not Found 
 ['Dieses Arzneimittel unterliegt einer zusätzlichen Überwachung. Dies ermöglicht eine schnelle Identifizierung neuer Erkenntnisse über die Sicherheit. Angehörige von Gesundheitsberufen sind aufgefordert, jeden Verdachtsfall einer Nebenwirkung zu melden. Hinweise zur Meldung von Nebenwirkungen, siehe Abschnitt 4.8.', 'Allgemeine Beschreibung', 'Qualitative und quantitative Zusammensetzung', 'Vorsichtsmaßnahmen vor / bei der Handhabung bzw. vor / während der Anwendung des Arzneimittels', 'Schwangerschaft', 'Stillzeit', 'Fertilität', 'Pharmakodynamische Wirkungen', 'Resorption', 'Verteilung', 'Biotransformation', 'Elimination', 'Linearität/Nicht-Linearität', 'Pharmakokinetische/pharmakodynamische Zusammenhänge', 'Beurteilung der Risiken für die Umwelt (Environmental risk assessment [ERA])', 'Anwendung bei Kindern und Jugendlichen', 'DOSIMETRIE', 'ANWEISUNGEN ZUR ZUBEREITUNG VON RADIOAKTIVEN ARZNEIMITTELN']


dict_keys([])
Completed Heading Extraction For File
Metrics

2021-06-11 16:04:59,602 : Flow Logger HTML_2 : Completed Document Annotation | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:04:59,610 : Flow Logger HTML_2 : 0: Document Annotation,0.4018 Min,0.072618 MB,0.379353 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:04:59,610 : Flow Logger HTML_2 : Starting Extracting Content Between Heading | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:04:59,626 : ExtractContentBetween_0_p : Cleaning Match Results | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:04:59,634 : ExtractContentBetween_0_p : Finished Cleaning Match Results | H | CAP |  de | 0 | Elocta_clean_SmPC.json


{'Author Value': None, 'Medicinal Product Definitions': [('600000576406', 'ELOCTA 250 IU - Powder and solvent for solution for injection'), ('600000576627', 'ELOCTA 500 IU - Powder and solvent for solution for injection'), ('600000576628', 'ELOCTA 750 IU - Powder and solvent for solution for injection'), ('600000576629', 'ELOCTA 1000 IU - Powder and solvent for solution for injection'), ('600000576646', 'ELOCTA 2000 IU - Powder and solvent for solution for injection'), ('600000575782', 'ELOCTA 3000 IU - Powder and solvent for solution for injection'), ('600000574371', 'ELOCTA 4000 IU - Powder and solvent for solution for injection')]}
Completed Document Annotation
Metrics : 0: Document Annotation,0.4018 Min,0.072618 MB,0.379353 MB,57.7%

Starting Extracting Content Between Heading For File :- Elocta_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_SmPC.json
----------------

2021-06-11 16:04:59,940 : Flow Logger HTML_2 : Completed Extracting Content Between Heading | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:04:59,940 : Flow Logger HTML_2 : 0: Content Extraction,0.0055 Min,1.684951 MB,12.902946 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:04:59,973 : XmlGeneration_0_f : Initiating XML Generation | H | CAP |  de | 0 | Elocta_clean_SmPC.json


Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.0055 Min,1.684951 MB,12.902946 MB,57.7%

Already Exists


2021-06-11 16:05:00,448 : XmlGeneration_0_f : Writing to File:Elocta_clean_SmPC.xml | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:00,450 : Flow Logger HTML_2 : 0: Generate XML,0.0085 Min,5.397045 MB,12.680445 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm


Metrics : 0: Generate XML,0.0085 Min,5.397045 MB,12.680445 MB,57.7%



2021-06-11 16:05:12,500 : XML Submission Logger_0_1 : Initiating Submission To FHIR Server | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:12,500 : XML Submission Logger_0_1 : Response{"resourceType":"Bundle","id":"6c23107c-790e-441f-a4d2-e74ea27d0c52","meta":{"versionId":"1","lastUpdated":"2021-06-11T10:35:09.367+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:59d8da23-1089-4080-8e6e-275b3acf58dc","resource":{"resourceType":"Bundle","id":"bed08837-bca0-43b8-b2e2-36c63922b723","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-11T10:34:59+00:00","entry":[{"fullUr | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:12,516 : XML Submission Logger_0_1 : POST sucessful: XML added with id: 6c23107c-790e-441f-a4d2-e74ea27d0c52 | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:12,516 : Flow Logger HTML_2 : 0: Submit FHIR Msg,0.201 Min,0.06511 M

POST sucessful: XML added with id 6c23107c-790e-441f-a4d2-e74ea27d0c52
Metrics : 0: Submit FHIR Msg,0.201 Min,0.06511 MB,5.347737 MB,57.7%

Created XML File For :- Elocta_clean_SmPC.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-11 16:05:13,623 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:14,681 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:15,614 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:16,500 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/005  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:17,455 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/006  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:18,630 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/15/1046/007  | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:19,810 : List Bundle Creation Logger_0_F : Getting list bundle for MAN EU/1/1

Updating


2021-06-11 16:05:24,414 : List Bundle Creation Logger_0_F : List update successfully completed 9d816422-ad4a-4e8f-a92c-db2e55640821 | H | CAP |  de | 0 | Elocta_clean_SmPC.json
2021-06-11 16:05:24,422 : Flow Logger HTML_2 : Completed list bundle update/addition | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:24,422 : Flow Logger HTML_2 : 0: Update/Add List Bundle,0.1984 Min,0.279283 MB,0.347368 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:24,430 : Flow Logger HTML_2 : 



||||||||||||||||||||||||||||||||1 ||||| Elocta_clean_ANHANG II.json||||||||||||||||||||||||||||||||



 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:24,430 : Heading Extraction Elocta_clean_ANHANG II.json_L : Starting Heading Extraction | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json


Metrics : 0: Update/Add List Bundle,0.1984 Min,0.279283 MB,0.347368 MB,57.7%

Starting Heading Extraction For File :- Elocta_clean_ANHANG II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_ANHANG II.json
--------------------------------------------
ANHANG II


2021-06-11 16:05:24,818 : Heading Extraction Elocta_clean_ANHANG II.json_L : Started Extracting Heading | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:24,834 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'ANHANG II' | Qrd txt :- 'ANHANG II' | Matched :- 'True'
2021-06-11 16:05:24,842 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed As This The First Heading | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-11 16:05:24,916 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : Contains<>|37.78|(84, 87, 95)|0.918| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'A.        HERSTELLER DES WIRKSTOFFS BIOLOGISCHEN URSPRUNGS UND HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST' | Qrd txt :- 'A. <HERSTELLER DES WIRKSTOFFS/DER WIRKSTOFFE BIOLOGISCHEN 


OriginalCheck



2021-06-11 16:05:25,843 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Failed As Previous Heading Found is not matching | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17002' | prevHeadingCurrId :- '17001' | prevHeadingFoundId :- '17009'
2021-06-11 16:05:26,311 : Heading Extraction Elocta_clean_ANHANG II.json_L : End Of Sub Section | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:26,383 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : Contains<>|37.78|(84, 87, 95)|0.918| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'A.      HERSTELLER DES WIRKSTOFFS BIOLOGISCHEN URSPRUNGS UND HERSTELLER, DER FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST' | Qrd txt :- 'A. <HERSTELLER DES WIRKSTOFFS/DER WIRKSTOFFE BIOLOGISCHEN URSPRUNGS UND> HERSTELLER, DER (DIE) FÜR DIE CHARGENFREIGABE VERANTWORTLICH IST (SIND)' | Matched :- 'True'
2021-06-11 16:05:26,391 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validatio

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck


OriginalCheck



2021-06-11 16:05:28,311 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : >7|36.21|(85, 79, 85)|0.929| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'Name und Anschrift des Herstellers, der für die Chargenfreigabe verantwortlich ist' | Qrd txt :- 'Name und Anschrift des (der) Hersteller(s), der (die) für die Chargenfreigabe verantwortlich ist (sind)' | Matched :- 'True'
2021-06-11 16:05:28,327 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Flow Is Broken | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '17002'
2021-06-11 16:05:28,343 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17004' | prevHeadingCurrId :- '17002' | prevHeadingFoundId :- '17002'



OriginalCheck



2021-06-11 16:05:29,285 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH' | Qrd txt :- 'B. BEDINGUNGEN ODER EINSCHRÄNKUNGEN FÜR DIE ABGABE UND DEN GEBRAUCH' | Matched :- 'True'
2021-06-11 16:05:29,297 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17005' | prevHeadingCurrId :- '17004' | prevHeadingFoundId :- '17004'



OriginalCheck



2021-06-11 16:05:30,452 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN' | Qrd txt :- 'C. SONSTIGE BEDINGUNGEN UND AUFLAGEN DER GENEHMIGUNG FÜR DAS INVERKEHRBRINGEN' | Matched :- 'True'
2021-06-11 16:05:30,468 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Flow Is Broken | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '17005'
2021-06-11 16:05:30,476 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17007' | prevHeadingCurrId :- '17005' | prevHeadingFoundId :- '17005'
2021-06-11 16:05:30,835 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : >7|1.09|(99, 100, 100)|0.998| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- '·


OriginalCheck



2021-06-11 16:05:31,450 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : <=4|3.57|(98, 100, 100)|0.994| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- '·Risikomanagement-Plan (RMP)' | Qrd txt :- 'Risikomanagement-Plan (RMP)' | Matched :- 'True'
2021-06-11 16:05:31,465 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17010' | prevHeadingCurrId :- '17009' | prevHeadingFoundId :- '17009'



OriginalCheck



2021-06-11 16:05:32,527 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : <=4|10.0|(95, 100, 95)|0.986| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'ANHANG III' | Qrd txt :- 'ANHANG II' | Matched :- 'True'
2021-06-11 16:05:32,535 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Failed As Wrong Heading Found | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | currHeadId :- '17001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '17010'
2021-06-11 16:05:32,663 : Heading Extraction Elocta_clean_ANHANG II.json_L : End Of Sub Section | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:32,674 : Heading Extraction Elocta_clean_ANHANG II.json_L : Match Passed : <=4|10.0|(95, 100, 95)|0.986| | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json | Doc txt :- 'ANHANG III' | Qrd txt :- 'ANHANG II' | Matched :- 'True'
2021-06-11 16:05:32,682 : Heading Extraction Elocta_clean_ANHANG II.json_L : Validation Passed As This The First Headi


OriginalCheck

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



2021-06-11 16:05:33,075 : Flow Logger HTML_2 : Completed Heading Extraction For File | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,083 : Flow Logger HTML_2 : 1: Heading Extraction,0.1442 Min,0.393145 MB,2.88395 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,083 : Flow Logger HTML_2 : Starting Document Annotation For File | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,099 : Flow Logger HTML_2 : Completed Document Annotation | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,099 : Flow Logger HTML_2 : 1: Document Annotation,0.0003 Min,0.154584 MB,0.157333 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,107 : Flow Logger HTML_2 : Starting Extracting Content Between Heading | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:33,115 : ExtractContentBetween_1_d : Cleaning Match Results | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:33,123 : ExtractContentBetween_1_d : Finishe



Heading Not Found 
 ['Name und Anschrift des (der) Hersteller(s) des Wirkstoffs/der Wirkstoffe biologischen Ursprungs', 'Amtliche Chargenfreigabe', 'Zusätzliche Maßnahmen zur Risikominimierung', 'Verpflichtung zur Durchführung von Maßnahmen nach der Zulassung ', 'SPEZIFISCHE VERPFLICHTUNG ZUM ABSCHLUSS VON MASSNAHMEN NACH DER ZULASSUNG <UNTER „BESONDEREN BEDINGUNGEN“> <UNTER „AUSSERGEWÖHNLICHEN UMSTÄNDEN“']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.1442 Min,0.393145 MB,2.88395 MB,57.7%

Starting Document Annotation For File :- Elocta_clean_ANHANG II.json
Error Found No Authorization Code Found In The Document Elocta_clean_ANHANG II.json
Completed Document Annotation
Metrics : 1: Document Annotation,0.0003 Min,0.154584 MB,0.157333 MB,57.7%

Starting Extracting Content Between Heading For File :- Elocta_clean_ANHANG II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\p

2021-06-11 16:05:33,306 : XmlGeneration_1_Z : Writing to File:Elocta_clean_ANHANG II.xml | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:33,314 : Flow Logger HTML_2 : 1: Generate XML,0.0025 Min,0.45767 MB,0.753148 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm


Metrics : 1: Generate XML,0.0025 Min,0.45767 MB,0.753148 MB,57.7%



2021-06-11 16:05:35,172 : XML Submission Logger_1_A : Initiating Submission To FHIR Server | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:35,172 : XML Submission Logger_1_A : Response{"resourceType":"Bundle","id":"982f2dd9-cc04-437d-b35d-72f94c9b6705","meta":{"versionId":"1","lastUpdated":"2021-06-11T10:35:34.678+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:e116653d-43cb-4178-86de-de3ba716fdbb","resource":{"resourceType":"Bundle","id":"5cbdf39c-6afb-497b-8bb9-e92a4a87edd8","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-11T10:35:33+00:00","entry":[{"fullUr | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:35,180 : XML Submission Logger_1_A : POST sucessful: XML added with id: 982f2dd9-cc04-437d-b35d-72f94c9b6705 | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:35,180 : Flow Logger HTML_2 : 1: Submit FHIR Msg,0.03

POST sucessful: XML added with id 982f2dd9-cc04-437d-b35d-72f94c9b6705
Metrics : 1: Submit FHIR Msg,0.0311 Min,0.185424 MB,0.367016 MB,57.7%

Created XML File For :- Elocta_clean_ANHANG II.json

listRegulatedAuthCodesAccrossePI ['EU/1/15/1046/001', 'EU/1/15/1046/002', 'EU/1/15/1046/003', 'EU/1/15/1046/004', 'EU/1/15/1046/005', 'EU/1/15/1046/006', 'EU/1/15/1046/007', 'EU/1/15/1046/008']


2021-06-11 16:05:36,227 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/002  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:38,447 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/003  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:42,368 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/004  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:43,463 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/005  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:45,203 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/006  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:46,843 : List Bundle Creation Logger_1_H : Getting list bundle for MAN EU/1/15/1046/007  | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:48,303 : List Bundle Creation Logger_1_H : Gett

Updating


2021-06-11 16:05:51,510 : List Bundle Creation Logger_1_H : List update successfully completed 9d816422-ad4a-4e8f-a92c-db2e55640821 | H | CAP |  de | 1 | Elocta_clean_ANHANG II.json
2021-06-11 16:05:51,510 : Flow Logger HTML_2 : Completed list bundle update/addition | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:51,510 : Flow Logger HTML_2 : 1: Update/Add List Bundle,0.272 Min,0.160151 MB,0.651881 MB,57.7%
 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:51,518 : Flow Logger HTML_2 : 



||||||||||||||||||||||||||||||||2 ||||| Elocta_clean_ ETIKETTIERUNG .json||||||||||||||||||||||||||||||||



 | H | CAP |  de | HTML | Elocta_clean.htm
2021-06-11 16:05:51,526 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Starting Heading Extraction | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json


Metrics : 1: Update/Add List Bundle,0.272 Min,0.160151 MB,0.651881 MB,57.7%

Starting Heading Extraction For File :- Elocta_clean_ ETIKETTIERUNG .json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ELOCTA\de\2021-05-22T18-23-40Z\partitionedJSONs\Elocta_clean_ ETIKETTIERUNG .json
--------------------------------------------
Etikettierung


2021-06-11 16:05:52,047 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Started Extracting Heading | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json
2021-06-11 16:05:52,103 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Match Passed : <=1|18.75|(90, 100, 95)|0.962| | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | Doc txt :- 'A. ETIKETTIERUNG' | Qrd txt :- 'ETIKETTIERUNG ' | Matched :- 'True'
2021-06-11 16:05:52,111 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Validation Passed As This The First Heading | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | currHeadId :- '18001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-11 16:05:52,135 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Match Passed : Contains<>|82.35|(68, 88, 86)|0.905| | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | Doc txt :- 'ANGABEN AUF DER ÄUSSEREN UMHÜLLUNG' | Qrd txt :- 'ANGABEN <AUF DER ÄUSSEREN UMHÜLLUNG> <UND> <AUF DEM 

2021-06-11 16:06:05,764 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Match Passed | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | Doc txt :- '14. VERKAUFSABGRENZUNG' | Qrd txt :- '14. VERKAUFSABGRENZUNG' | Matched :- 'True'
2021-06-11 16:06:05,777 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Validation Passed | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | currHeadId :- '18016' | prevHeadingCurrId :- '18015' | prevHeadingFoundId :- '18015'
2021-06-11 16:06:06,031 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Match Passed | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | Doc txt :- '15. HINWEISE FÜR DEN GEBRAUCH' | Qrd txt :- '15. HINWEISE FÜR DEN GEBRAUCH' | Matched :- 'True'
2021-06-11 16:06:06,047 : Heading Extraction Elocta_clean_ ETIKETTIERUNG .json_K : Validation Passed | H | CAP |  de | 2 | Elocta_clean_ ETIKETTIERUNG .json | currHeadId :- '18017' | prevHeadingCurrId :- '18016' | prevHeadingFoundId :- '18016'
20