In [1]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

In [15]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames
from listBundle.addAndUpdateListBundle.addAndUpdateListBundle import ListBundleHandler

class FolderNotFoundError(Exception):
    """Signals that a folder the pipeline expected to find does not exist."""

class Metrics:
    """Record per-step elapsed time and memory usage to a CSV file.

    Each call to ``getMetric`` appends one row describing the step that just
    finished (elapsed minutes, tracemalloc current/peak in MB, system RAM
    usage in percent), mirrors the row to ``logger`` and stdout, and restarts
    the timer and tracemalloc for the next step. ``end`` appends a summary
    row, closes the file and stops tracemalloc.

    Args:
        logFileName: Path of the CSV file; rows are appended to it.
        logger: Object exposing ``logFlowCheckpoint(str)``.
    """

    CSV_HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so only emit the header when the
        # file is new or empty; otherwise every re-run would add another
        # header row in the middle of the data.
        needsHeader = not (os.path.exists(self.logFileName)
                           and os.path.getsize(self.logFileName) > 0)
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self.CSV_HEADER)

        self.start()

    def start(self):
        """Start the step timer and memory tracing."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Record the metrics of the step that just finished under the label ``msg``."""
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; convert to MB.
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        usedRamPerc = psutil.virtual_memory().percent

        # Running maxima / total reported by end().
        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(f"{outputString}")

        print(f"Metrics : {outputString}")
        self.writer.write(outputString)
        # Flush so the rows written so far survive a crash in a later step.
        self.writer.flush()

        # Restarting tracemalloc resets both current and peak for the next step.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the summary row, then close the CSV file and stop tracing."""
        current, _ = tracemalloc.get_traced_memory()
        current = current / 10**6
        outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
        print(f"Metrics : {outputString}")
        try:
            self.logger.logFlowCheckpoint(f"{outputString}")
            self.writer.write(outputString)
        finally:
            # Always release the file handle and stop tracing, even if
            # logging or writing the summary row fails.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string of its integer value, or ``x`` itself if it is not convertible.

    Args:
        x: Any value.

    Returns:
        ``str(int(x))`` when ``int(x)`` succeeds; otherwise ``x`` unchanged
        (e.g. ``None``, NaN, infinity or non-numeric text).
    """
    try:
        return str(int(x))
    # Only the failures int() itself produces are treated as "not convertible";
    # a bare except would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with ``parent_id`` and ``id`` passed through ``convertToInt``.

    Args:
        collection: Data accepted by ``pd.DataFrame``; the resulting frame
            must contain ``parent_id`` and ``id`` columns.

    Returns:
        The DataFrame with both identifier columns converted element-wise.
    """
    frame = pd.DataFrame(collection)

    # Same column order as before: parent_id first, then id.
    for column in ('parent_id', 'id'):
        frame[column] = frame[column].apply(convertToInt)

    return frame

def getRandomString(N):
    """Return ``N`` characters picked at random from ASCII uppercase letters, digits and lowercase letters."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    picked = []
    for _ in range(N):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML document ``htmlDocName`` found in ``basePath`` to JSON.

    The style rule dictionary is loaded through ``StyleRulesDictionary`` and
    every file matching ``htmlDocName`` in ``basePath`` is handed to
    ``parserExtractor.createPIJsonFromHTML``. Output paths live in the
    ``outputJSON`` sub-folder of ``basePath`` (created if missing) and are
    derived from ``htmlDocName`` by swapping its extension for ``.json`` and
    ``.txt`` (style information).

    Args:
        controlBasePath: Folder passed to ``StyleRulesDictionary``.
        basePath: Folder that contains the HTML document.
        domain, procedureType, languageCode: Passed to the loggers and the
            style rule dictionary.
        htmlDocName: File name (glob pattern) of the HTML document.
        fileNameQrd: File name passed to ``StyleRulesDictionary``.
        fileNameLog: Log file passed to ``MatchLogger``.

    Returns:
        tuple: ``(jsonFileName, styleFilePath)`` - the base name of the JSON
        output file and the full path of the style file.

    Raises:
        FolderNotFoundError: If ``basePath`` does not exist.
        FileNotFoundError: If nothing in ``basePath`` matches ``htmlDocName``.
    """
    module_path = os.path.join(basePath)

    # Check that the input folder exists before anything else. The loggers are
    # only created further down, so the previous attempt to log this condition
    # failed with UnboundLocalError and hid the FolderNotFoundError.
    if not os.path.exists(module_path):
        raise FolderNotFoundError(module_path + " not found")

    filenames = glob.glob(os.path.join(module_path, htmlDocName))
    if not filenames:
        # Without this the function failed at the return statement with an
        # UnboundLocalError that did not mention the missing document.
        raise FileNotFoundError(f"No file matching {htmlDocName} found in {module_path}")

    # Create the outputJSON folder if it doesn't exist
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Swap only the file extension. Replacing '.htm'/'.html' anywhere in the
    # path would also rewrite directory names that happen to contain it.
    output_root = os.path.splitext(os.path.join(output_json_path, htmlDocName))[0]
    style_filepath = output_root + '.txt'
    output_filename = output_root + '.json'

    for input_filename in filenames:
        print("-------------",style_filepath,"-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath = style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # basename works regardless of which separator the caller used in basePath.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition the JSON file ``fileNameJson`` stored under ``basePath``/outputJSON.

    Loads the QRD section headings through ``StyleRulesDictionary`` and passes
    them, together with the JSON path, to ``DocTypePartitioner.partitionHtmls``.

    Returns:
        Whatever ``partitionHtmls`` returns (used by the caller as a list of
        partitioned JSON paths).
    """
    # Arguments shared by both loggers created here.
    loggerContext = (fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    styleRules = StyleRulesDictionary(
        logger=MatchLogger(f'Style Dictionary_{getRandomString(1)}', *loggerContext),
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    jsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson",jsonPath)

    partitioner = DocTypePartitioner(
        MatchLogger(f'Partition_{getRandomString(1)}', *loggerContext))

    return partitioner.partitionHtmls(styleRules.qrd_section_headings, jsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    Chooses how many top/bottom headings to consider from ``documentNumber``,
    looks up the stop-word language through ``DocumentTypeNames`` and runs
    ``MatchDocument.matchHtmlHeaddingsWithQrd``.

    Returns:
        tuple: ``(df, coll, documentType)`` as produced by
        ``matchHtmlHeaddingsWithQrd``.
    """
    # (top, bottom) heading counts per document number; any other document
    # number falls back to (5, 10).
    headingWindows = {
        0: (4, 6),
        1: (3, 5),
        2: (5, 15),
    }
    topHeadingsConsidered, bottomHeadingsConsidered = headingWindows.get(documentNumber, (5, 10))

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber
        )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        logger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName)
    headingFrame, headingCollection, documentType = matcher.matchHtmlHeaddingsWithQrd()

    return headingFrame, headingCollection, documentType


def parseDocument(controlBasePath,
                  basePath,
                  htmlDocName,
                  fileNameQrd,
                  fileNameMatchRuleBook,
                  fileNameDocumentTypeNames,
                  jsonTempFileName,
                  listBundleDocumentTypeCodesFileName,
                  apiMmgtBaseUrl,
                  getListApiEndPointUrlSuffix,
                  addUpdateListApiEndPointUrlSuffix,
                  apiMmgtSubsKey,
                  submitFhirUrl,
                  medName = None,
                  pmsApiSubsKey = 'c20835db4b1b4e108828a8537ff41506',
                  pmsApiBaseUrl = 'https://spor-sit.azure-api.net/pms/api/v2/'):
    """Run the whole pipeline for one HTML document located in ``basePath``.

    Steps, in order: HTML to JSON conversion, JSON partitioning, and then for
    every partition except the first one: heading extraction, document
    annotation, content extraction between headings, FHIR XML generation and
    submission, and list bundle add/update. Each step is timed through
    ``Metrics`` (``Metrics.csv``) and logged to ``FinalLog.txt``; both files
    are written under ``basePath``.

    ``domain``, ``procedureType``, ``medName``, ``languageCode`` and the
    timestamp are taken from the last five components of ``basePath``.

    Args:
        controlBasePath: Folder holding the control files.
        basePath: Working folder of the document; its last five path
            components must be domain/procedureType/medName/languageCode/timestamp.
        htmlDocName: File name of the HTML document inside ``basePath``.
        fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames,
        jsonTempFileName, listBundleDocumentTypeCodesFileName: Control file
            names forwarded to the individual steps.
        apiMmgtBaseUrl, getListApiEndPointUrlSuffix,
        addUpdateListApiEndPointUrlSuffix, apiMmgtSubsKey: Forwarded to
            ``ListBundleHandler``.
        submitFhirUrl: Forwarded to ``FhirService``.
        medName: Kept for backward compatibility; the value is always
            replaced by the medicine name found in ``basePath``.
        pmsApiSubsKey: Subscription key forwarded to ``DocumentAnnotation``.
        pmsApiBaseUrl: Base URL forwarded to ``DocumentAnnotation``.

    Returns:
        None.
    """
    # NOTE(security): the defaults of pmsApiSubsKey / pmsApiBaseUrl are the
    # values that used to be hard-coded in this function. They are kept only
    # so existing callers keep working; pass the key from secure
    # configuration and rotate the committed one.

    listRegulatedAuthCodesAccrossePI = []

    fileNameLog = os.path.join(basePath,'FinalLog.txt')

    # normpath drops a trailing separator and unifies separators, so the
    # negative indices below always address real path components.
    pathComponents = os.path.normpath(basePath).split(os.sep)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode =  pathComponents[-2]
    medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    flowLogger =  MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath,'Metrics.csv'),flowLogger)

    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        ###Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-",stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        ###Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        partitionedJsonPaths = [os.path.basename(path) for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        # Index reported in the closing metric row. Computed up front so an
        # empty partition list no longer fails on an unbound loop variable.
        lastIndex = len(partitionedJsonPaths) - 1

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            print("Index", index)
            # The first partition is not processed.
            if index == 0:
                continue
            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # Partition 3 is treated as the package leaflet.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned, pmsApiSubsKey, pmsApiBaseUrl, df, coll, index)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as e:
                # Annotation is best-effort: continue without annotation data,
                # but leave a trace in the flow log as well as on stdout.
                pms_oms_annotation_data = None
                print("Error Found", str(e))
                flowLogger.logFlowCheckpoint(f"Document annotation failed: {e}")

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger =  MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger =  MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json','.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger =  MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, submitFhirUrl, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

            flowLogger.logFlowCheckpoint("Starting list bundle update/addition")
            # Authorization identifiers accumulate across all partitions.
            if documentAnnotationObj.listRegulatedAuthorizationIdentifiers is not None:
                listRegulatedAuthCodesAccrossePI.extend(documentAnnotationObj.listRegulatedAuthorizationIdentifiers)
            listBundleLogger =  MatchLogger(f'List Bundle Creation Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            print("\nlistRegulatedAuthCodesAccrossePI",listRegulatedAuthCodesAccrossePI)
            try:
                listBundleHandler = ListBundleHandler(listBundleLogger,
                         domain,
                         procedureType,
                         index,
                         documentType,
                         languageCode,
                         medName,
                         controlBasePath,
                         jsonTempFileName,
                         listBundleDocumentTypeCodesFileName,
                         fileNameDocumentTypeNames,
                         listRegulatedAuthCodesAccrossePI,
                         apiMmgtBaseUrl,
                         getListApiEndPointUrlSuffix,
                         addUpdateListApiEndPointUrlSuffix,
                         apiMmgtSubsKey)

                listBundleXml = listBundleHandler.addOrUpdateDocumentItem(str(fhirServiceObj.SubmittedFhirMsgRefId))
                listBundleHandler.submitListXmLToServer(listBundleXml)

                flowLogger.logFlowCheckpoint("Completed list bundle update/addition")
                metrics.getMetric(f"{index}: Update/Add List Bundle")
            except Exception as e:
                # The list bundle step is best-effort; the remaining
                # partitions are still processed when it fails.
                print(str(e))
                if 'No MAN Code found' in str(e):
                    flowLogger.logFlowCheckpoint("Skipping list bundle addtion/update as no MAN found")
                else:
                    # Previously only printed, so the failure never reached
                    # the log file.
                    flowLogger.logFlowCheckpoint(f"List bundle update/addition failed: {e}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        metrics.getMetric(f"{lastIndex}: Completed")
    finally:
        # Always write the summary row, close Metrics.csv and stop
        # tracemalloc, also when a step raises.
        metrics.end()

In [16]:
# Name of the zip archive to process and the folder it is read from
# (the "inputblob" folder inside the parent of the current working directory).
inputZipFileName = "Karvea~H~CAP~da~2021-05-25T07-18-55Z.zip"
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')

In [17]:
# Control file names and service endpoints passed to parseDocument.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'
jsonTempFileName = 'listBundleJsonTemplate.json'
listBundleDocumentTypeCodesFileName = 'listBundleDocumentTypeCodes.json'
apiMmgtBaseUrl = "https://ema-dap-epi-dev-fhir-apim.azure-api.net"
getListApiEndPointUrlSuffix = "/epi/v1/List"
addUpdateListApiEndPointUrlSuffix = "/epi-w/v1/List"
# NOTE(security): a subscription key should not live in the notebook. The
# API_MMGT_SUBS_KEY environment variable takes precedence; the literal
# fallback only keeps the notebook runnable and the key should be rotated.
apiMmgtSubsKey = os.environ.get("API_MMGT_SUBS_KEY", "ba6d7e9a73ed4facaa58fc983bf6db50")
submitFhirUrl = "https://ema-dap-epi-dev-fhir-api.azurewebsites.net/Bundle"


# The zip name encodes: medName~domain~procedureType~languageCode~timestamp.zip
info = inputZipFileName.split("~")

if len(info) < 5:
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}")

medName = info[0]
domain = info[1]
procedureType = info[2]
languageCode = info[3]
timestamp = info[4]
timestamp = timestamp.replace(".zip","")

# A backslash in the working directory is taken to mean a local Windows run;
# otherwise the folders live under the mounted file share.
if "\\" in os.getcwd():
    localEnv = True
    inputZipFolderPath = os.path.join(os.path.abspath(os.path.join('..')),inputZipFolderPath)
    outputFolderPath = os.path.join(os.path.abspath(os.path.join('..')), 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(os.path.abspath(os.path.join('..')),'control')
else:
    localEnv = False
    # NOTE(review): os.path.join discards fsMountName when inputZipFolderPath
    # is already absolute - confirm that is intended.
    inputZipFolderPath = os.path.join(f'{fsMountName}',inputZipFolderPath)
    outputFolderPath = os.path.join(f'{fsMountName}', 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(f'{fsMountName}','control')


print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the execute bit to be traversable; 0o666 produced folders
# that could not be entered on POSIX systems.
mode = 0o755

if localEnv:
    inputZipFolderPath = inputZipFolderPath.replace("/","\\")
    outputFolderPath = outputFolderPath.replace("/","\\")
    controlFolderPath = controlFolderPath.replace("/","\\")

# Create each folder independently. With a single try block around all three
# calls, the first folder that already existed stopped the others from
# being created.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode=mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)


# Pick the first extracted file whose name contains ".htm".
_,_,fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if ".htm" in fileName]
if not htmlFileNames:
    raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath} after extracting {inputZipFileName}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



Karvea~H~CAP~da~2021-05-25T07-18-55Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Karvea_clean.htm


In [None]:
# Run the full pipeline for the extracted HTML document.
parseDocument(
    controlBasePath=controlFolderPath,
    basePath=outputFolderPath,
    htmlDocName=htmlFileName,
    fileNameQrd=fileNameQrd,
    fileNameMatchRuleBook=fileNameMatchRuleBook,
    fileNameDocumentTypeNames=fileNameDocumentTypeNames,
    jsonTempFileName=jsonTempFileName,
    listBundleDocumentTypeCodesFileName=listBundleDocumentTypeCodesFileName,
    apiMmgtBaseUrl=apiMmgtBaseUrl,
    getListApiEndPointUrlSuffix=getListApiEndPointUrlSuffix,
    addUpdateListApiEndPointUrlSuffix=addUpdateListApiEndPointUrlSuffix,
    apiMmgtSubsKey=apiMmgtSubsKey,
    submitFhirUrl=submitFhirUrl,
    medName=medName,
)

2021-06-03 04:22:38,738 : Flow Logger HTML_E : Starting HTML Conversion To Json | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:22:38,745 : Style Dictionary_2 : Reading style dictionary in file: rule_dictionary_da.json | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:22:38,776 : Style Dictionary_2 : Qrd Section Keys Retrieved For Style Dictionary: BILAG I, BILAG II, BILAG III, B. INDLÆGSSEDDEL | H | CAP |  da | HTML | Karvea_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'Karvea', 'da', '2021-05-25T07-18-55Z'] Karvea_clean.htm
2021-05-25T07-18-55Z da Karvea CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\Karvea_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.json


2021-06-03 04:22:45,216 : Parser_X : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.txt | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:23:03,288 : Parser_X : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.json | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:23:04,403 : Flow Logger HTML_E : Completed HTML Conversion To Json | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:23:04,408 : Flow Logger HTML_E : HTML Conversion To Json,0.4279 Min,31.620001 MB,40.640026 MB,48.9%
 | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:23:04,466 : Flow Logger HTML_E : Starting Json Split | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:23:04,477 : Style Dictionary_p : Reading style dictionary in file: rule_dictionary_da.json | H | CAP |  da | Json | Karvea_clean.json
2021-06

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.txt
Metrics : HTML Conversion To Json,0.4279 Min,31.620001 MB,40.640026 MB,48.9%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\outputJSON\Karvea_clean.json


2021-06-03 04:23:05,375 : Partition_4 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_SmPC.json | H | CAP |  da | Json | Karvea_clean.json
2021-06-03 04:23:05,766 : Partition_4 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_BILAG II.json | H | CAP |  da | Json | Karvea_clean.json
2021-06-03 04:23:06,081 : Partition_4 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_BILAG III.json | H | CAP |  da | Json | Karvea_clean.json
2021-06-03 04:23:06,098 : Partition_4 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_ INDLÆGSSEDDEL.json | H | CAP |  da | Json | Karvea_clean.json
2

Metrics : Split Json,0.0279 Min,0.258844 MB,15.839187 MB,48.9%

Index 0
Index 1
Starting Heading Extraction For File :- Karvea_clean_BILAG II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_BILAG II.json
--------------------------------------------
BILAG II


2021-06-03 04:23:06,477 : Heading Extraction Karvea_clean_BILAG II.json_K : Started Extracting Heading | H | CAP |  da | 1 | Karvea_clean_BILAG II.json
2021-06-03 04:23:06,489 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Passed | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'BILAG II' | Qrd txt :- 'BILAG II' | Matched :- 'True'
2021-06-03 04:23:06,496 : Heading Extraction Karvea_clean_BILAG II.json_K : Validation Passed As This The First Heading | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | currHeadId :- '9001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:23:06,535 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Passed : Contains<>|126.09|(58, 89, 86)|0.685| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'A.      FREMSTILLERE ANSVARLIGE FOR BATCHFRIGIVELSE' | Qrd txt :- 'A. <FREMSTILLER(E) AF DET (DE) BIOLOGISK AKTIVE STOF(FER) OG> FREMSTILLER(E) ANSVARLIG(E) FOR BATCHFRIGIVELSE' | Matched :- 'True'
2021-06


OriginalCheck


OriginalCheck



2021-06-03 04:23:07,360 : Heading Extraction Karvea_clean_BILAG II.json_K : End Of Sub Section | H | CAP |  da | 1 | Karvea_clean_BILAG II.json
2021-06-03 04:23:07,394 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Passed : Contains<>|126.09|(58, 89, 86)|0.685| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'A.      FREMSTILLERE ANSVARLIGE FOR BATCHFRIGIVELSE' | Qrd txt :- 'A. <FREMSTILLER(E) AF DET (DE) BIOLOGISK AKTIVE STOF(FER) OG> FREMSTILLER(E) ANSVARLIG(E) FOR BATCHFRIGIVELSE' | Matched :- 'True'
2021-06-03 04:23:07,402 : Heading Extraction Karvea_clean_BILAG II.json_K : Validation Passed As This The First Heading | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | currHeadId :- '9002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 er ansvarlige for batchfrigivelse' | Qrd txt :- 'A. <FREMSTILLER(E) AF DET (DE) BIOLOGISK AKTIVE STOF(FER) OG> FREMSTILLER(E) ANSVARLIG(E) FOR BATCHFRIGIVELSE' | Matched :- 'False'II.json | Doc txt :- 'Navn og adresse på

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



2021-06-03 04:23:09,533 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Failed In Lowercase : Contains<>|465.0|(8, 20, 86)|0.398| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'Sanofi-Aventis, S.A.' | Qrd txt :- 'A. <FREMSTILLER(E) AF DET (DE) BIOLOGISK AKTIVE STOF(FER) OG> FREMSTILLER(E) ANSVARLIG(E) FOR BATCHFRIGIVELSE' | Matched :- 'False'



OriginalCheck



2021-06-03 04:23:10,709 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Passed | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'B. BETINGELSER ELLER BEGRÆNSNINGER VEDRØRENDE UDLEVERING OG ANVENDELSE' | Qrd txt :- 'B. BETINGELSER ELLER BEGRÆNSNINGER VEDRØRENDE UDLEVERING OG ANVENDELSE' | Matched :- 'True'
2021-06-03 04:23:10,720 : Heading Extraction Karvea_clean_BILAG II.json_K : Validation Flow Is Broken | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | currHeadId :- '9005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '9002'
2021-06-03 04:23:10,726 : Heading Extraction Karvea_clean_BILAG II.json_K : Validation Passed | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | currHeadId :- '9005' | prevHeadingCurrId :- '9002' | prevHeadingFoundId :- '9002'
2021-06-03 04:23:10,852 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Failed In Lowercase : <=4|158.62|(21, 31, 86)|0.491| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'Lægemidlet er 


OriginalCheck


OriginalCheck


OriginalCheck



2021-06-03 04:23:11,055 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Failed In Lowercase : Contains<>|118.33|(18, 35, 86)|0.646| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'C.ANDRE FORHOLD OG BETINGELSER FOR MARKEDSFØRINGSTILLADELSEN' | Qrd txt :- 'A. <FREMSTILLER(E) AF DET (DE) BIOLOGISK AKTIVE STOF(FER) OG> FREMSTILLER(E) ANSVARLIG(E) FOR BATCHFRIGIVELSE' | Matched :- 'False'
2021-06-03 04:23:11,131 : Heading Extraction Karvea_clean_BILAG II.json_K : Match Passed : <=7|13.33|(93, 87, 95)|0.934| | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | Doc txt :- 'C.ANDRE FORHOLD OG BETINGELSER FOR MARKEDSFØRINGSTILLADELSEN' | Qrd txt :- 'C. ANDRE FORHOLD OG BETINGELSER FOR MARKEDSFØRINGSTILLADELSEN' | Matched :- 'True'
2021-06-03 04:23:11,142 : Heading Extraction Karvea_clean_BILAG II.json_K : Validation Flow Is Broken | H | CAP |  da | 1 | Karvea_clean_BILAG II.json | currHeadId :- '9007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '9005'
2021-06-03 04



Heading Not Found 
 ['Navn og adresse på fremstilleren (fremstillerne) af det (de) biologisk aktive stof(fer)', 'Navn og adresse på den fremstiller (de fremstillere), der er ansvarlig(e) for batchfrigivelse', 'Officiel batchfrigivelse', 'Yderligere risikominimeringsforanstaltninger', 'Forpligtelse til at gennemføre foranstaltninger efter udstedelse af markedsføringstilladelse', 'SÆRLIG FORPLIGTELSE TIL AT AFSLUTTE FORANSTALTNINGER EFTER UDSTEDELSE AF MARKEDSFØRINGSTILLADELSE TIL LÆGEMIDLER GODKENDT UNDER <BETINGEDE OMSTÆNDIGHEDER> <SÆRLIGE VILKÅR>']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.092 Min,0.378727 MB,2.982893 MB,48.8%

Starting Document Annotation For File :- Karvea_clean_BILAG II.json
Error Found No Authorization Code Found In The Document Karvea_clean_BILAG II.json
Completed Document Annotation
Metrics : 1: Document Annotation,0.0002 Min,0.152902 MB,0.164091 MB,48.8%

Starting Extracting Content Between Heading For File :- Kar

2021-06-03 04:23:11,850 : Flow Logger HTML_E : 1: Generate XML,0.002 Min,0.430169 MB,0.698835 MB,48.8%
 | H | CAP |  da | HTML | Karvea_clean.htm


Metrics : 1: Generate XML,0.002 Min,0.430169 MB,0.698835 MB,48.8%



2021-06-03 04:23:13,053 : XML Submission Logger_1_i : Initiating Submission To FHIR Server | H | CAP |  da | 1 | Karvea_clean_BILAG II.json
2021-06-03 04:23:13,055 : XML Submission Logger_1_i : Response{"resourceType":"Bundle","id":"3eda80ab-b12b-43b1-807b-838f7dfafe44","meta":{"versionId":"1","lastUpdated":"2021-06-02T22:53:12.851+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:8e682db0-b830-4aa4-890a-2edc9ee644ee","resource":{"resourceType":"Bundle","id":"89060896-91af-493b-a8bd-dbcee9937fd1","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-02T22:53:11+00:00","entry":[{"fullUr | H | CAP |  da | 1 | Karvea_clean_BILAG II.json
2021-06-03 04:23:13,057 : XML Submission Logger_1_i : POST sucessful: XML added with id: 3eda80ab-b12b-43b1-807b-838f7dfafe44 | H | CAP |  da | 1 | Karvea_clean_BILAG II.json
2021-06-03 04:23:13,058 : Flow Logger HTML_E : 1: Submit FHIR Msg,0.0201 

POST sucessful: XML added with id 3eda80ab-b12b-43b1-807b-838f7dfafe44
Metrics : 1: Submit FHIR Msg,0.0201 Min,0.039112 MB,0.34262 MB,48.8%

Created XML File For :- Karvea_clean_BILAG II.json

listRegulatedAuthCodesAccrossePI []
No MAN Code found
Index 2
Starting Heading Extraction For File :- Karvea_clean_BILAG III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_BILAG III.json
--------------------------------------------
ETIKETTERING


2021-06-03 04:23:13,339 : Heading Extraction Karvea_clean_BILAG III.json_3 : Started Extracting Heading | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:23:13,720 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed : <=1|20.0|(89, 100, 95)|0.959| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'A. ETIKETTERING' | Qrd txt :- 'ETIKETTERING ' | Matched :- 'True'
2021-06-03 04:23:13,726 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed As This The First Heading | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 ANFØRES PÅ DEN YDRE EMBALLAGE' | Qrd txt :- 'MÆRKNING, DER SKAL ANFØRES <PÅ DEN YDRE EMBALLAGE> <OG> <PÅ DEN INDRE EMBALLAGE>' | Matched :- 'True' 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MÆRKNING DER SKAL
2021-06-03 04:23:13,745 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILA

2021-06-03 04:23:19,592 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10017' | prevHeadingCurrId :- '10016' | prevHeadingFoundId :- '10016'
2021-06-03 04:23:19,788 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '16. INFORMATION I BRAILLESKRIFT' | Qrd txt :- '16. INFORMATION I BRAILLESKRIFT' | Matched :- 'True'
2021-06-03 04:23:19,797 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10018' | prevHeadingCurrId :- '10017' | prevHeadingFoundId :- '10017'
2021-06-03 04:23:20,160 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'True'
2021-06-03


OriginalCheck



2021-06-03 04:23:20,696 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:23:20,705 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:23:20,933 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:23:20,943 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-03 04:23:23,872 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '1. LÆGEMIDLETS NAVN' | Qrd txt :- '1. LÆGEMIDLETS NAVN' | Matched :- 'True'
2021-06-03 04:23:23,881 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10003' | prevHeadingCurrId :- '10002' | prevHeadingFoundId :- '10002'
2021-06-03 04:23:24,354 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Qrd txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Matched :- 'True'
2021-06-03 04:23:24,363 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10004' | prevHeadingCurrId :- '10003' | prevHeadingFoundId :- '10003'
2021-06-03 04:23:24,647 : Head

2021-06-03 04:23:31,026 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10019' | prevHeadingCurrId :- '10018' | prevHeadingFoundId :- '10018'
 en entydig identifikator.' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'False' <=7|94.44|(9, 10, 91)|0.34| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'Der er anført en 2D-stregkode, som indeholder



OriginalCheck



2021-06-03 04:23:31,544 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:23:31,554 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:23:31,761 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:23:31,771 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-03 04:23:34,585 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '1. LÆGEMIDLETS NAVN' | Qrd txt :- '1. LÆGEMIDLETS NAVN' | Matched :- 'True'
2021-06-03 04:23:34,594 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10003' | prevHeadingCurrId :- '10002' | prevHeadingFoundId :- '10002'
2021-06-03 04:23:35,028 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Qrd txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Matched :- 'True'
2021-06-03 04:23:35,039 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10004' | prevHeadingCurrId :- '10003' | prevHeadingFoundId :- '10003'
2021-06-03 04:23:35,314 : Head

2021-06-03 04:23:42,131 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Failed In Lowercase : <=7|94.44|(9, 10, 91)|0.34| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'Der er anført en 2D-stregkode, som indeholder en entydig identifikator.' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'False'



OriginalCheck



2021-06-03 04:23:42,597 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:23:42,608 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:23:42,874 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:23:42,888 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-03 04:23:46,349 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '1. LÆGEMIDLETS NAVN' | Qrd txt :- '1. LÆGEMIDLETS NAVN' | Matched :- 'True'
2021-06-03 04:23:46,359 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10003' | prevHeadingCurrId :- '10002' | prevHeadingFoundId :- '10002'
2021-06-03 04:23:46,809 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Qrd txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Matched :- 'True'
2021-06-03 04:23:46,820 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10004' | prevHeadingCurrId :- '10003' | prevHeadingFoundId :- '10003'
2021-06-03 04:23:47,173 : Head

2021-06-03 04:23:53,187 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Failed In Lowercase : <=7|94.44|(9, 10, 91)|0.34| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'Der er anført en 2D-stregkode, som indeholder en entydig identifikator.' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'False'



OriginalCheck



2021-06-03 04:23:53,551 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:23:53,561 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:23:53,778 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:23:53,789 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-03 04:23:58,417 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '1. LÆGEMIDLETS NAVN' | Qrd txt :- '1. LÆGEMIDLETS NAVN' | Matched :- 'True'
2021-06-03 04:23:58,430 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10003' | prevHeadingCurrId :- '10002' | prevHeadingFoundId :- '10002'
2021-06-03 04:23:58,945 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Qrd txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Matched :- 'True'
2021-06-03 04:23:58,956 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10004' | prevHeadingCurrId :- '10003' | prevHeadingFoundId :- '10003'
2021-06-03 04:23:59,289 : Head

2021-06-03 04:24:06,918 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Failed In Lowercase : <=7|94.44|(9, 10, 91)|0.34| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'Der er anført en 2D-stregkode, som indeholder en entydig identifikator.' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'False'



OriginalCheck



2021-06-03 04:24:07,259 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:24:07,269 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:24:07,477 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:24:07,486 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-03 04:24:10,813 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10003' | prevHeadingCurrId :- '10002' | prevHeadingFoundId :- '10002'
2021-06-03 04:24:11,197 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Qrd txt :- '2. ANGIVELSE AF AKTIVT STOF/AKTIVE STOFFER' | Matched :- 'True'
2021-06-03 04:24:11,206 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10004' | prevHeadingCurrId :- '10003' | prevHeadingFoundId :- '10003'
2021-06-03 04:24:11,467 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '3. LISTE OVER HJÆLPESTOFFER' | Qrd txt :- '3. LISTE OVER HJÆLPESTOFFER' | Matched :- 'True'
2021-06-03 04:

2021-06-03 04:24:20,551 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Failed In Lowercase : <=7|94.44|(9, 10, 91)|0.34| | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'Der er anført en 2D-stregkode, som indeholder en entydig identifikator.' | Qrd txt :- '17. ENTYDIG IDENTIFIKATOR – 2D-STREGKODE' | Matched :- 'False'



OriginalCheck



2021-06-03 04:24:21,773 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Qrd txt :- '18. ENTYDIG IDENTIFIKATOR - MENNESKELIGT LÆSBARE DATA' | Matched :- 'True'
2021-06-03 04:24:21,800 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10020' | prevHeadingCurrId :- '10019' | prevHeadingFoundId :- '10019'
2021-06-03 04:24:22,654 : Heading Extraction Karvea_clean_BILAG III.json_3 : Match Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | Doc txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Qrd txt :- 'MINDSTEKRAV TIL MÆRKNING PÅ BLISTER ELLER STRIP' | Matched :- 'True'
2021-06-03 04:24:22,697 : Heading Extraction Karvea_clean_BILAG III.json_3 : Validation Passed | H | CAP |  da | 2 | Karvea_clean_BILAG III.json | currHeadId :- '10021' | prevHeadingCu



Heading Not Found 
 ['BATCHNUMMER <, DONATIONS- OG PRODUKTKODER>', 'MINDSTEKRAV TIL MÆRKNING PÅ SMÅ INDRE EMBALLAGER', 'LÆGEMIDLETS NAVN OG ADMINISTRATIONSVEJ€', 'ADMINISTRATIONSMETODE', 'INDHOLD ANGIVET SOM VÆGT, VOLUMEN ELLER ENHEDER']


dict_keys([])
Completed Heading Extraction For File
Metrics : 2: Heading Extraction,1.2361 Min,1.088974 MB,3.401133 MB,49.0%

Starting Document Annotation For File :- Karvea_clean_BILAG III.json
 ['EU/1/97/049/010', 'EU/1/97/049/001', 'EU/1/97/049/002', 'EU/1/97/049/013', 'EU/1/97/049/003', '13.', 'Lot', 'EU/1/97/049/011', 'EU/1/97/049/004', 'EU/1/97/049/005', 'EU/1/97/049/014', 'EU/1/97/049/006', 'EU/1/97/049/012', 'EU/1/97/049/007', 'EU/1/97/049/008', 'EU/1/97/049/015', 'EU/1/97/049/009', 'EU/1/97/049/016', 'EU/1/97/049/017', 'EU/1/97/049/018', 'EU/1/97/049/019', 'EU/1/97/049/031', 'EU/1/97/049/020', 'EU/1/97/049/021', 'EU/1/97/049/022', 'EU/1/97/049/023', 'EU/1/97/049/024', 'EU/1/97/049/032', 'EU/1/97/049/025', 'EU/1/97/049/026', 'EU/1/97/049/02

2021-06-03 04:24:28,343 : Flow Logger HTML_E : Completed Document Annotation | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:24:28,344 : Flow Logger HTML_E : 2: Document Annotation,0.0186 Min,0.011595 MB,0.207041 MB,49.0%
 | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:24:28,347 : Flow Logger HTML_E : Starting Extracting Content Between Heading | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:24:28,351 : ExtractContentBetween_2_p : Cleaning Match Results | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:28,358 : ExtractContentBetween_2_p : Finished Cleaning Match Results | H | CAP |  da | 2 | Karvea_clean_BILAG III.json


Found entry with code 220000000061
Found Packaged Product Definition
Error Found Missing Key 'holder' in entry key value pair
Completed Document Annotation
Metrics : 2: Document Annotation,0.0186 Min,0.011595 MB,0.207041 MB,49.0%

Starting Extracting Content Between Heading For File :- Karvea_clean_BILAG III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_BILAG III.json
--------------------------------------------


2021-06-03 04:24:28,956 : Flow Logger HTML_E : Completed Extracting Content Between Heading | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:24:28,958 : Flow Logger HTML_E : 2: Content Extraction,0.0102 Min,0.451214 MB,1.861355 MB,49.0%
 | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:24:28,989 : XmlGeneration_2_0 : PMS/OMS Annotation Information Not Retrieved | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:28,991 : XmlGeneration_2_0 : Initiating XML Generation | H | CAP |  da | 2 | Karvea_clean_BILAG III.json


Completed Extracting Content Between Heading
Metrics : 2: Content Extraction,0.0102 Min,0.451214 MB,1.861355 MB,49.0%

Already Exists


2021-06-03 04:24:29,230 : XmlGeneration_2_0 : Writing to File:Karvea_clean_BILAG III.xml | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:29,235 : Flow Logger HTML_E : 2: Generate XML,0.0046 Min,1.654568 MB,2.617235 MB,49.0%
 | H | CAP |  da | HTML | Karvea_clean.htm


Metrics : 2: Generate XML,0.0046 Min,1.654568 MB,2.617235 MB,49.0%



2021-06-03 04:24:31,225 : XML Submission Logger_2_n : Initiating Submission To FHIR Server | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:31,227 : XML Submission Logger_2_n : Response{"resourceType":"Bundle","id":"5f31d603-a592-488c-90f2-b53b0133fed5","meta":{"versionId":"1","lastUpdated":"2021-06-02T22:54:30.678+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:bbe1b88f-5a54-42a4-a038-aa42133a6186","resource":{"resourceType":"Bundle","id":"b7b1417c-df06-4d07-bbe1-824f5deb8321","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-06-02T22:54:28+00:00","entry":[{"fullUr | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:31,239 : XML Submission Logger_2_n : POST sucessful: XML added with id: 5f31d603-a592-488c-90f2-b53b0133fed5 | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:31,242 : Flow Logger HTML_E : 2: Submit FHIR Msg,0.03

POST sucessful: XML added with id 5f31d603-a592-488c-90f2-b53b0133fed5
Metrics : 2: Submit FHIR Msg,0.0334 Min,0.204931 MB,0.827619 MB,48.9%

Created XML File For :- Karvea_clean_BILAG III.json

listRegulatedAuthCodesAccrossePI ['EU/1/97/049/010', 'EU/1/97/049/001', 'EU/1/97/049/002', 'EU/1/97/049/013', 'EU/1/97/049/003', '13.', 'Lot', 'EU/1/97/049/011', 'EU/1/97/049/004', 'EU/1/97/049/005', 'EU/1/97/049/014', 'EU/1/97/049/006', 'EU/1/97/049/012', 'EU/1/97/049/007', 'EU/1/97/049/008', 'EU/1/97/049/015', 'EU/1/97/049/009', 'EU/1/97/049/016', 'EU/1/97/049/017', 'EU/1/97/049/018', 'EU/1/97/049/019', 'EU/1/97/049/031', 'EU/1/97/049/020', 'EU/1/97/049/021', 'EU/1/97/049/022', 'EU/1/97/049/023', 'EU/1/97/049/024', 'EU/1/97/049/032', 'EU/1/97/049/025', 'EU/1/97/049/026', 'EU/1/97/049/027', 'EU/1/97/049/028', 'EU/1/97/049/029', 'EU/1/97/049/033', 'EU/1/97/049/030']


2021-06-03 04:24:32,265 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN EU/1/97/049/001  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:33,131 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN EU/1/97/049/002  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:34,017 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN EU/1/97/049/013  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:34,875 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN EU/1/97/049/003  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:35,759 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN 13.  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:36,631 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN Lot  | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:24:37,464 : List Bundle Creation Logger_2_6 : Getting list bundle for MAN EU/1/9

{'item': {'extension': [{'url': 'http://ema.europa.eu/fhir/extension/documentType', 'valueCoding': {'system': 'http://spor.ema.europa.eu/v1/100000155531', 'code': '100000155535', 'display': 'ETIKETTERING'}}, {'url': 'http://ema.europa.eu/fhir/extension/language', 'valueCoding': {'system': 'http://spor.ema.europa.eu/v1/100000072057', 'code': '100000072168', 'display': 'da'}}], 'reference': 'Bundle/6d13b128-2d68-4226-80eb-56bc48342777'}}
{'extension': [{'url': 'http://ema.europa.eu/fhir/extension/documentType', 'valueCoding': {'system': 'http://spor.ema.europa.eu/v1/100000155531', 'code': '100000155535', 'display': 'ETIKETTERING'}}, {'url': 'http://ema.europa.eu/fhir/extension/language', 'valueCoding': {'system': 'http://spor.ema.europa.eu/v1/100000072057', 'code': '100000072168', 'display': 'da'}}], 'reference': 'Bundle/6d13b128-2d68-4226-80eb-56bc48342777'}
b'<List xmlns="http://hl7.org/fhir"><id value="de99762d-bad5-47bb-ba34-207115a18aa3" /><identifier><system value="http://spor.ema.

2021-06-03 04:25:05,438 : List Bundle Creation Logger_2_6 : List update successfully completed de99762d-bad5-47bb-ba34-207115a18aa3 | H | CAP |  da | 2 | Karvea_clean_BILAG III.json
2021-06-03 04:25:05,448 : Flow Logger HTML_E : Completed list bundle update/addition | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:25:05,457 : Flow Logger HTML_E : 2: Update/Add List Bundle,0.5702 Min,0.280586 MB,0.448369 MB,49.2%
 | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:25:05,470 : Flow Logger HTML_E : 



||||||||||||||||||||||||||||||||3 ||||| Karvea_clean_ INDLÆGSSEDDEL.json||||||||||||||||||||||||||||||||



 | H | CAP |  da | HTML | Karvea_clean.htm
2021-06-03 04:25:05,622 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Starting Heading Extraction | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json


Metrics : 2: Update/Add List Bundle,0.5702 Min,0.280586 MB,0.448369 MB,49.2%

Index 3
Starting Heading Extraction For File :- Karvea_clean_ INDLÆGSSEDDEL.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Karvea\da\2021-05-25T07-18-55Z\partitionedJSONs\Karvea_clean_ INDLÆGSSEDDEL.json
--------------------------------------------
INDLÆGSSEDDEL


2021-06-03 04:25:06,695 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Started Extracting Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json
2021-06-03 04:25:06,773 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : <=1|18.75|(90, 100, 95)|0.962| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'B. INDLÆGSSEDDEL' | Qrd txt :- 'INDLÆGSSEDDEL' | Matched :- 'True'
2021-06-03 04:25:06,783 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:25:19,475 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : <=4|3.57|(98, 100, 100)|0.996| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Oversigt over indlægssedlen:' | Qrd txt :- 'Oversigt over indlægssedlen' | Matched :- 'True'
2021-06-03 04:25:19,494 

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:20,013 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '1. Virkning og anvendelse' | Qrd txt :- '1. Virkning og anvendelse' | Matched :- 'True'
2021-06-03 04:25:20,026 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:20,034 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:20,040 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


 vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True' da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du
2021-06-03 04:25:20,556 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:20,562 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:20,568 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


 tage Karvea' | Qrd txt :- '3. Sådan skal <De> <du> <tage> <bruge> {X}' | Matched :- 'True'sed : Contains<>|64.29|(69, 71, 80)|0.89| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '3.       Sådan skal du
2021-06-03 04:25:21,238 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,243 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,249 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:21,621 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:25:21,631 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,636 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,641 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:21,871 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:25:21,882 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,888 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:21,891 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:22,318 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:25:22,329 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:22,335 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:22,340 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | p

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:22,531 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:22,538 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:26,573 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|49.02|(76, 73, 87)|0.899| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True'
2021-06-03 04:25:26,587 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:26,601 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:26,610 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:25:33,937 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Advarsler og forsigtighedsregler' | Qrd txt :- 'Advarsler og forsigtighedsregler' | Matched :- 'True'
2021-06-03 04:25:33,947 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:33,954 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11007' | prevHeadingCurrId :- '11001' | prevHeadingFoundId :- '11001'
2021-06-03 04:25:43,363 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|16.67|(92, 92, 96)|0.981| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Børn og unge ' | Qrd txt :- 'Børn <og unge> 


OriginalCheck



2021-06-03 04:25:54,168 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase2|79.49|(66, 69, 86)|0.692| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Brug af Karvea sammen med mad og drikke' | Qrd txt :- 'Brug af {X} sammen med <mad> <og> <,> <drikke> <og> <alkohol>' | Matched :- 'True'
2021-06-03 04:25:54,177 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11008'
2021-06-03 04:25:54,184 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '11008' | prevHeadingFoundId :- '11008'
2021-06-03 04:25:54,627 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase3|115.0|(63, 70, 86)|0.893| | H | CAP |  da | 3 | Karvea_clean

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:13,410 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:26:13,421 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11012'
2021-06-03 04:26:13,429 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '11012' | prevHeadingFoundId :- '11012'
2021-06-03 04:26:21,376 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Indberetning af bivirkninger' | Qrd txt :- 'Indberetning af bivirkninger' | Matched :- 'True'
2021-06-03 04:26:21,385

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:21,610 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:26:21,618 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:26:21,625 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:26:21,629 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:25,282 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:26:25,293 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:26:25,299 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:26:25,305 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | p

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:29,015 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : <=4|3.45|(98, 97, 98)|0.985| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Udseende og pakningstørrelser' | Qrd txt :- 'Udseende og pakningsstørrelser' | Matched :- 'True'
2021-06-03 04:26:29,027 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11025' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:26:29,032 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11025' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
 indehaveren af markedsføringstilladelsen:' | Qrd txt :- 'Hvis <De> <du> ønsker yderligere oplysninger om <{X}> <dette lægemiddel>, skal <De> <du> henvende <Dem> <dig> til den lokale repræsentant for indehaveren af marked

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:45,223 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '1. Virkning og anvendelse' | Qrd txt :- '1. Virkning og anvendelse' | Matched :- 'True'
2021-06-03 04:26:45,230 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:45,236 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


 vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True' da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du
2021-06-03 04:26:45,605 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:45,609 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


 tage Karvea' | Qrd txt :- '3. Sådan skal <De> <du> <tage> <bruge> {X}' | Matched :- 'True'sed : Contains<>|64.29|(69, 71, 80)|0.89| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '3.       Sådan skal du
2021-06-03 04:26:46,308 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:46,321 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:46,785 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:26:46,794 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : (19, 3.0, 'CAP') Validation Passed As Current Heading Is Same As Previous H1 Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:46,799 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:47,105 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:26:47,113 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:47,121 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:47,589 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:26:47,600 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:47,607 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:47,790 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '1. Virkning og anvendelse' | Qrd txt :- '

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:47,798 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:47,805 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:50,232 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|49.02|(76, 73, 87)|0.899| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True'
2021-06-03 04:26:50,240 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:26:50,245 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:26:54,831 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Advarsler og forsigtighedsregler' | Qrd txt :- 'Advarsler og forsigtighedsregler' | Matched :- 'True'
2021-06-03 04:26:54,838 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:27:03,525 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|16.67|(92, 92, 96)|0.981| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Børn og unge' | Qrd txt :- 'Børn <og unge> ' | Matched :- 'True'
2021-06-03 04:27:03,535 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11008' | prevHeadingCurrId :- '11007


OriginalCheck



2021-06-03 04:27:13,194 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase2|79.49|(66, 69, 86)|0.692| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Brug af Karvea sammen med mad og drikke' | Qrd txt :- 'Brug af {X} sammen med <mad> <og> <,> <drikke> <og> <alkohol>' | Matched :- 'True'
2021-06-03 04:27:13,209 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11008'
2021-06-03 04:27:13,216 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '11008' | prevHeadingFoundId :- '11008'
2021-06-03 04:27:13,751 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase3|115.0|(63, 70, 86)|0.893| | H | CAP |  da | 3 | Karvea_clean

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:27:38,966 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:27:38,972 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : (19, 3.0, 'CAP') Validation Passed As Current Heading Is Same As Previous H1 Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11012'
2021-06-03 04:27:46,701 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Indberetning af bivirkninger' | Qrd txt :- 'Indberetning af bivirkninger' | Matched :- 'True'
2021-06-03 04:27:46,712 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11021' | prevHeadingCurrId

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:27:46,948 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:27:46,957 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:27:46,962 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:27:46,968 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:27:50,933 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:27:50,943 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:27:50,948 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:27:53,939 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : <=4|3.45|(98, 97, 98)|0.985| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Udseende og pakningstø

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:11,617 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '1. Virkning og anvendelse' | Qrd txt :- '1. Virkning og anvendelse' | Matched :- 'True'
2021-06-03 04:28:11,628 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:11,635 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11004' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


 vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True' da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du
2021-06-03 04:28:12,063 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:12,069 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


 tage Karvea' | Qrd txt :- '3. Sådan skal <De> <du> <tage> <bruge> {X}' | Matched :- 'True'sed : Contains<>|64.29|(69, 71, 80)|0.89| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '3.       Sådan skal du
2021-06-03 04:28:12,623 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:12,628 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:12,944 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:28:12,953 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:12,957 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:13,168 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:28:13,177 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:13,182 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:13,544 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:28:13,551 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : (23, 3.0, 'CAP') Validation Passed As Current Heading Is Same As Previous H1 Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:13,557 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:13,711 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '

----------------------------------
RemovedByStyle
----------------------------------
----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:15,971 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|49.02|(76, 73, 87)|0.899| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '2.       Det skal du vide, før du begynder at tage Karvea' | Qrd txt :- '2. Det skal <De> <du> vide, før <De> <du> begynder at <tage> <bruge> {X}' | Matched :- 'True'
2021-06-03 04:28:15,979 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:15,983 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:28:20,800 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Advarsler og forsigtighedsregler' | Qrd txt :- 'Advarsler og forsigtighedsregler' | Matched :- 'True'
2021-06-03 04:28:20,807 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed As This The First Heading | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-03 04:28:32,613 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : Contains<>|16.67|(92, 92, 96)|0.981| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Børn og unge' | Qrd txt :- 'Børn <og unge> ' | Matched :- 'True'
2021-06-03 04:28:32,624 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11008' | prevHeadingCurrId :- '11007


OriginalCheck



2021-06-03 04:28:43,143 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase2|79.49|(66, 69, 86)|0.692| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Brug af Karvea sammen med mad og drikke' | Qrd txt :- 'Brug af {X} sammen med <mad> <og> <,> <drikke> <og> <alkohol>' | Matched :- 'True'
2021-06-03 04:28:43,153 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11008'
2021-06-03 04:28:43,159 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11010' | prevHeadingCurrId :- '11008' | prevHeadingFoundId :- '11008'
2021-06-03 04:28:43,584 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : SpecialCase3|115.0|(63, 70, 86)|0.893| | H | CAP |  da | 3 | Karvea_clean

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:29:05,669 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '4. Bivirkninger' | Qrd txt :- '4. Bivirkninger' | Matched :- 'True'
2021-06-03 04:29:05,683 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11012'
2021-06-03 04:29:05,693 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11019' | prevHeadingCurrId :- '11012' | prevHeadingFoundId :- '11012'
2021-06-03 04:29:15,289 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Indberetning af bivirkninger' | Qrd txt :- 'Indberetning af bivirkninger' | Matched :- 'True'
2021-06-03 04:29:15,319

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:29:15,811 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '5. Opbevaring' | Qrd txt :- '5. Opbevaring' | Matched :- 'True'
2021-06-03 04:29:15,835 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:29:15,854 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:29:15,891 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'


----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:29:20,671 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Qrd txt :- '6. Pakningsstørrelser og yderligere oplysninger' | Matched :- 'True'
2021-06-03 04:29:20,686 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:29:20,694 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
2021-06-03 04:29:20,704 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Failed By Style | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11023' | prevHeadingCurrId :- '' | p

----------------------------------
RemovedByStyle
----------------------------------


2021-06-03 04:29:24,462 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Match Passed : <=4|3.45|(98, 97, 98)|0.985| | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | Doc txt :- 'Udseende og pakningstørrelser' | Qrd txt :- 'Udseende og pakningsstørrelser' | Matched :- 'True'
2021-06-03 04:29:24,471 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Flow Is Broken | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11025' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '11019'
2021-06-03 04:29:24,478 : Heading Extraction Karvea_clean_ INDLÆGSSEDDEL.json_Q : Validation Passed | H | CAP |  da | 3 | Karvea_clean_ INDLÆGSSEDDEL.json | currHeadId :- '11025' | prevHeadingCurrId :- '11019' | prevHeadingFoundId :- '11019'
 indehaveren af markedsføringstilladelsen:' | Qrd txt :- 'Hvis <De> <du> ønsker yderligere oplysninger om <{X}> <dette lægemiddel>, skal <De> <du> henvende <Dem> <dig> til den lokale repræsentant for indehaveren af marked

In [63]:
# Inspect the collection `b` in tabular form. `b` is not assigned in this cell,
# so it must already exist in the kernel from an earlier cell.
# The call is the cell's last expression, so Jupyter renders whatever it returns.
# NOTE(review): presumably returns a pandas DataFrame, going by the helper's name — confirm.
convertCollectionToDataFrame(b)

Unnamed: 0,index,id,Procedure type,Display code,Name,parent_id,htmlText,htmlIndex,htmlId,SubSectionIndex,doc_parent_id,heading_id
0,272,8001,CAP,,PRODUKTRESUMÉ,,PRODUKTRESUME,26,fd4ca558-9b66-42af-9ff7-f10552349731,0,,1
1,274,8003,CAP,1.0,LÆGEMIDLETS NAVN,8001.0,1. LÆGEMIDLETS NAVN,30,511e62b5-ad08-4d7a-9a0a-fa6d20151fe1,0,8001.0,3
2,275,8004,CAP,2.0,KVALITATIV OG KVANTITATIV SAMMENSÆTNING,8001.0,2. KVALITATIV OG KVANTITATIV\r SAMMENSÆTNING,35,8d1fd64c-8666-4593-bab5-17bd3db009ba,0,8001.0,4
3,279,8008,CAP,3.0,LÆGEMIDDELFORM,8001.0,3. LÆGEMIDDELFORM,45,e3c7f7e0-4daf-488d-aa20-91b284048b8a,0,8001.0,8
4,280,8009,CAP,4.0,KLINISKE OPLYSNINGER,8001.0,4. KLINISKE OPLYSNINGER,51,72c55e94-05d5-483a-882a-3379acbb27e8,0,8001.0,9
5,281,8010,CAP,4.1,Terapeutiske indikationer,8009.0,4.1 Terapeutiske indikationer,53,b9557958-672a-4aa0-b525-0deaf84a1c06,0,8009.0,10
6,287,8016,CAP,4.3,Kontraindikationer,8009.0,4.3 Kontraindikationer,93,87e2af9a-327c-4da7-b2ee-b4800c14a3da,0,8009.0,16
7,288,8017,CAP,4.4,Særlige advarsler og forsigtighedsregler vedrørende brugen,8009.0,4.4 Særlige advarsler og\r forsigtighedsregler vedrørende brugen,100,ac68dc25-6f81-43ca-a6fb-11f3d4f24fdd,0,8009.0,17
8,291,8020,CAP,4.5,Interaktion med andre lægemidler og andre former for interaktion,8009.0,4.5 Interaktion med andre lægemidler og\r andre former for interaktion,135,1460d2e3-2243-4459-b732-2890ab8e85e3,0,8009.0,20
9,293,8022,CAP,4.6,"Fertilitet, graviditet og amning",8009.0,"4.6 Fertilitet, graviditet og amning",153,b1321040-f717-4adb-9add-6a09d94d61f9,0,8009.0,22
