In [59]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [60]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames


class FolderNotFoundError(Exception):
    """Raised when a folder the pipeline needs to read from does not exist."""

class Metrics:
    """Record per-step timing and memory figures to a CSV file and to a logger.

    Each call to :meth:`getMetric` reports the wall-clock time since the
    previous checkpoint, the current/peak Python allocations reported by
    ``tracemalloc`` (in MB, i.e. bytes / 10**6) and the system RAM usage
    percentage reported by ``psutil``. :meth:`end` writes a summary row and
    releases the file handle.

    Parameters
    ----------
    logFileName : str
        Path of the CSV file the rows are appended to.
    logger : object
        Any object exposing ``logFlowCheckpoint(str)``.
    """

    _CSV_HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so it may already contain rows
        # from an earlier run; only emit the header for a new/empty file to
        # keep the CSV parseable.
        needsHeader = (not os.path.exists(self.logFileName)
                       or os.path.getsize(self.logFileName) == 0)
        # Open before starting the trace so a failed open() does not leave
        # tracemalloc running.
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self._CSV_HEADER)
        self.start()

    def start(self):
        """Start the step timer and begin tracing memory allocations."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Write one checkpoint row labelled ``msg`` and restart the step timer.

        Updates the running totals (``finalPeak``, ``finalUsedRamPerc``,
        ``finalTotalTime``) that :meth:`end` reports.
        """
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        # ``percent`` is the third field of psutil's virtual_memory() tuple.
        usedRamPerc = psutil.virtual_memory().percent

        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(f"{outputString}")

        print(f"Metrics : {outputString}")
        self.writer.write(outputString)
        # Flush so rows already recorded survive a crash in a later step.
        self.writer.flush()

        # Restart tracing so the next step's current/peak figures start from zero.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the ``Final Metrics`` summary row, close the file and stop tracing."""
        try:
            current, _ = tracemalloc.get_traced_memory()
            current = current / 10**6
            outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
            print(f"Metrics : {outputString}")
            self.logger.logFlowCheckpoint(f"{outputString}")
            self.writer.write(outputString)
        finally:
            # Always release the file handle and stop tracing, even if the
            # logger or the write above raised.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``str(int(x))`` when ``x`` can be converted to an int, else ``x`` unchanged.

    The conversion is best-effort: any ordinary failure of ``int(x)`` (for
    example ``None``, a non-numeric string, NaN) yields the original value.
    """
    try:
        return str(int(x))
    except Exception:
        # Deliberately broad, but not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still propagate so a long run can be interrupted.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised identifier columns.

    The ``parent_id`` and ``id`` columns are passed element-wise through
    ``convertToInt``; every other column is left as pandas created it.
    """
    frame = pd.DataFrame(collection)
    for columnName in ('parent_id', 'id'):
        frame[columnName] = frame[columnName].apply(convertToInt)
    return frame

def getRandomString(N):
    """Return ``N`` characters drawn at random from ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    pickedChars = []
    for _ in range(N):
        pickedChars.append(random.choice(alphabet))
    return ''.join(pickedChars)


def _replaceHtmlExtension(path, newExtension):
    """Return ``path`` with a trailing ``.htm``/``.html`` extension replaced by ``newExtension``.

    Only the final extension is touched; paths with any other extension are
    returned unchanged.
    """
    root, extension = os.path.splitext(path)
    if extension.lower() in ('.htm', '.html'):
        return root + newExtension
    return path


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML document(s) matching ``htmlDocName`` in ``basePath`` to JSON.

    The JSON file and a companion ``.txt`` style file are written to
    ``<basePath>/outputJSON``; both are named after ``htmlDocName``.

    Parameters
    ----------
    controlBasePath : str
        Folder passed to ``StyleRulesDictionary`` to locate the control files.
    basePath : str
        Folder containing the input HTML file.
    domain, procedureType, languageCode : str
        Forwarded to the loggers and to ``StyleRulesDictionary``.
    htmlDocName : str
        File name (or glob pattern) of the HTML document inside ``basePath``.
    fileNameQrd : str
        QRD file name forwarded to ``StyleRulesDictionary``.
    fileNameLog : str
        Log file path forwarded to the loggers.

    Returns
    -------
    tuple
        ``(json_file_name, style_file_path)`` - the base name of the JSON
        file and the full path of the style file.

    Raises
    ------
    FolderNotFoundError
        If ``basePath`` does not exist.
    FileNotFoundError
        If no file in ``basePath`` matches ``htmlDocName``.
    """
    module_path = os.path.join(basePath)

    # Fail fast with the intended exception. No logger is built here because
    # the log file lives under the folder that was just found to be missing.
    if not os.path.exists(module_path):
        raise FolderNotFoundError(module_path + " not found")

    output_json_path = os.path.join(basePath, 'outputJSON')
    filenames = glob.glob(os.path.join(module_path, htmlDocName))

    # Create the outputJSON folder if it doesn't exist.
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    if not filenames:
        message = f"No file matching {htmlDocName} found in {module_path}"
        logger.logFlowCheckpoint(message)
        raise FileNotFoundError(message)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Output names derive from htmlDocName (not from each matched input), so
    # they are the same for every iteration of the loop below.
    html_output_path = os.path.join(output_json_path, htmlDocName)
    style_filepath = _replaceHtmlExtension(html_output_path, '.txt')
    output_filename = _replaceHtmlExtension(html_output_path, '.json')

    for input_filename in filenames:
        print("-------------",style_filepath,"-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath = style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # basename copes with whichever separator os.path.join produced, unlike
    # splitting on a separator guessed from basePath.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition ``<basePath>/outputJSON/<fileNameJson>`` by QRD section heading.

    Loads the style rules for the given language/domain/procedure, then hands
    their ``qrd_section_headings`` and the JSON path to
    ``DocTypePartitioner.partitionHtmls`` and returns whatever it returns.
    """
    jsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    # Positional arguments shared by both loggers created in this function.
    loggerContext = (fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    styleRules = StyleRulesDictionary(
        logger=MatchLogger(f'Style Dictionary_{getRandomString(1)}', *loggerContext),
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    print("PathJson",jsonPath)

    docPartitioner = DocTypePartitioner(
        MatchLogger(f'Partition_{getRandomString(1)}', *loggerContext))

    return docPartitioner.partitionHtmls(styleRules.qrd_section_headings, jsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    The number of top/bottom headings handed to ``MatchDocument`` depends on
    ``documentNumber``; the stop-word language comes from
    ``DocumentTypeNames.extractStopWordLanguage``.

    Returns the three values produced by
    ``MatchDocument.matchHtmlHeaddingsWithQrd`` as a tuple.
    """
    # (documentNumber, topHeadingsConsidered, bottomHeadingsConsidered)
    headingWindows = (
        (0, 4, 6),
        (1, 3, 5),
        (2, 5, 15),
    )
    topHeadingsConsidered, bottomHeadingsConsidered = next(
        ((top, bottom) for number, top, bottom in headingWindows if documentNumber == number),
        (5, 10),  # any other document number
    )

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber
        )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        logger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName)
    headingsFrame, headingsCollection, documentType = matcher.matchHtmlHeaddingsWithQrd()

    return headingsFrame, headingsCollection, documentType


def parseDocument(controlBasePath, basePath ,htmlDocName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName = None):
    """Run the whole HTML -> JSON -> partition -> FHIR XML pipeline for one document.

    ``basePath`` must end in
    ``.../<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``;
    the domain, procedure type and language code are taken from those
    components. Log output goes to ``<basePath>/FinalLog.txt`` and step
    metrics to ``<basePath>/Metrics.csv``.

    Parameters
    ----------
    controlBasePath : str
        Folder holding the control files.
    basePath : str
        Working folder for this document (see layout above).
    htmlDocName : str
        Name of the HTML file inside ``basePath``.
    fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames : str
        Control file names forwarded to the individual pipeline steps.
    medName : str, optional
        Medicine name. When omitted it is taken from ``basePath``.

    Raises
    ------
    ValueError
        If ``basePath`` has fewer than five path components.

    Notes
    -----
    The credential and base URL passed to ``DocumentAnnotation`` can be
    overridden with the ``PMS_API_SUBSCRIPTION_KEY`` and ``PMS_API_BASE_URL``
    environment variables.
    """
    fileNameLog = os.path.join(basePath,'FinalLog.txt')

    # Accept either separator and ignore a trailing one, so the components
    # below do not shift.
    pathComponents = basePath.replace("\\", "/").rstrip("/").split("/")
    print(pathComponents, htmlDocName)
    if len(pathComponents) < 5:
        raise ValueError(
            f"basePath must end in <domain>/<procedureType>/<medName>/<languageCode>/<timestamp>, got {basePath}")

    timestamp = pathComponents[-1]
    languageCode =  pathComponents[-2]
    # Honour an explicitly supplied medName; fall back to the path otherwise.
    if medName is None:
        medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    # NOTE(security): the second DocumentAnnotation argument appears to be an
    # API subscription key. The literal default below is kept only for
    # backward compatibility; it should be rotated and supplied through the
    # environment instead of living in source control.
    pmsSubscriptionKey = os.environ.get('PMS_API_SUBSCRIPTION_KEY', 'c20835db4b1b4e108828a8537ff41506')
    pmsApiBaseUrl = os.environ.get('PMS_API_BASE_URL', 'https://spor-sit.azure-api.net/pms/api/v2/')

    flowLogger =  MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath,'Metrics.csv'),flowLogger)

    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        ###Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-",stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        ###Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Keep only the file names; later steps resolve them against basePath.
        partitionedJsonPaths = [ path.replace("\\", "/").split("/")[-1] for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The partition at index 3 is treated as the package leaflet and
            # uses a much larger stop-word filter length.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print(f"Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned,pmsSubscriptionKey,pmsApiBaseUrl,df,coll)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as annotationError:
                # Annotation is best-effort: continue without it, but record
                # why it failed instead of silently discarding the error.
                pms_oms_annotation_data = None
                print("Error Found")
                flowLogger.logFlowCheckpoint(f"Document Annotation Failed: {annotationError!r}")

            print(f"Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger =  MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print(f"Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger =  MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json','.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger =  MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        # The loop variable is unbound when there were no partitions, so
        # derive the label from the list instead.
        if partitionedJsonPaths:
            metrics.getMetric(f"{len(partitionedJsonPaths) - 1}: Completed")
        else:
            metrics.getMetric("Completed")
    finally:
        # Always write the summary row, close Metrics.csv and stop tracing,
        # even when a step above raised.
        metrics.end()

In [3]:
# NOTE(review): this import sits outside the notebook's main import cell;
# consider moving it there so a fresh "Restart & Run All" surfaces a missing
# dependency early.
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

# Run the Word -> HTML conversion step. No arguments are passed, so the input
# and output locations are decided inside WordToHtmlConvertor itself.
# NOTE(review): the recorded output of this cell ends in psutil.NoSuchProcess
# while Word processes were being killed - confirm the converter handles that.
wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

2021-05-19 00:50:50,658 : WordToHtmlLogger_M : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:50:50,659 : WordToHtmlLogger_M : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-18T19-20-50Z\ABASAGLAR_clean | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Word Files in folder:  ['ABASAGLAR~H~CAP~en.docx', 'Abilify Maintena~H~CAP~en.doc', 'ABILIFY~H~CAP~en.doc', 'Adakveo~H~CAP~en.docx', 'Adcetris~H~CAP~en.doc', '~$ilify Maintena~H~CAP~en.doc']
Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-18T19-20-50Z\ABASAGLAR_clean


2021-05-19 00:51:09,654 : WordToHtmlLogger_M : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx


2021-05-19 00:51:11,212 : WordToHtmlLogger_M : Starting document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Checking table 20
The selection starts on page 105 of 106 (69.44999694824219/70.75)
The selection ends on page 105 of 106 (384.8500061035156/70.75)
The selection contains
* overlay images

Checking table 19
The selection starts on page 103 of 106 (579.2000122070312/70.75)
The selection ends on page 105 of 106 (56.70000076293945/70.75)
The selection contains
* overlay images

Checking table 18
The selection starts on page 102 of 105 (564.5499877929688/214.75)
The selection ends on page 103 of 105 (421.79998779296875/70.75)
The selection contains
* overlay images
* overlay shapes

Checking table 17
The selection starts on page 102 of 105 (56.70000076293945/70.75)
The selection ends on page 102 of 105 (475.95001220703125/70.75)
The selection contains
* overlay images

Checking table 16
The selection starts on page 100 of 105 (577.9000244140625/70.75)
The selection ends on page 101 of 105 (484.25/70.75)
The selection contains
* overlay images

Checking table 15
The selection starts on pag

2021-05-19 00:51:51,072 : WordToHtmlLogger_M : Completed document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:51,078 : WordToHtmlLogger_M : Preparing zip file | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:52,742 : WordToHtmlLogger_M : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:52,746 : WordToHtmlLogger_M : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip


2021-05-19 00:52:05,189 : WordToHtmlLogger_M : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zipsuccessfully | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:52:05,190 : WordToHtmlLogger_M : Deleting input word file: ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:52:05,217 : WordToHtmlLogger_2 : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:52:05,218 : WordToHtmlLogger_2 : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-18T19-22-05Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-18T19-22-05Z\Abilify Maintena_clean


2021-05-19 00:54:18,892 : WordToHtmlLogger_2 : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-19 00:54:20,499 : WordToHtmlLogger_2 : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.3499755859375/70.75)
The selection ends on page 95 of 95 (737.2000122070312/70.75)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.29998779296875/70.75)
The selection ends on page 95 of 95 (480.3999938964844/70.75)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.05000305175781/70.75)
The selection ends on page 95 of 95 (227.4499969482422/70.75)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (653.0999755859375/70.75)
The selection ends on page 95 of 95 (56.70000076293945/70.75)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 95 (494.45001220703125/70.75)
The selection ends on page 94 of 95 (627.5999755859375/70.75)
The selection contains
* inline images

Checking table 18
The selection starts on page 94 of 95 (353.0/70.75)
The se

2021-05-19 00:54:46,529 : WordToHtmlLogger_2 : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:46,532 : WordToHtmlLogger_2 : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:47,821 : WordToHtmlLogger_2 : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:47,825 : WordToHtmlLogger_2 : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip


2021-05-19 00:54:59,563 : WordToHtmlLogger_2 : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:59,564 : WordToHtmlLogger_2 : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:59,573 : WordToHtmlLogger_2 : Killing Word processes as exception was raised | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Exception raised
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE


NoSuchProcess: psutil.NoSuchProcess process no longer exists (pid=34304)

In [66]:
# Folder holding the input zip archives: "inputblob" next to the parent of the
# current working directory.
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')
# Archive to process; the "~"-separated fields of the name are parsed by the
# next cell.
inputZipFileName = "Kalydeco~H~CAP~en~2021-05-19T09-22-20Z.zip"

In [67]:
# Control file names handed to parseDocument, and the mount point used when
# the notebook is not running locally.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'

# The zip name is "<medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip".
info = inputZipFileName.split("~")
if len(info) < 5:
    # Raise a real exception: raising a plain string is itself a TypeError.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}")

medName, domain, procedureType, languageCode, timestamp = info[:5]
timestamp = timestamp.replace(".zip","")

# A backslash in the working directory is taken to mean a local Windows run;
# anything else is treated as the mounted (deployed) environment.
# inputZipFolderPath is already absolute when it comes from the previous
# cell, in which case os.path.join returns it unchanged.
if "\\" in os.getcwd():
    localEnv = True
    inputZipFolderPath = os.path.join(os.path.abspath(os.path.join('..')),inputZipFolderPath)
    outputFolderPath = os.path.join(os.path.abspath(os.path.join('..')), 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(os.path.abspath(os.path.join('..')),'control')
else:
    localEnv = False
    inputZipFolderPath = os.path.join(f'{fsMountName}',inputZipFolderPath)
    outputFolderPath = os.path.join(f'{fsMountName}', 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(f'{fsMountName}','control')


print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the execute (search) bit to be usable on POSIX systems, so
# use 0o777 (still subject to the process umask) rather than 0o666.
mode = 0o777

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/","\\")
    outputFolderPath = outputFolderPath.replace("/","\\")
    controlFolderPath = controlFolderPath.replace("/","\\")

# Create each folder independently: an already existing folder must not stop
# the remaining ones from being created, and real errors must not be hidden.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName),"r") as zip_ref:
    zip_ref.extractall(outputFolderPath)

# Only the files directly inside the output folder are considered.
_,_,fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if fileName.lower().endswith((".htm", ".html"))]
if not htmlFileNames:
    raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath} after extracting {inputZipFileName}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



Kalydeco~H~CAP~en~2021-05-19T09-22-20Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Kalydeco_clean.htm


In [68]:
# Run the full pipeline on the HTML file extracted above. Logs and metrics are
# written under outputFolderPath (FinalLog.txt, Metrics.csv).
parseDocument(controlFolderPath, outputFolderPath, htmlFileName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName)

2021-05-20 16:46:12,894 : Flow Logger HTML_t : Starting HTML Conversion To Json | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:12,904 : Style Dictionary_8 : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:12,996 : Style Dictionary_8 : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | Kalydeco_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'Kalydeco', 'en', '2021-05-19T09-22-20Z'] Kalydeco_clean.htm
2021-05-19T09-22-20Z en Kalydeco CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\Kalydeco_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.json


2021-05-20 16:46:14,694 : Parser_X : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.txt | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:20,409 : Parser_X : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.json | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:20,862 : Flow Logger HTML_t : Completed HTML Conversion To Json | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:20,862 : Flow Logger HTML_t : HTML Conversion To Json,0.1328 Min,12.056966 MB,20.389168 MB,66.3%
 | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:20,882 : Flow Logger HTML_t : Starting Json Split | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:46:20,890 : Style Dictionary_a : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | Json | Kalydec

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.txt
Metrics : HTML Conversion To Json,0.1328 Min,12.056966 MB,20.389168 MB,66.3%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\outputJSON\Kalydeco_clean.json


2021-05-20 16:46:21,277 : Partition_3 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_SmPC.json | H | CAP |  en | Json | Kalydeco_clean.json
2021-05-20 16:46:21,417 : Partition_3 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX II.json | H | CAP |  en | Json | Kalydeco_clean.json
2021-05-20 16:46:21,571 : Partition_3 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX III.json | H | CAP |  en | Json | Kalydeco_clean.json
2021-05-20 16:46:21,594 : Partition_3 : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ PACKAGE LEAFLET.json | H | CAP |  en | Js

Metrics : Split Json,0.0123 Min,0.154051 MB,17.279588 MB,66.3%

Starting Heading Extraction For File :- Kalydeco_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_SmPC.json
--------------------------------------------
SmPC


2021-05-20 16:46:22,128 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Started Extracting Heading | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:46:22,315 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Qrd txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Matched :- 'True'
2021-05-20 16:46:22,323 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Passed As This The First Heading | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-20 16:46:22,351 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:46:22,359 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Flow Is Broken | H | 

2021-05-20 16:46:30,430 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:30,446 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Flow Is Broken | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-05-20 16:46:30,462 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-05-20 16:46:30,478 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:30

2021-05-20 16:46:40,049 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:40,064 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20013' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20020'
2021-05-20 16:46:40,090 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:40,103 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20020'
2021-05-20 16:46:40,109 : Heading Extraction Kalydeco_cl

2021-05-20 16:46:46,659 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20027'
2021-05-20 16:46:46,674 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:46,697 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20021' | prevHeadingCurrId :- '20020' | prevHeadingFoundId :- '20027'
2021-05-20 16:46:46,736 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:46,767 : Headin

2021-05-20 16:46:56,759 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:56,772 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20021' | prevHeadingCurrId :- '20020' | prevHeadingFoundId :- '20033'
2021-05-20 16:46:56,795 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:46:56,813 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20028' | prevHeadingCurrId :- '20027' | prevHeadingFoundId :- '20033'
2021-05-

2021-05-20 16:47:00,404 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:00,416 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20030' | prevHeadingFoundId :- '20038'
2021-05-20 16:47:00,434 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:00,446 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20037' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20038'
2021-05-20 16:47:01,979 : Headin

2021-05-20 16:47:18,030 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Passed As This The First Heading | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20003' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''


oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:47:18,861 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- '2. QUALITATIVE AND QUANTITATIVE COMPOSITION' | Qrd txt :- '2. QUALITATIVE AND QUANTITATIVE COMPOSITION' | Matched :- 'True'
2021-05-20 16:47:18,871 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20004' | prevHeadingCurrId :- '20003' | prevHeadingFoundId :- '20003'
2021-05-20 16:47:19,385 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed : <=4|11.11|(95, 89, 95)|0.989| | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Excipient with known effect' | Qrd txt :- 'Excipient(s) with known effect' | Matched :- 'True'
2021-05-20 16:47:19,399 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Flow Is Broken | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20004

2021-05-20 16:47:24,771 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20028' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-05-20 16:47:24,783 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:24,795 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Flow Is Broken | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-05-20 16:47:24,806 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingC

2021-05-20 16:47:29,824 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Flow Is Broken | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20020' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20017'
2021-05-20 16:47:29,832 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20020' | prevHeadingCurrId :- '20017' | prevHeadingFoundId :- '20017'
2021-05-20 16:47:32,807 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:32,819 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20013' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20020'
2021-05-20 16:47:32,840 : Heading Extraction Kalydeco_clean_SmPC.json_2 :

2021-05-20 16:47:36,260 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20013' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20027'
2021-05-20 16:47:36,280 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:36,291 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20027'
2021-05-20 16:47:36,297 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:36,308 : Heading Extraction Kalydeco_cl

2021-05-20 16:47:43,993 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:44,005 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20033'
2021-05-20 16:47:44,013 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:44,022 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20021' | prevHeadingCurrId :- '20020' | prevHeadingFoundId :- '20033'
2021-05-20 16:47:44,042 : Headin

2021-05-20 16:47:47,347 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:47,357 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20028' | prevHeadingCurrId :- '20027' | prevHeadingFoundId :- '20038'
2021-05-20 16:47:47,369 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Match Passed | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 16:47:47,379 : Heading Extraction Kalydeco_clean_SmPC.json_2 : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20030' | prevHeadingFoundId :- '20038'
2021-05-

2021-05-20 16:47:56,464 : Flow Logger HTML_t : Starting Document Annotation For File | H | CAP |  en | HTML | Kalydeco_clean.htm




Heading Not Found 
 ['qThis medicinal product is subject to additional monitoring. This will allow quick identification of new safety information. Healthcare professionals are asked to report any suspected adverse reactions. See section 4.8 for how to report adverse reactions.', 'General description', 'Qualitative and quantitative composition', 'Excipient(s) with known effect', 'Posology', 'Paediatric population', 'Method of administration ', 'Precautions to be taken before handling or administering the medicinal product', 'Traceability', 'Paediatric population', 'Paediatric population', 'Pregnancy', 'Breast-feeding', 'Fertility', 'Paediatric population', 'Reporting of suspected adverse reactions', 'Paediatric population', 'Mechanism of action', 'Pharmacodynamic effects', 'Clinical efficacy and safety', 'Paediatric population', 'Absorption', 'Distribution', 'Biotransformation', 'Elimination', 'Linearity/non-linearity', 'Pharmacokinetic/pharmacodynamic relationship(s)', 'Environmental

2021-05-20 16:47:57,608 : Flow Logger HTML_t : Completed Document Annotation | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:47:57,610 : Flow Logger HTML_t : 0: Document Annotation,0.0191 Min,0.181587 MB,0.217036 MB,66.1%
 | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:47:57,612 : Flow Logger HTML_t : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:47:57,618 : ExtractContentBetween_0_n : Cleaning Match Results | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:47:57,626 : ExtractContentBetween_0_n : Finished Cleaning Match Results | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:47:57,748 : Flow Logger HTML_t : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:47:57,750 : Flow Logger HTML_t : 0: Content Extraction,0.0023 Min,1.193596 MB,7.088399 MB,66.1%
 | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:47:57,791 : XmlGeneration_

Error Found
Completed Document Annotation
Metrics : 0: Document Annotation,0.0191 Min,0.181587 MB,0.217036 MB,66.1%

Starting Extracting Content Between Heading For File :- Kalydeco_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_SmPC.json
--------------------------------------------
Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.0023 Min,1.193596 MB,7.088399 MB,66.1%



2021-05-20 16:47:58,333 : XmlGeneration_0_F : Writing to File:Kalydeco_clean_SmPC.xml | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:47:58,339 : Flow Logger HTML_t : 0: Generate XML,0.0098 Min,7.133252 MB,10.893013 MB,66.0%
 | H | CAP |  en | HTML | Kalydeco_clean.htm


Metrics : 0: Generate XML,0.0098 Min,7.133252 MB,10.893013 MB,66.0%



2021-05-20 16:48:03,919 : XML Submission Logger_0_P : Initiating Submission To FHIR Server | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:48:03,919 : XML Submission Logger_0_P : Initiating Submission To FHIR Server | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:48:03,928 : XML Submission Logger_0_P : Response{"resourceType":"Bundle","id":"fa99ec51-5f70-49ce-865f-3873818dea29","meta":{"versionId":"1","lastUpdated":"2021-05-20T11:18:02.38+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:f723b134-302e-4f33-8a8d-df341e6a5f94","resource":{"resourceType":"Bundle","id":"cdc4f992-b18a-4840-82f3-483ac243cd5c","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-20T11:17:57+00:00","entry":[{"fullUrl | H | CAP |  en | 0 | Kalydeco_clean_SmPC.json
2021-05-20 16:48:03,928 : XML Submission Logger_0_P : Response{"resourceType":"Bundle","id":"fa99ec51-5f70-49ce-86

POST sucessful: XML added with id fa99ec51-5f70-49ce-865f-3873818dea29
Metrics : 0: Submit FHIR Msg,0.0934 Min,0.240812 MB,3.373031 MB,66.0%

Created XML File For :- Kalydeco_clean_SmPC.json
Starting Heading Extraction For File :- Kalydeco_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX II.json
--------------------------------------------
AnnexII


2021-05-20 16:48:04,234 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Started Extracting Heading | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json
2021-05-20 16:48:04,246 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'annex ii' | Qrd txt :- 'annex ii' | Matched :- 'True'
2021-05-20 16:48:04,248 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed In Lowercase  :  | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'Annex II' | Qrd txt :- 'ANNEX II' | Matched :- 'True'
2021-05-20 16:48:04,255 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Passed As This The First Heading | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-20 16:48:04,273 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed : Contains<>|118.75|(61, 91, 86)|0.807| | H | CAP |  en | 1 | Kaly


OriginalCheck



2021-05-20 16:48:04,437 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Flow Is Broken | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21005'
2021-05-20 16:48:04,446 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '21005' | prevHeadingFoundId :- '21005'
2021-05-20 16:48:04,529 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:04,542 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Flow Is Broken | H | CAP |  en | 1 | Kalydeco_clean_AN


OriginalCheck

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



 address of the manufacturer responsible for the release of the concerned batch.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'
2021-05-20 16:48:05,609 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Qrd txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Matched :- 'True'
2021-05-20 16:48:05,620 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21005' | prevHeadingCurrId :- '21004' | prevHeadingFoundId :- '21004'



OriginalCheck



 prescription (see Annex I: Summary of Product Characteristics, section 4.2).' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'oduct subject to restricted medical
2021-05-20 16:48:05,884 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'c. conditions requirements marketing authorisation' | Qrd txt :- 'c. conditions requirements marketing authorisation' | Matched :- 'True'
 the Marketing Authorisation' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'True'lydeco_clean_ANNEX II.json | Doc txt :- 'C.      Other conditions and requirements of
2021-05-20 16:48:05,897 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Flow Is Broken | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21005'
2021


OriginalCheck


OriginalCheck



2021-05-20 16:48:05,965 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed : <=7|2.56|(99, 100, 100)|0.995| | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- '·Periodic safety update reports (PSURs)' | Qrd txt :- 'Periodic safety update reports (PSURs)' | Matched :- 'True'
2021-05-20 16:48:05,974 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21008' | prevHeadingCurrId :- '21007' | prevHeadingFoundId :- '21007'
2021-05-20 16:48:06,045 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Match Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'd. conditions restrictions regard safe effective use medicinal product' | Qrd txt :- 'd. conditions restrictions regard safe effective use medicinal product' | Matched :- 'True'
 to the safe and effective use of the medicinal product' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND


OriginalCheck



 to conduct post-authorisation measures' | Qrd txt :- 'Obligation to conduct post-authorisation measures' | Matched :- 'True'H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- '·Obligation
2021-05-20 16:48:06,513 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Flow Is Broken | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21012' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21010'
2021-05-20 16:48:06,521 : Heading Extraction Kalydeco_clean_ANNEX II.json_X : Validation Passed | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | currHeadId :- '21012' | prevHeadingCurrId :- '21010' | prevHeadingFoundId :- '21010'
 below measures:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORrcase : Contains<>|274.07|(8, 13, 86)|0.334| | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json | Doc txt :- 'The MAH shall complete, within the stated timeframe, the
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORI


OriginalCheck



Heading Not Found 
 ['Name and address of the manufacturer(s) of the biological active substance(s)', 'Official batch release', 'Additional risk minimisation measures', 'SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR\r\n<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.0455 Min,0.658542 MB,2.857535 MB,66.0%

Starting Document Annotation For File :- Kalydeco_clean_ANNEX II.json
Error Found
Completed Document Annotation
Metrics : 1: Document Annotation,0.0002 Min,0.161719 MB,0.164272 MB,66.0%

Starting Extracting Content Between Heading For File :- Kalydeco_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX II.json
--------------------------------------------
Completed Extracting Content 

2021-05-20 16:48:08,679 : XML Submission Logger_1_a : Initiating Submission To FHIR Server | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json
2021-05-20 16:48:08,681 : XML Submission Logger_1_a : Response{"resourceType":"Bundle","id":"0b2f91fb-186f-4cdb-b936-f2a2f5da4035","meta":{"versionId":"1","lastUpdated":"2021-05-20T11:18:08.114+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:4f6af024-0093-4929-b211-7d7910cf3b0d","resource":{"resourceType":"Bundle","id":"551181ce-f093-477b-b1e2-891fdeb9009f","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-20T11:18:06+00:00","entry":[{"fullUr | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json
2021-05-20 16:48:08,687 : XML Submission Logger_1_a : POST sucessful: XML added with id: 0b2f91fb-186f-4cdb-b936-f2a2f5da4035 | H | CAP |  en | 1 | Kalydeco_clean_ANNEX II.json
2021-05-20 16:48:08,692 : Flow Logger HTML_t : 1: Submit FHIR Msg,0

POST sucessful: XML added with id 0b2f91fb-186f-4cdb-b936-f2a2f5da4035
Metrics : 1: Submit FHIR Msg,0.0315 Min,0.255857 MB,0.687396 MB,66.1%

Created XML File For :- Kalydeco_clean_ANNEX II.json
Starting Heading Extraction For File :- Kalydeco_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX III.json
--------------------------------------------
Labelling


2021-05-20 16:48:09,054 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Started Extracting Heading | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:48:09,350 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=1|25.0|(86, 100, 95)|0.921| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- 'A. LABELLING' | Qrd txt :- 'LABELLING ' | Matched :- 'True'
2021-05-20 16:48:09,356 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed As This The First Heading | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 THE OUTER PACKAGING' | Qrd txt :- 'PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>' | Matched :- 'True' | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- 'PARTICULARS TO APPEAR ON
2021-05-20 16:48:09,375 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 

2021-05-20 16:48:12,548 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22016' | prevHeadingCurrId :- '22015' | prevHeadingFoundId :- '22015'
2021-05-20 16:48:12,637 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '15. INSTRUCTIONS ON USE' | Qrd txt :- '15. INSTRUCTIONS ON USE' | Matched :- 'True'
2021-05-20 16:48:12,648 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22017' | prevHeadingCurrId :- '22016' | prevHeadingFoundId :- '22016'
2021-05-20 16:48:12,748 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '16. INFORMATION IN BRAILLE' | Qrd txt :- '16. INFORMATION IN BRAILLE' | Matched :- 'True'
2021-05-20 16:48:12,762 : Heading Extr


OriginalCheck



2021-05-20 16:48:14,556 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3.         EXPIRY DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:14,566 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22023'
2021-05-20 16:48:14,650 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:14,661 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '22023' | prevHeadingFoundId :- '22023'

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:15,278 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:15,287 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-05-20 16:48:15,550 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-05-20 16:48:15,561 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
20

2021-05-20 16:48:19,354 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:19,367 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:48:19,751 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-05-20 16:48:19,762 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevH

-------------------Here1------------------------
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:20,139 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:20,148 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-05-20 16:48:20,464 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-05-20 16:48:20,475 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
20

2021-05-20 16:48:24,142 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:24,154 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:48:24,311 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-05-20 16:48:24,322 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevH


OriginalCheck



2021-05-20 16:48:25,320 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22023'
2021-05-20 16:48:25,403 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:25,422 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '22023' | prevHeadingFoundId :- '22023'
2021-05-20 16:48:25,495 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : Contains<>|186.67|(49, 93, 86)|0.756| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '4.         BATCH NUMBER' | Qrd txt :- '13. BATCH NUMBER<, DONATI

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:26,044 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:26,054 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-05-20 16:48:26,349 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-05-20 16:48:26,360 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
20

2021-05-20 16:48:30,012 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:30,041 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:48:30,416 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-05-20 16:48:30,427 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevH

-------------------Here1------------------------
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:30,640 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : (2, 2.0, 'CAP') Validation Passed As Current Heading Is Same As Previous H1 Heading | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-20 16:48:30,776 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:30,786 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
2021-05-20 16:48:31,070 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. 

2021-05-20 16:48:33,849 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22018' | prevHeadingCurrId :- '22017' | prevHeadingFoundId :- '22017'
2021-05-20 16:48:33,962 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:33,972 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:48:34,124 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQU

-------------------Here1------------------------
oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:34,531 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:34,541 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
 SUBSTANCE' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'ch Passed : <=7|9.38|(96, 100, 97)|0.99| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2.         STATEMENT OF ACTIVE
2021-05-20 16:48:34,893 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-05-20 16:48:35,160 : Heading Extraction Kal

2021-05-20 16:48:39,198 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:39,212 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
 IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'00)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE
2021-05-20 16:48:39,611 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevHeadingCurrId :- '22019' | prevHeadingFoundId :- '22019'
2021-05-20 16:48:40,015 : Heading Extraction Kalydeco_cle

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:40,899 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. LIST OF EXCIPIENTS' | Qrd txt :- '3. LIST OF EXCIPIENTS' | Matched :- 'True'
2021-05-20 16:48:40,912 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22005' | prevHeadingCurrId :- '22004' | prevHeadingFoundId :- '22004'
2021-05-20 16:48:41,314 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Qrd txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Matched :- 'True'
2021-05-20 16:48:41,332 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22006' | prevHeadingCurrId :- '22005' | prevHeadingFoundId :- '22005'
2021-05-20 16:48:41,726 

2021-05-20 16:48:45,837 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Matched :- 'True'
2021-05-20 16:48:45,850 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Flow Is Broken | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22020'
2021-05-20 16:48:45,859 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '22020' | prevHeadingFoundId :- '22020'
2021-05-20 16:48:46,159 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT AND ROUTE(


OriginalCheck



2021-05-20 16:48:46,766 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3.         EXPIRY DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:46,779 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-05-20 16:48:46,858 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:46,873 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Wrong Heading Found | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '' | prevHeading

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:47,942 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:47,952 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
 ACTIVE SUBSTANCE' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'ed : <=7|9.38|(96, 100, 97)|0.99| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2.       STATEMENT OF
2021-05-20 16:48:48,223 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-05-20 16:48:48,400 : Heading Extraction Kalydeco_cle

2021-05-20 16:48:52,158 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:48:52,170 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:48:52,502 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-05-20 16:48:52,513 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevH

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:53,743 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. LIST OF EXCIPIENTS' | Qrd txt :- '3. LIST OF EXCIPIENTS' | Matched :- 'True'
2021-05-20 16:48:53,754 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22005' | prevHeadingCurrId :- '22004' | prevHeadingFoundId :- '22004'
2021-05-20 16:48:54,096 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Qrd txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Matched :- 'True'
2021-05-20 16:48:54,107 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22006' | prevHeadingCurrId :- '22005' | prevHeadingFoundId :- '22005'
2021-05-20 16:48:54,402 

2021-05-20 16:48:57,862 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Matched :- 'True'
2021-05-20 16:48:57,874 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Flow Is Broken | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22020'
2021-05-20 16:48:57,881 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '22020' | prevHeadingFoundId :- '22020'
2021-05-20 16:48:58,138 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT AND ROUTE(


OriginalCheck



2021-05-20 16:48:58,705 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-05-20 16:48:58,770 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:58,780 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Wrong Heading Found | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22029'
2021-05-20 16:48:58,805 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:48:58,

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:48:59,826 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 16:48:59,836 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22003' | prevHeadingCurrId :- '22002' | prevHeadingFoundId :- '22002'
 ACTIVE SUBSTANCE' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'ed : <=7|9.38|(96, 100, 97)|0.99| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '2.       STATEMENT OF
2021-05-20 16:49:00,131 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-05-20 16:49:00,325 : Heading Extraction Kalydeco_cle

2021-05-20 16:49:03,999 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-20 16:49:04,011 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22019' | prevHeadingCurrId :- '22018' | prevHeadingFoundId :- '22018'
2021-05-20 16:49:04,357 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed : <=7|2.33|(98, 98, 100)|0.995| | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '18.       UNIQUE IDENTIFIER – HUMAN READABLE DATA' | Qrd txt :- '18. UNIQUE IDENTIFIER - HUMAN READABLE DATA' | Matched :- 'True'
2021-05-20 16:49:04,369 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22020' | prevH

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 16:49:05,712 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. LIST OF EXCIPIENTS' | Qrd txt :- '3. LIST OF EXCIPIENTS' | Matched :- 'True'
2021-05-20 16:49:05,725 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22005' | prevHeadingCurrId :- '22004' | prevHeadingFoundId :- '22004'
2021-05-20 16:49:06,060 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Qrd txt :- '4. PHARMACEUTICAL FORM AND CONTENTS' | Matched :- 'True'
2021-05-20 16:49:06,071 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22006' | prevHeadingCurrId :- '22005' | prevHeadingFoundId :- '22005'
2021-05-20 16:49:06,361 

2021-05-20 16:49:09,824 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Qrd txt :- 'MINIMUM PARTICULARS TO APPEAR ON SMALL IMMEDIATE PACKAGING UNITS' | Matched :- 'True'
2021-05-20 16:49:09,837 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Flow Is Broken | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22020'
2021-05-20 16:49:09,845 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22027' | prevHeadingCurrId :- '22020' | prevHeadingFoundId :- '22020'
2021-05-20 16:49:10,093 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT AND ROUTE(


OriginalCheck



2021-05-20 16:49:10,663 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-05-20 16:49:10,729 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:49:10,741 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Validation Failed As Wrong Heading Found | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22029'
2021-05-20 16:49:10,766 : Heading Extraction Kalydeco_clean_ANNEX III.json_c : Match Passed | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 16:49:10,


All mandatory headings have been found !!!

dict_keys([])
Completed Heading Extraction For File
Metrics : 2: Heading Extraction,1.0453 Min,2.183945 MB,3.856857 MB,65.9%

Starting Document Annotation For File :- Kalydeco_clean_ANNEX III.json
Error Found
Completed Document Annotation
Metrics : 2: Document Annotation,0.0001 Min,0.175506 MB,0.177811 MB,65.9%

Starting Extracting Content Between Heading For File :- Kalydeco_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ANNEX III.json
--------------------------------------------


2021-05-20 16:49:12,199 : Flow Logger HTML_t : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:49:12,201 : Flow Logger HTML_t : 2: Content Extraction,0.0129 Min,0.622184 MB,3.099566 MB,65.9%
 | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:49:12,226 : XmlGeneration_2_M : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:49:12,228 : XmlGeneration_2_M : Initiating XML Generation | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json


Completed Extracting Content Between Heading
Metrics : 2: Content Extraction,0.0129 Min,0.622184 MB,3.099566 MB,65.9%

Already Exists


2021-05-20 16:49:12,434 : XmlGeneration_2_M : Writing to File:Kalydeco_clean_ANNEX III.xml | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:49:12,438 : Flow Logger HTML_t : 2: Generate XML,0.0039 Min,2.483918 MB,4.624969 MB,66.0%
 | H | CAP |  en | HTML | Kalydeco_clean.htm


Metrics : 2: Generate XML,0.0039 Min,2.483918 MB,4.624969 MB,66.0%



2021-05-20 16:49:15,007 : XML Submission Logger_2_5 : Initiating Submission To FHIR Server | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:49:15,008 : XML Submission Logger_2_5 : Response{"resourceType":"Bundle","id":"487c7994-720a-4162-a893-b63d6fbef055","meta":{"versionId":"1","lastUpdated":"2021-05-20T11:19:14.234+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:5c9afeed-ab2a-435b-9595-800fbe23223a","resource":{"resourceType":"Bundle","id":"542f04c0-9273-458a-8fd2-33daee215804","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-20T11:19:12+00:00","entry":[{"fullUr | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:49:15,017 : XML Submission Logger_2_5 : POST sucessful: XML added with id: 487c7994-720a-4162-a893-b63d6fbef055 | H | CAP |  en | 2 | Kalydeco_clean_ANNEX III.json
2021-05-20 16:49:15,019 : Flow Logger HTML_t : 2: Submit FHIR Ms

POST sucessful: XML added with id 487c7994-720a-4162-a893-b63d6fbef055
Metrics : 2: Submit FHIR Msg,0.0429 Min,0.069677 MB,1.399503 MB,66.1%

Created XML File For :- Kalydeco_clean_ANNEX III.json
Starting Heading Extraction For File :- Kalydeco_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Kalydeco\en\2021-05-19T09-22-20Z\partitionedJSONs\Kalydeco_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet


2021-05-20 16:49:15,457 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Started Extracting Heading | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:49:15,481 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : <=4|16.67|(91, 100, 95)|0.913| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'B. PACKAGE LEAFLET' | Qrd txt :- 'PACKAGE LEAFLET' | Matched :- 'True'
2021-05-20 16:49:15,489 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed As This The First Heading | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-20 16:49:19,011 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What is in this leaflet' | Qrd txt :- 'What is in this leaflet' | Matched :- 'True'
2021-05-20 16:49:19,019 : Head

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:19,344 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|16.33|(92, 86, 95)|0.977| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.   What you need to know before you take Kalydeco' | Qrd txt :- '2. What you need to know before you <take> <use> Kalydeco ' | Matched :- 'True'
2021-05-20 16:49:19,353 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,360 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,367 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kaly

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:19,674 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|34.78|(85, 70, 95)|0.951| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.   How to take Kalydeco' | Qrd txt :- '3. How to <take> <use> Kalydeco ' | Matched :- 'True'
2021-05-20 16:49:19,685 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,691 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,698 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:19,893 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '4. Possible side effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'
2021-05-20 16:49:19,904 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,912 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:19,918 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoun

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:20,115 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:20,122 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:20,130 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'


----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:20,384 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '6. Contents of the pack and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'
2021-05-20 16:49:20,397 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:20,405 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:49:20,413 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' |

----------------------------------
RemovedByStyle
----------------------------------


 Kalydeco' | Qrd txt :- '2. What you need to know before you <take> <use> Kalydeco ' | Matched :- 'True'tains<>|18.75|(91, 85, 95)|0.911| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.What you need to know before you take
2021-05-20 16:49:20,797 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23004' | prevHeadingFoundId :- '23004'
2021-05-20 16:49:20,832 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|40.0|(83, 65, 95)|0.944| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Do not take Kalydeco' | Qrd txt :- 'Do not <take> <use> Kalydeco' | Matched :- 'True'
2021-05-20 16:49:20,844 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23006' | prevHeadingCur

----------------------------------
RemovedByStyle
----------------------------------


 (23 mg) per dose, that is to say essentially ‘sodium‑free’.' | Qrd txt :- 'Kalydeco contains {name the excipient(s)}' | Matched :- 'True'P |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Kalydeco contains less than 1 mmol sodium
2021-05-20 16:49:37,053 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23013' | prevHeadingCurrId :- '23012' | prevHeadingFoundId :- '23012'
2021-05-20 16:49:37,061 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23013' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23012'


----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:49:37,474 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|40.91|(83, 68, 95)|0.883| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.How to take Kalydeco' | Qrd txt :- '3. How to <take> <use> Kalydeco ' | Matched :- 'True'
2021-05-20 16:49:37,486 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23012'
2021-05-20 16:49:37,493 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '23012' | prevHeadingFoundId :- '23012'
2021-05-20 16:49:41,059 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|120.0|(62, 100, 90)|0.891| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.j

2021-05-20 16:50:16,146 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What is in this leaflet' | Qrd txt :- 'What is in this leaflet' | Matched :- 'True'
2021-05-20 16:50:16,159 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed As Wrong Heading Found | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23003' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23029'
2021-05-20 16:50:16,319 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : End Of Sub Section | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:50:16,338 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What is in this leaflet' | Qrd txt :- 'What is in this leaflet' | Matched :- 'True'
2021-05-20 16:50:16,346 : Heading Extraction Kalydeco_cl

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
----------------------------------
RemovedByStyle
----------------------------------


 Kalydeco' | Qrd txt :- '2. What you need to know before you <take> <use> Kalydeco ' | Matched :- 'True'tains<>|19.3|(88, 88, 89)|0.96| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.   What you need to know before your child takes
2021-05-20 16:50:16,696 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:16,703 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:16,709 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFo

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:50:17,025 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|34.78|(85, 70, 95)|0.951| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.   How to take Kalydeco' | Qrd txt :- '3. How to <take> <use> Kalydeco ' | Matched :- 'True'
2021-05-20 16:50:17,036 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,043 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,050 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:50:17,251 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '4. Possible side effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'
2021-05-20 16:50:17,261 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,268 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,275 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoun

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:50:17,466 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,473 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,479 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'


----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:50:17,717 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '6. Contents of the pack and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'
2021-05-20 16:50:17,726 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,734 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 16:50:17,740 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Failed By Style | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' |

----------------------------------
RemovedByStyle
----------------------------------


 takes Kalydeco' | Qrd txt :- '2. What you need to know before you <take> <use> Kalydeco ' | Matched :- 'True'>|21.43|(87, 86, 88)|0.897| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.What you need to know before your child
2021-05-20 16:50:17,937 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23004' | prevHeadingFoundId :- '23004'
2021-05-20 16:50:17,973 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|40.0|(83, 65, 95)|0.944| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Do not take Kalydeco' | Qrd txt :- 'Do not <take> <use> Kalydeco' | Matched :- 'True'
2021-05-20 16:50:17,983 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23006' | prevHeadingC

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 16:50:34,458 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|40.91|(83, 68, 95)|0.883| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.How to take Kalydeco' | Qrd txt :- '3. How to <take> <use> Kalydeco ' | Matched :- 'True'
2021-05-20 16:50:34,469 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Flow Is Broken | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23012'
2021-05-20 16:50:34,477 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Validation Passed | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json | currHeadId :- '23014' | prevHeadingCurrId :- '23012' | prevHeadingFoundId :- '23012'
2021-05-20 16:50:44,701 : Heading Extraction Kalydeco_clean_ PACKAGE LEAFLET.json_2 : Match Passed : Contains<>|32.69|(77, 73, 79)|0.905| | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.js

2021-05-20 16:51:12,969 : Flow Logger HTML_t : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:51:12,974 : ExtractContentBetween_3_X : Cleaning Match Results | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:12,974 : ExtractContentBetween_3_X : Cleaning Match Results | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:12,981 : ExtractContentBetween_3_X : Finished Cleaning Match Results | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:12,981 : ExtractContentBetween_3_X : Finished Cleaning Match Results | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:13,034 : Flow Logger HTML_t : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 16:51:13,035 : Flow Logger HTML_t : 3: Content Extraction,0.0011 Min,0.284875 MB,1.321551 MB,65.8%
 | H | CAP |  en | HTML | Kalydeco_clean.htm
2021-05-20 1



Heading Not Found 
 ['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', 'X contains {name the excipient(s)}', 'Marketing Authorisation Holder and Manufacturer', 'For any information about this medicine, please contact the local representative of the Marketing Authorisation Holder:', 'The following information is intended for healthcare professionals only:']


dict_keys(['1. What Kalydeco is and what it is used for', '2. What you need to know before you <take> <use> Kalydeco ', '3. How to <take> <use> Kalydeco ', '4. Possible side effects', '5. How to store Kalydeco', '6. Contents of the pack and other information', 'Kalydeco contains {name the excipient(s)}'])
Completed Heading Extraction For File
Metrics : 3: Heading Extraction,1.9655 Min,0.893655 MB,3.277257 MB,65.8%

Starting Document Annotation For Fi

2021-05-20 16:51:13,253 : XmlGeneration_3_h : Writing to File:Kalydeco_clean_ PACKAGE LEAFLET.xml | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:13,253 : XmlGeneration_3_h : Writing to File:Kalydeco_clean_ PACKAGE LEAFLET.xml | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:13,258 : Flow Logger HTML_t : 3: Generate XML,0.0037 Min,2.045585 MB,3.681445 MB,65.8%
 | H | CAP |  en | HTML | Kalydeco_clean.htm


Metrics : 3: Generate XML,0.0037 Min,2.045585 MB,3.681445 MB,65.8%



2021-05-20 16:51:15,498 : XML Submission Logger_3_B : Initiating Submission To FHIR Server | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:15,501 : XML Submission Logger_3_B : Response{"resourceType":"Bundle","id":"72f82a5e-dee3-4e3e-a65e-0332e8a03691","meta":{"versionId":"1","lastUpdated":"2021-05-20T11:21:14.915+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:8caedb88-bc64-4fc1-b693-d61b0d96122b","resource":{"resourceType":"Bundle","id":"66c09c27-3327-49fc-91f0-ca99c95a664b","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-20T11:21:13+00:00","entry":[{"fullUr | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:15,515 : XML Submission Logger_3_B : POST sucessful: XML added with id: 72f82a5e-dee3-4e3e-a65e-0332e8a03691 | H | CAP |  en | 3 | Kalydeco_clean_ PACKAGE LEAFLET.json
2021-05-20 16:51:15,517 : Flow Logger HTML_

POST sucessful: XML added with id 72f82a5e-dee3-4e3e-a65e-0332e8a03691
Metrics : 3: Submit FHIR Msg,0.0376 Min,0.205618 MB,1.275539 MB,66.7%

Created XML File For :- Kalydeco_clean_ PACKAGE LEAFLET.json
Metrics : 3: Completed,0.0 Min,0.150285 MB,0.152681 MB,66.7%

Metrics : Final Metrics,5.0425 Min,0.0 MB,20.389168 MB,66.7%



In [20]:
a

Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased,StringLength
0,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;\r margin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r line-height:normal""><b><span lang=""EN-...",False,320efb82-a156-45a3-afdd-0e3a7e54d6da,False,,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;\r\nmargin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r\nline-height:normal,ANNEX\r II,False,True,8
1,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r margin-left:0in;line-height:normal""></p>",False,f79ed8ed-bfc4-4e8b-bd49-ede302291e8a,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r\nmargin-left:0in;line-height:normal,,False,False,0
2,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,7b2ff4b3-f27a-47ca-96a9-7ae24cfdefab,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,A. MANUFACTURER(S) OF\r THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH\r RELEASE,False,True,106
3,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d320b69e-a3c5-4764-aa31-251a32af8a17,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
4,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,e67b4687-f1a0-4608-988a-bcbb07029721,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,B. CONDITIONS OR\r RESTRICTIONS REGARDING SUPPLY AND USE,False,True,54
5,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,a9013513-966a-4638-818e-90def54e0698,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
6,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,f9007321-3055-4120-b8ed-79e72cb470ae,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,C. OTHER CONDITIONS AND\r REQUIREMENTS OF THE MARKETING AUTHORISATION,False,True,67
7,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,1b70d864-ef01-4bad-91f3-54dcbbe56cef,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
8,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,71e9cc22-be86-4f43-b932-f89b799916ba,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,D. CONDITIONS OR\r RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,False,True,96
9,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d7fd1fa1-ea7c-4b20-8b1b-11cbcafde70c,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0


In [69]:
convertCollectionToDataFrame(b)

NameError: name 'b' is not defined