In [49]:
import os
import zipfile
# Load IPython's autoreload extension. Re-running this cell on a live kernel
# only prints an "already loaded" notice (see the recorded output below).
%load_ext autoreload

# Mode 2: reload imported modules before each execution so edits to the
# project's .py files are picked up without restarting the kernel.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames


class FolderNotFoundError(Exception):
    """Raised when an expected input folder does not exist on disk."""

class Metrics:
    """Record per-step wall time and memory usage to a CSV file and a logger.

    Each call to :meth:`getMetric` reports the time and ``tracemalloc`` memory
    figures accumulated since the previous checkpoint, then restarts both
    measurements. :meth:`end` writes a final summary row and releases the
    file handle.

    Args:
        logFileName: Path of the CSV file; rows are appended to it.
        logger: Object exposing ``logFlowCheckpoint(message)``; every row
            written to the CSV is also sent to it.
    """

    # Column header of the CSV file.
    HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so only emit the header when the
        # file is new/empty; otherwise every run would add a duplicate header.
        needsHeader = (not os.path.exists(self.logFileName)
                       or os.path.getsize(self.logFileName) == 0)

        # Open the file before starting tracemalloc so that a failing open()
        # does not leave memory tracing switched on.
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self.HEADER)
        self.start()

    def start(self):
        """Start (or restart) the step timer and memory tracing."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Log the metrics of the step that just finished and start a new step.

        Args:
            msg: Step name written in the first CSV column.
        """
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; convert to (decimal) megabytes.
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        # Index 2 of psutil.virtual_memory() is the used-RAM percentage.
        usedRamPerc = psutil.virtual_memory()[2]

        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(f"{outputString}")

        print(f"Metrics : {outputString}")
        self.writer.write(outputString)
        # Flush so the rows written so far survive a crash in a later step.
        self.writer.flush()

        # Restart tracing and the timer so the next checkpoint only covers
        # the next step.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the final summary row, close the CSV file and stop tracing.

        Calling ``end`` again after the file is closed is a no-op.
        """
        if self.writer.closed:
            return

        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        # Include memory allocated since the last checkpoint in the overall peak.
        self.finalPeak = max(self.finalPeak, peak / 10**6)

        outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
        try:
            print(f"Metrics : {outputString}")
            self.logger.logFlowCheckpoint(f"{outputString}")
            self.writer.write(outputString)
        finally:
            # Always release the file handle and stop tracing, even if
            # logging or writing the summary fails.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string of its integer value, or ``x`` itself.

    Args:
        x: Any value. Values accepted by ``int()`` (e.g. ``"7"``, ``7.0``)
            are returned as the string of that integer.

    Returns:
        ``str(int(x))`` when the conversion succeeds; otherwise ``x``
        unchanged (for example ``None``, ``NaN`` or non-numeric text).
    """
    try:
        return str(int(x))
    except Exception:
        # Only conversion failures are tolerated. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised id columns.

    The ``parent_id`` and ``id`` columns are passed value by value through
    ``convertToInt``; all other columns are left as pandas created them.

    Args:
        collection: Any input accepted by ``pd.DataFrame`` that yields
            ``parent_id`` and ``id`` columns.

    Returns:
        The resulting ``pd.DataFrame``.
    """
    frame = pd.DataFrame(collection)
    for idColumn in ('parent_id', 'id'):
        frame[idColumn] = frame[idColumn].apply(convertToInt)
    return frame

def getRandomString(N):
    """Return ``N`` characters picked at random from ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    pickedChars = []
    for _ in range(N):
        pickedChars.append(random.choice(alphabet))
    return ''.join(pickedChars)


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML document ``htmlDocName`` found in ``basePath`` to JSON.

    The JSON file and a style file (``.txt``) are written by the parser to
    ``<basePath>/outputJSON``, which is created when missing. Both are named
    after ``htmlDocName`` with its extension replaced.

    Args:
        controlBasePath: Forwarded to ``StyleRulesDictionary``.
        basePath: Folder holding the HTML document.
        domain: Forwarded to the loggers and ``StyleRulesDictionary``.
        procedureType: Forwarded to the loggers and ``StyleRulesDictionary``.
        languageCode: Forwarded to the loggers and ``StyleRulesDictionary``.
        htmlDocName: File name (glob pattern) of the HTML document inside
            ``basePath``.
        fileNameQrd: Forwarded to ``StyleRulesDictionary`` as ``fileName``.
        fileNameLog: Log file path forwarded to the loggers.

    Returns:
        Tuple ``(json_file_name, style_filepath)``: the base name of the
        generated JSON file and the full path of the style file.

    Raises:
        FolderNotFoundError: If ``basePath`` does not exist.
        FileNotFoundError: If nothing in ``basePath`` matches ``htmlDocName``.
    """
    module_path = os.path.join(basePath)

    # Fail fast when the input folder is missing. No logger exists yet at
    # this point, so the error is only raised, not logged.
    if not os.path.exists(module_path):
        raise FolderNotFoundError(module_path + " not found")

    filenames = glob.glob(os.path.join(module_path, htmlDocName))
    if not filenames:
        # Without this check the function would fail later with an
        # UnboundLocalError on the (never assigned) output paths.
        raise FileNotFoundError(f"No file matching {htmlDocName} found in {module_path}")

    # Generate output folder path
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    # Swap only the real extension. str.replace on '.htm' would also rewrite
    # matches elsewhere in the path and misses upper-case extensions.
    docRoot, _ = os.path.splitext(htmlDocName)
    output_filename = os.path.join(output_json_path, docRoot + '.json')
    style_filepath = os.path.join(output_json_path, docRoot + '.txt')

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Every matched input is written to the same output paths, as before.
    for input_filename in filenames:
        print("-------------", style_filepath, "-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # os.path.basename copes with the platform's separators, unlike a manual
    # split on a separator guessed from basePath.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition ``<basePath>/outputJSON/<fileNameJson>`` by QRD section heading.

    Loads the style rule dictionary to obtain the QRD section headings and
    hands them, with the JSON path, to ``DocTypePartitioner.partitionHtmls``.

    Returns:
        Whatever ``partitionHtmls`` returns (the caller treats it as a list
        of paths of the partitioned JSON files).
    """
    rulesLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}',
        fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    rules = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    uberJsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", uberJsonPath)

    docTypePartitioner = DocTypePartitioner(
        MatchLogger(
            f'Partition_{getRandomString(1)}',
            fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog))

    return docTypePartitioner.partitionHtmls(rules.qrd_section_headings, uberJsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    The number of top/bottom headings handed to ``MatchDocument`` depends on
    ``documentNumber``: 0 -> (4, 6), 1 -> (3, 5), 2 -> (5, 15), and (5, 10)
    for any other value.

    Returns:
        The ``(df, coll, documentType)`` triple produced by
        ``MatchDocument.matchHtmlHeaddingsWithQrd()``.
    """
    # (documentNumber, top headings, bottom headings)
    headingWindows = (
        (0, 4, 6),
        (1, 3, 5),
        (2, 5, 15),
    )
    topHeadingsConsidered, bottomHeadingsConsidered = next(
        ((top, bottom) for number, top, bottom in headingWindows
         if documentNumber == number),
        (5, 10),
    )

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber,
    )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    # MatchDocument takes its arguments positionally; keep this exact order.
    matcher = MatchDocument(
        logger, controlBasePath, basePath, domain, procedureType,
        languageCode, documentNumber, fileNameDoc, fileNameQrd,
        fileNameMatchRuleBook, fileNameDocumentTypeNames,
        topHeadingsConsidered, bottomHeadingsConsidered,
        stopWordFilterLen, stopWordlanguage, isPackageLeaflet, medName)

    df, coll, documentType = matcher.matchHtmlHeaddingsWithQrd()
    return df, coll, documentType


def parseDocument(controlBasePath, basePath ,htmlDocName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName = None):
    """Run the full pipeline for one HTML document and submit the results.

    Steps: HTML -> JSON, split the JSON into partitions, then for every
    partition extract/validate headings, annotate the document, extract the
    content between headings, generate the FHIR XML and submit it. Step
    timings are appended to ``<basePath>/Metrics.csv`` and the flow is logged
    to ``<basePath>/FinalLog.txt``.

    Args:
        controlBasePath: Folder with the control files; forwarded to the steps.
        basePath: Working folder. Its last five path components are read as
            ``<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``.
        htmlDocName: File name of the HTML document inside ``basePath``.
        fileNameQrd: Forwarded to the conversion, split and heading steps.
        fileNameMatchRuleBook: Forwarded to the heading-extraction step.
        fileNameDocumentTypeNames: Forwarded to the heading-extraction step.
        medName: Medicine name. When ``None`` it is taken from ``basePath``.
            An explicit value is now honoured; previously it was always
            overwritten by the path component.

    Environment:
        ``PMS_API_KEY`` / ``PMS_API_BASE_URL`` override the built-in defaults
        of the two connection values passed to ``DocumentAnnotation``.
    """
    fileNameLog = os.path.join(basePath, 'FinalLog.txt')

    # Split on both kinds of separator (and ignore trailing ones) so the
    # components are found regardless of how the caller built the path.
    pathComponents = re.split(r"[\\/]+", basePath.rstrip("\\/"))
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode = pathComponents[-2]
    if medName is None:
        medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    # SECURITY NOTE(review): these look like an API subscription key and the
    # service base URL. The literals are kept only as backward-compatible
    # defaults; supply them through the environment and rotate the key.
    pmsApiKey = os.environ.get('PMS_API_KEY', 'c20835db4b1b4e108828a8537ff41506')
    pmsApiBaseUrl = os.environ.get('PMS_API_BASE_URL', 'https://spor-sit.azure-api.net/pms/api/v2/')

    flowLogger = MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath, 'Metrics.csv'), flowLogger)

    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        ###Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-", stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        ###Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Keep only the file names; later steps resolve them against basePath.
        partitionedJsonPaths = [re.split(r"[\\/]", path)[-1] for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The fourth partition (index 3) is handled as the package leaflet.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned, pmsApiKey, pmsApiBaseUrl, df, coll)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as annotationError:
                # Annotation is best-effort: continue without it, but record
                # the reason instead of silently discarding it.
                pms_oms_annotation_data = None
                print("Error Found")
                flowLogger.logFlowCheckpoint(f"Document Annotation Failed: {annotationError!r}")

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger = MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger = MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json', '.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger = MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        # Index of the last processed partition (-1 when there were none);
        # the loop variable itself is undefined for an empty partition list.
        lastIndex = len(partitionedJsonPaths) - 1
        metrics.getMetric(f"{lastIndex}: Completed")
    finally:
        # Always write the summary row, close Metrics.csv and stop
        # tracemalloc, even when a step raised.
        metrics.end()

In [3]:
# NOTE(review): this import lives outside the notebook's main import cell, and
# the cell's execution count (In [3]) is out of sequence with its neighbours,
# so its recorded output comes from a different run than the rest.
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

# Run the Word -> HTML conversion. Per the recorded output below, this cleans
# each Word file found in the ingest folder, zips the result, uploads the zip
# to Azure Storage and deletes the input file. The recorded run ended with a
# psutil.NoSuchProcess error while killing WINWORD.EXE processes.
wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

2021-05-19 00:50:50,658 : WordToHtmlLogger_M : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:50:50,659 : WordToHtmlLogger_M : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-18T19-20-50Z\ABASAGLAR_clean | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Word Files in folder:  ['ABASAGLAR~H~CAP~en.docx', 'Abilify Maintena~H~CAP~en.doc', 'ABILIFY~H~CAP~en.doc', 'Adakveo~H~CAP~en.docx', 'Adcetris~H~CAP~en.doc', '~$ilify Maintena~H~CAP~en.doc']
Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-18T19-20-50Z\ABASAGLAR_clean


2021-05-19 00:51:09,654 : WordToHtmlLogger_M : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx


2021-05-19 00:51:11,212 : WordToHtmlLogger_M : Starting document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Checking table 20
The selection starts on page 105 of 106 (69.44999694824219/70.75)
The selection ends on page 105 of 106 (384.8500061035156/70.75)
The selection contains
* overlay images

Checking table 19
The selection starts on page 103 of 106 (579.2000122070312/70.75)
The selection ends on page 105 of 106 (56.70000076293945/70.75)
The selection contains
* overlay images

Checking table 18
The selection starts on page 102 of 105 (564.5499877929688/214.75)
The selection ends on page 103 of 105 (421.79998779296875/70.75)
The selection contains
* overlay images
* overlay shapes

Checking table 17
The selection starts on page 102 of 105 (56.70000076293945/70.75)
The selection ends on page 102 of 105 (475.95001220703125/70.75)
The selection contains
* overlay images

Checking table 16
The selection starts on page 100 of 105 (577.9000244140625/70.75)
The selection ends on page 101 of 105 (484.25/70.75)
The selection contains
* overlay images

Checking table 15
The selection starts on pag

2021-05-19 00:51:51,072 : WordToHtmlLogger_M : Completed document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:51,078 : WordToHtmlLogger_M : Preparing zip file | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:52,742 : WordToHtmlLogger_M : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:51:52,746 : WordToHtmlLogger_M : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zip


2021-05-19 00:52:05,189 : WordToHtmlLogger_M : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-18T19-20-50Z.zipsuccessfully | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:52:05,190 : WordToHtmlLogger_M : Deleting input word file: ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-19 00:52:05,217 : WordToHtmlLogger_2 : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:52:05,218 : WordToHtmlLogger_2 : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-18T19-22-05Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-18T19-22-05Z\Abilify Maintena_clean


2021-05-19 00:54:18,892 : WordToHtmlLogger_2 : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-19 00:54:20,499 : WordToHtmlLogger_2 : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.3499755859375/70.75)
The selection ends on page 95 of 95 (737.2000122070312/70.75)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.29998779296875/70.75)
The selection ends on page 95 of 95 (480.3999938964844/70.75)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.05000305175781/70.75)
The selection ends on page 95 of 95 (227.4499969482422/70.75)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (653.0999755859375/70.75)
The selection ends on page 95 of 95 (56.70000076293945/70.75)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 95 (494.45001220703125/70.75)
The selection ends on page 94 of 95 (627.5999755859375/70.75)
The selection contains
* inline images

Checking table 18
The selection starts on page 94 of 95 (353.0/70.75)
The se

2021-05-19 00:54:46,529 : WordToHtmlLogger_2 : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:46,532 : WordToHtmlLogger_2 : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:47,821 : WordToHtmlLogger_2 : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:47,825 : WordToHtmlLogger_2 : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zip


2021-05-19 00:54:59,563 : WordToHtmlLogger_2 : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-18T19-22-05Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:59,564 : WordToHtmlLogger_2 : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-19 00:54:59,573 : WordToHtmlLogger_2 : Killing Word processes as exception was raised | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Exception raised
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE
Killing WINWORD.EXE


NoSuchProcess: psutil.NoSuchProcess process no longer exists (pid=34304)

In [51]:
# Name of the zip archive to process and the folder it is read from: the
# "inputblob" folder inside the parent of the current working directory.
inputZipFileName = "Zynteglo~H~CAP~en~2021-05-19T07-52-54Z.zip"
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')

In [52]:
# Control-file names handed to parseDocument, and the mount point used when
# the notebook does not run on a local (Windows) machine.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'

# The zip name is expected to look like
# "<medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip".
info = inputZipFileName.split("~")

try:
    medName, domain, procedureType, languageCode, timestamp = info[:5]
except ValueError as exc:
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}") from exc

timestamp = timestamp.replace(".zip", "")

# A backslash in the working directory is taken as "running locally on Windows".
if "\\" in os.getcwd():
    localEnv = True
    rootFolderPath = os.path.abspath('..')
else:
    localEnv = False
    rootFolderPath = fsMountName

# NOTE(review): if inputZipFolderPath is already absolute, os.path.join
# discards rootFolderPath, so the mount point is NOT applied to it - confirm
# this is intended for the non-local environment.
inputZipFolderPath = os.path.join(rootFolderPath, inputZipFolderPath)
outputFolderPath = os.path.join(rootFolderPath, 'work', domain, procedureType, medName, languageCode, timestamp)
controlFolderPath = os.path.join(rootFolderPath, 'control')

print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the execute bit to be traversable on POSIX, so 0o666 would
# create unusable folders there (the effective mode is still reduced by umask).
mode = 0o777

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/", "\\")
    outputFolderPath = outputFolderPath.replace("/", "\\")
    controlFolderPath = controlFolderPath.replace("/", "\\")

# Create each folder independently. With a single try block around all three
# calls, the first folder that already existed stopped the others from being
# created.
alreadyPresent = False
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    if os.path.isdir(folderPath):
        alreadyPresent = True
    os.makedirs(folderPath, mode, exist_ok=True)
if alreadyPresent:
    print("Already Present")

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)

# Pick the first HTML file sitting directly in the output folder.
_, _, fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames
                 if os.path.splitext(fileName)[1].lower() in ('.htm', '.html')]
if not htmlFileNames:
    raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath} after extracting {inputZipFileName}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



Zynteglo~H~CAP~en~2021-05-19T07-52-54Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Zynteglo_clean.htm


In [53]:
parseDocument(controlFolderPath, outputFolderPath, htmlFileName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName)

2021-05-20 00:22:58,330 : Flow Logger HTML_2 : Starting HTML Conversion To Json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:22:58,336 : Style Dictionary_v : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:22:58,336 : Style Dictionary_v : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:22:58,367 : Style Dictionary_v : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:22:58,367 : Style Dictionary_v : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | Zynteglo_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'Zynteglo', 'en', '2021-05-19T07-52-54Z'] Zynteglo_clean.htm
2021-05-19T07-52-54Z en Zynteglo CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\Zynteglo_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.json


2021-05-20 00:22:58,855 : Parser_b : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.txt | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:22:58,855 : Parser_b : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.txt | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:02,108 : Parser_b : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:02,108 : Parser_b : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:02,315 : Flow Logger HTML_2 : Completed HTML Co

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.txt
Metrics : HTML Conversion To Json,0.0665 Min,5.550037 MB,10.13895 MB,66.4%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\outputJSON\Zynteglo_clean.json


2021-05-20 00:23:02,575 : Partition_h : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-20 00:23:02,641 : Partition_h : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX III.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-20 00:23:02,650 : Partition_h : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ PACKAGE LEAFLET.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-20 00:23:02,663 : Flow Logger HTML_2 : ['Zynteglo_clean_SmPC.json', 'Zynteglo_clean_ANNEX II.json', 'Zynteglo_clean_ANNEX III.json', 'Zynteglo_clean_ PACKAGE LEAFLET.json'] | H | CAP |  en | HTML | Zynteglo_clean.htm
2

Metrics : Split Json,0.0057 Min,0.452009 MB,8.733447 MB,66.4%

Starting Heading Extraction For File :- Zynteglo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json
--------------------------------------------
SmPC


2021-05-20 00:23:03,172 : Heading Extraction Zynteglo_clean_SmPC.json_L : Started Extracting Heading | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:03,455 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Qrd txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Matched :- 'True'
2021-05-20 00:23:03,461 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Passed As This The First Heading | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-20 00:23:03,750 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed : checkLowerCase|2.88|(99, 100, 99)|0.919| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'medicinal product subject additional monitoring. allow quick identification new safety information. healthcare professionals asked report suspected adverse reactions


OriginalCheck

----------------------------------
RemovedByStyle
----------------------------------


2021-05-20 00:23:03,789 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Failed By Style | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20001'
2021-05-20 00:23:03,912 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-20 00:23:03,929 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20003' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20001'
2021-05-20 00:23:03,937 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20003' | prevHeadingCurrId :- '20001' | prevHeadingFoundId :- '20001'
 dispersion for infusion. ' | Qrd txt :- '6.6 Special precautions


OriginalCheck



2021-05-20 00:23:05,035 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed : <=4|11.11|(95, 89, 95)|0.989| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Excipient with known effect' | Qrd txt :- 'Excipient(s) with known effect' | Matched :- 'True'
2021-05-20 00:23:05,050 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20007' | prevHeadingCurrId :- '20006' | prevHeadingFoundId :- '20006'
2021-05-20 00:23:05,752 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '3. PHARMACEUTICAL FORM' | Qrd txt :- '3. PHARMACEUTICAL FORM' | Matched :- 'True'
2021-05-20 00:23:05,762 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20008' | prevHeadingCurrId :- '' | 

2021-05-20 00:23:12,953 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-05-20 00:23:12,970 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-05-20 00:23:12,997 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-20 00:23:13,013 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20037' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-05-20 00:23:1

2021-05-20 00:23:21,386 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20020' | prevHeadingCurrId :- '20017' | prevHeadingFoundId :- '20017'
 until at least 7 days after Zynteglo infusion (see section 4.4).' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'ld not take anti‑retroviral
 family of enzymes or drug transporters.' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'Doc txt :- 'No formal drug interaction studies have been
 erythropoiesis-stimulating agents in patients treated with Zynteglo.' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'cal experience with the use of
 during or following Zynteglo treatment has not been studied. ' | Qrd txt

 reactions attributed to myeloablative conditioning' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False' 'Table 2              Adverse
 reactions attributed to myeloablative conditioning' | Qrd txt :- '6.6 Special precautions for disposal <and other handling>' | Matched :- 'False'0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Table 2              Adverse
2021-05-20 00:23:28,615 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Failed : SpecialCase1|153.66|(29, 42, 40)|0.642| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Description of selected adverse reactions' | Qrd txt :- '6.5 Nature and contents of container <and special equipment for use administration or implantation>' | Matched :- 'False'
2021-05-20 00:23:29,017 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Failed : SpecialCase2|130.0|(31, 40, 36)|0.613| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Hepat

2021-05-20 00:23:31,111 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Mechanism of action' | Qrd txt :- 'Mechanism of action' | Matched :- 'True'
2021-05-20 00:23:31,125 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20034' | prevHeadingCurrId :- '20033' | prevHeadingFoundId :- '20033'
2021-05-20 00:23:31,359 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Pharmacodynamic effects' | Qrd txt :- 'Pharmacodynamic effects' | Matched :- 'True'
2021-05-20 00:23:31,370 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20035' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20033'
2021-05-20 00:23:31

2021-05-20 00:23:39,683 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '6. PHARMACEUTICAL PARTICULARS' | Qrd txt :- '6. PHARMACEUTICAL PARTICULARS' | Matched :- 'True'
2021-05-20 00:23:39,696 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20047' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20045'
2021-05-20 00:23:39,705 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20047' | prevHeadingCurrId :- '20045' | prevHeadingFoundId :- '20045'
2021-05-20 00:23:39,916 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '6.1 List of excipients' | Qrd txt :- '6.1 List of excipients' | Matched :- 'True'
2021-05-20 00:23:39,926 : Heading Extraction Zynteglo_clean_SmPC.json_L

2021-05-20 00:23:47,178 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20055' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20053'
2021-05-20 00:23:47,186 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20055' | prevHeadingCurrId :- '20053' | prevHeadingFoundId :- '20053'
 AUTHORISATION NUMBER(S) ' | Qrd txt :- '7. MARKETING AUTHORISATION HOLDER' | Matched :- 'False'22|(81, 85, 82)|0.923| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '8.       MARKETING
2021-05-20 00:23:48,521 : Heading Extraction Zynteglo_clean_SmPC.json_L : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '8. MARKETING AUTHORISATION NUMBER(S)' | Qrd txt :- '8. MARKETING AUTHORISATION NUMBER(S)' | Matched :- 'True'
2021-05-20 00:23:48,530 : Heading Extraction Zynteglo_clean_SmPC.json_L : Validation Pas



Heading Not Found 
 ['qThis medicinal product is subject to additional monitoring. This will allow quick identification of new safety information. Healthcare professionals are asked to report any suspected adverse reactions. See section 4.8 for how to report adverse reactions.', 'Excipient(s) with known effect', 'Posology', 'Paediatric population', 'Method of administration ', 'Precautions to be taken before handling or administering the medicinal product', 'Traceability', 'Paediatric population', 'Paediatric population', 'Pregnancy', 'Breast-feeding', 'Fertility', 'Paediatric population', 'Reporting of suspected adverse reactions', 'Paediatric population', 'Mechanism of action', 'Pharmacodynamic effects', 'Clinical efficacy and safety', 'Paediatric population', 'Absorption', 'Distribution', 'Biotransformation', 'Elimination', 'Linearity/non-linearity', 'Pharmacokinetic/pharmacodynamic relationship(s)', 'Environmental risk assessment (ERA)', 'Use in the paediatric population', 'DOSIM

2021-05-20 00:23:51,471 : Flow Logger HTML_2 : Completed Document Annotation | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:51,473 : Flow Logger HTML_2 : 0: Document Annotation,0.0264 Min,0.17923 MB,0.217825 MB,66.5%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:51,475 : Flow Logger HTML_2 : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:51,479 : ExtractContentBetween_0_C : Cleaning Match Results | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:51,484 : ExtractContentBetween_0_C : Finished Cleaning Match Results | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:51,535 : Flow Logger HTML_2 : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:51,537 : Flow Logger HTML_2 : 0: Content Extraction,0.001 Min,0.677739 MB,4.068239 MB,66.5%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:23:51,564 : XmlGeneration_0_

Error Found
Completed Document Annotation
Metrics : 0: Document Annotation,0.0264 Min,0.17923 MB,0.217825 MB,66.5%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json
--------------------------------------------
Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.001 Min,0.677739 MB,4.068239 MB,66.5%

Already Exists


2021-05-20 00:23:51,745 : XmlGeneration_0_D : Writing to File:Zynteglo_clean_SmPC.xml | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:51,749 : Flow Logger HTML_2 : 0: Generate XML,0.0035 Min,2.796413 MB,5.351286 MB,66.6%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 0: Generate XML,0.0035 Min,2.796413 MB,5.351286 MB,66.6%



2021-05-20 00:23:57,862 : XML Submission Logger_0_l : Initiating Submission To FHIR Server | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:57,864 : XML Submission Logger_0_l : Response{"resourceType":"Bundle","id":"3e538037-68e8-46a5-8234-c390805c0b10","meta":{"versionId":"1","lastUpdated":"2021-05-19T18:53:53.951+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:fff587ec-dfa6-4590-809a-e4fc7afc17c7","resource":{"resourceType":"Bundle","id":"fb9348b0-aa2d-416b-a153-4a2961e56f3c","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-19T18:53:51+00:00","entry":[{"fullUr | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:57,870 : XML Submission Logger_0_l : POST sucessful: XML added with id: 3e538037-68e8-46a5-8234-c390805c0b10 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-20 00:23:57,872 : Flow Logger HTML_2 : 0: Submit FHIR Msg,0.102 Min,0.2

POST sucessful: XML added with id 3e538037-68e8-46a5-8234-c390805c0b10
Metrics : 0: Submit FHIR Msg,0.102 Min,0.222934 MB,2.152885 MB,66.5%

Created XML File For :- Zynteglo_clean_SmPC.json
Starting Heading Extraction For File :- Zynteglo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json
--------------------------------------------
AnnexII


2021-05-20 00:23:58,102 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Started Extracting Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-20 00:23:58,106 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'ANNEX II' | Qrd txt :- 'ANNEX II' | Matched :- 'True'
2021-05-20 00:23:58,111 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed As This The First Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'nteglo_clean_ANNEX II.json | Doc txt :- 'A. MANUFACTURER(S) OF
2021-05-20 00:23:58,136 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '2100


OriginalCheck


OriginalCheck


OriginalCheck



 RELEASE' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'| Zynteglo_clean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURER(S) OF THE
2021-05-20 00:23:58,689 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : End Of Sub Section | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
 RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'nteglo_clean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURER(S) OF THE
2021-05-20 00:23:58,711 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed As This The First Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 biological active substance(s)' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | 

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



 for batch release' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False' Zynteglo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible
 for batch release' | Qrd txt :- 'Name and address of the manufacturer(s) of the biological active substance(s)' | Matched :- 'False'|  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible
2021-05-20 00:23:59,008 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'True'
2021-05-20 00:23:59,017 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '2100


OriginalCheck



2021-05-20 00:23:59,278 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Qrd txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Matched :- 'True'
2021-05-20 00:23:59,288 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21005' | prevHeadingCurrId :- '21004' | prevHeadingFoundId :- '21004'
 Summary of Product Characteristics, section 4.2).' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'| Doc txt :- 'Medicinal
2021-05-20 00:23:59,534 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Qrd txt :- 'C. OTHER CONDITIO


OriginalCheck



 Safety Update Reports (PSURs)' | Qrd txt :- 'Periodic safety update reports (PSURs)' | Matched :- 'True'1, 92, 100)|0.917| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Periodic
2021-05-20 00:23:59,614 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21008' | prevHeadingCurrId :- '21007' | prevHeadingFoundId :- '21007'
 product within 6 months following authorisation. ' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'False' Zynteglo_clean_ANNEX II.json | Doc txt :- 'The
 product within 6 months following authorisation. ' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False' | Doc txt :- 'The
 WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT  ' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(


OriginalCheck


OriginalCheck



 management plan (RMP)' | Qrd txt :- 'Risk management plan (RMP)' | Matched :- 'True'assed : <=4|3.7|(98, 100, 100)|0.993| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Risk
2021-05-20 00:23:59,946 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21010' | prevHeadingCurrId :- '21009' | prevHeadingFoundId :- '21009'
 risk minimisation measures ' | Qrd txt :- 'Additional risk minimisation measures' | Matched :- 'True'(99, 100, 100)|0.995| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Additional
2021-05-20 00:24:00,287 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21011' | prevHeadingCurrId :- '21010' | prevHeadingFoundId :- '21010'
 contain:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORIn Lowercase : Contains<>|302.0|(6, 12, 86)|0.


OriginalCheck


OriginalCheck



2021-05-20 00:24:01,329 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Failed In Lowercase : Contains<>|202.74|(8, 8, 86)|0.367| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The Guide for healthcare professionals shall contain the following key elements:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 of patient’s guide' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'Zynteglo_clean_ANNEX II.json | Doc txt :- '–content
 of patient’s guide' | Qrd txt :- 'Name and address of the manufacturer(s) of the biological active substance(s)' | Matched :- 'False'7| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–content
 of patient’s guide' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'False'86)|0.527| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–content



OriginalCheck


OriginalCheck


OriginalCheck



 professional' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORowercase : Contains<>|214.49|(8, 10, 86)|0.396| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–the
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'
 in the drug product Registry.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'_clean_ANNEX II.json | Doc txt :- '–enrolment



OriginalCheck


OriginalCheck


OriginalCheck



 in the drug product Registry.' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORns<>|377.5|(6, 10, 86)|0.359| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–enrolment
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'
 following key elements:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORContains<>|141.35|(7, 9, 86)|0.377| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-20 00:24:04,084 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Failed In Lowercase : Contains<>|351.16|(6, 12, 86)|0.337| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oInstructions about the thawing of Zynteglo' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-20 00:24:04,352 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Failed In Lowercase : Contains<>|343.18|(6, 11, 86)|0.336| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'The patient information pack should contain:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-20 00:24:04,807 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Failed In Lowercase : Contains<>|240.32|(7, 11, 86)|0.352| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The patient/carer guide shall contain the following key messages:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 in the drug product Registry.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'clean_ANNEX II.json | Doc txt :- 'oEnrolment
 in the drug product Registry.' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORns<>|375.0|(7, 12, 86)|0.375| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oEnrolment
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck


OriginalCheck



2021-05-20 00:24:06,291 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Match Failed In Lowercase : Contains<>|242.62|(8, 11, 86)|0.365| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The patient alert card shall contain the following key messages:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 to conduct post-authorisation measures ' | Qrd txt :- 'Obligation to conduct post-authorisation measures' | Matched :- 'True' | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Obligation
2021-05-20 00:24:07,469 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21012' | prevHeadingCurrId :- '21011' | prevHeadingFoundId :- '21011'
 MAH shall complete, within the stated timeframe, the below measures:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'The
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'
 AUTHORISATION' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO



OriginalCheck


OriginalCheck



 AUTHORISATION' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'False', 77, 86)|0.683| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO
 AUTHORISATION' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'eglo_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO
 AUTHORISATION' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORtains<>|65.26|(75, 95, 86)|0.93| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'True'
2021-05-20 00:24:07,949 : Heading Extraction Zynteglo_clean_ANNEX II.json_S : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21013' | prevHeadingCurrId :- '21012' | pr



Heading Not Found 
 ['Official batch release']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.1721 Min,0.587439 MB,2.95174 MB,66.3%

Starting Document Annotation For File :- Zynteglo_clean_ANNEX II.json
Error Found
Completed Document Annotation
Metrics : 1: Document Annotation,0.0001 Min,0.017176 MB,0.165624 MB,66.3%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json
--------------------------------------------
Completed Extracting Content Between Heading
Metrics : 1: Content Extraction,0.0005 Min,0.163535 MB,0.59007 MB,66.3%

Already Exists
Metrics : 1: Generate XML,0.0017 Min,0.874722 MB,1.947809 MB,66.3%



2021-05-20 00:24:10,543 : XML Submission Logger_1_Y : Initiating Submission To FHIR Server | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-20 00:24:10,544 : XML Submission Logger_1_Y : Response{"resourceType":"Bundle","id":"72b89386-2f24-405f-81fd-b525914fb328","meta":{"versionId":"1","lastUpdated":"2021-05-19T18:54:09.892+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:a718573e-b7a2-4bfe-b523-809a8fe03c37","resource":{"resourceType":"Bundle","id":"420ddba5-a66d-4925-9c54-2a6f0ab63e9b","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-19T18:54:08+00:00","entry":[{"fullUr | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-20 00:24:10,547 : XML Submission Logger_1_Y : POST sucessful: XML added with id: 72b89386-2f24-405f-81fd-b525914fb328 | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-20 00:24:10,548 : Flow Logger HTML_2 : 1: Submit FHIR Msg,0

POST sucessful: XML added with id 72b89386-2f24-405f-81fd-b525914fb328
Metrics : 1: Submit FHIR Msg,0.0365 Min,0.061356 MB,0.781 MB,66.3%

Created XML File For :- Zynteglo_clean_ANNEX II.json
Starting Heading Extraction For File :- Zynteglo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX III.json
--------------------------------------------
Labelling


2021-05-20 00:24:10,816 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Started Extracting Heading | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
 LABELLING' | Qrd txt :- 'LABELLING ' | Matched :- 'True'an_ANNEX III.json_S : Match Passed : <=1|25.0|(86, 100, 95)|0.921| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- 'A.
2021-05-20 00:24:11,042 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Passed As This The First Heading | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 TO APPEAR ON THE OUTER PACKAGING – METAL CASSETTE' | Qrd txt :- 'PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>' | Matched :- 'False'n_ANNEX III.json | Doc txt :- 'PARTICULARS
2021-05-20 00:24:11,223 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODU

2021-05-20 00:24:15,960 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '15. INSTRUCTIONS ON USE' | Qrd txt :- '15. INSTRUCTIONS ON USE' | Matched :- 'True'
2021-05-20 00:24:15,972 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22017' | prevHeadingCurrId :- '22016' | prevHeadingFoundId :- '22016'
2021-05-20 00:24:16,116 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '16. INFORMATION IN BRAILLE' | Qrd txt :- '16. INFORMATION IN BRAILLE' | Matched :- 'True'
2021-05-20 00:24:16,128 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22018' | prevHeadingCurrId :- '22017' | prevHeadingFoundId :- '22017'
2021-05-20 00:24:16,496 : Heading Extr


OriginalCheck



2021-05-20 00:24:18,688 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'
2021-05-20 00:24:18,700 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevHeadingFoundId :- '22029'
2021-05-20 00:24:18,768 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '3. EXPIRY DATE' | Qrd txt :- '3. EXPIRY DATE' | Matched :- 'True'
2021-05-20 00:24:18,780 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Failed As Wrong Heading Found | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22024' | prevHeadingCurrId :- '' | prevHeadingFo

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-20 00:24:20,989 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-05-20 00:24:21,000 : Heading Extraction Zynteglo_clean_ANNEX III.json_S : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
 CODES' | Qrd txt :- '13. BATCH NUMBER<, DONATION AND PRODUCT CODES>' | Matched :- 'False'd : Contains<>|72.41|(70, 84, 84)|0.848| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '3.       DONATION AND PRODUCT
 CODES' | Qrd txt :- '4. BATCH NUMBER<, DONATION AND PRODUCT CODES>' | Matched :- 'False'ed : Contains<>|72.41|(69, 84, 84)|0.834| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '3.       DONATION AND PRODUCT
 CODES' | Qrd txt :- '4. BATCH NUMBER<, DONA



Heading Not Found 
 ['PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>', 'MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS', 'NAME OF THE MARKETING AUTHORISATION HOLDER']


dict_keys([])
Completed Heading Extraction For File
Metrics : 2: Heading Extraction,0.2555 Min,1.459242 MB,3.036471 MB,66.0%

Starting Document Annotation For File :- Zynteglo_clean_ANNEX III.json
Error Found
Completed Document Annotation
Metrics : 2: Document Annotation,0.0002 Min,0.166697 MB,0.178517 MB,66.0%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX III.json
--------------------------------------------
Completed Extracting Content Between Heading

2021-05-20 00:24:26,063 : Flow Logger HTML_2 : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:24:26,066 : Flow Logger HTML_2 : 2: Content Extraction,0.0029 Min,0.544573 MB,1.177981 MB,66.0%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:24:26,092 : XmlGeneration_2_r : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-20 00:24:26,094 : XmlGeneration_2_r : Initiating XML Generation | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-20 00:24:26,177 : XmlGeneration_2_r : Writing to File:Zynteglo_clean_ANNEX III.xml | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-20 00:24:26,180 : Flow Logger HTML_2 : 2: Generate XML,0.0019 Min,0.929079 MB,1.879341 MB,66.0%
 | H | CAP |  en | HTML | Zynteglo_clean.htm



Metrics : 2: Content Extraction,0.0029 Min,0.544573 MB,1.177981 MB,66.0%

Already Exists
Metrics : 2: Generate XML,0.0019 Min,0.929079 MB,1.879341 MB,66.0%



2021-05-20 00:24:28,026 : XML Submission Logger_2_x : Initiating Submission To FHIR Server | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-20 00:24:28,027 : XML Submission Logger_2_x : Response{"resourceType":"OperationOutcome","id":"ec236462-70a0-4151-8c69-d6b876880efa","issue":[{"severity":"error","code":"exception","diagnostics":"There was an error processing your request."}]} | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-20 00:24:28,028 : XML Submission Logger_2_x : HTTP error occurred: 500 Server Error: Internal Server Error for url: https://ema-dap-epi-dev-fhir-api.azurewebsites.net/Bundle | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
Traceback (most recent call last):
  File "F:\Projects\EMA\Repository\EMA EPI PoC\function_code\code\fhirService\fhirService.py", line 60, in submitFhirXml
    response.raise_for_status()
  File "C:\Users\vipsharm\AppData\Local\Continuum\anaconda3\lib\site-packages\requests\models.py", line 940, in raise_for_status
  

HTTP error occurred: 500 Server Error: Internal Server Error for url: https://ema-dap-epi-dev-fhir-api.azurewebsites.net/Bundle
Error log: There was an error processing your request.
Metrics : 2: Submit FHIR Msg,0.0309 Min,0.065868 MB,0.224854 MB,66.1%

Created XML File For :- Zynteglo_clean_ANNEX III.json
Starting Heading Extraction For File :- Zynteglo_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-19T07-52-54Z\partitionedJSONs\Zynteglo_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet


2021-05-20 00:24:28,303 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Started Extracting Heading | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
 PACKAGE LEAFLET' | Qrd txt :- 'PACKAGE LEAFLET' | Matched :- 'True' LEAFLET.json_W : Match Passed : <=4|16.67|(91, 100, 95)|0.913| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'B.
2021-05-20 00:24:28,321 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed As This The First Heading | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 leaflet: Information for the patient or carer' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'.566| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Package
 leaflet: Information for the patient or carer' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched 

----------------------------------
RemovedByStyle
----------------------------------


 effects you may get. See the end of section 4 for how to report side effects.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'eglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'This
 effects you may get. See the end of section 4 for how to report side effects.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'n_ PACKAGE LEAFLET.json | Doc txt :- 'This
 for you.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'd : SpecialCase3|80.83|(13, 30, 32)|0.543| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Read all of this leaflet carefully
 for you.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'alCase1|74.17|(28, 36, 86)|0.479| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Read all of this leaflet carefully
 leaflet. You may need to read it again. ' | Qrd txt :- 'Pregnancy <

----------------------------------
RemovedByStyle
----------------------------------


 Zynteglo is and what it is used for ' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'0.591| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '1.     What
 you need to know before you are given Zynteglo' | Qrd txt :- '2. What you need to know before you <take> <use> Zynteglo ' | Matched :- 'True'| CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.     What
2021-05-20 00:24:34,217 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 00:24:34,226 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 00:24:34,234 : Heading Extraction Zyn

----------------------------------
RemovedByStyle
----------------------------------


 you need to know before you are given Zynteglo' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False' CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.     What
 Zynteglo is given' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'alCase3|179.17|(11, 21, 22)|0.508| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.     How
 Zynteglo is given' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'08.33|(14, 33, 33)|0.515| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '3.     How
 side effects ' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'pecialCase3|170.83|(14, 17, 26)|0.537| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.     Possible
2021-05-20 00:24:34,941 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Pass

----------------------------------
RemovedByStyle
----------------------------------


 to store Zynteglo ' | Qrd txt :- '3. How to <take> <use> Zynteglo ' | Matched :- 'False'ch Failed : Contains<>|45.83|(73, 58, 75)|0.862| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '5.     How
2021-05-20 00:24:35,215 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '5. How to store Zynteglo' | Qrd txt :- '5. How to store Zynteglo' | Matched :- 'True'
2021-05-20 00:24:35,231 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 00:24:35,240 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-20 00:24:

----------------------------------
RemovedByStyle
----------------------------------


 of the pack and other information' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'9, 32, 40)|0.596| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '6.     Contents
2021-05-20 00:24:35,567 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '6. Contents of the pack and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'
2021-05-20 00:24:35,579 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-20 00:24:35,591 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23003' 

----------------------------------
RemovedByStyle
----------------------------------


 before you are given Zynteglo' | Qrd txt :- '2. What you need to know before you <take> <use> Zynteglo ' | Matched :- 'True', 88)|0.961| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.       What you need to know
2021-05-20 00:24:35,868 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23004' | prevHeadingFoundId :- '23004'
 you:' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ailed : SpecialCase3|107.89|(18, 23, 27)|0.558| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'You must not be given Zynteglo if
 you:' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'pecialCase1|126.32|(23, 26, 31)|0.514| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'You must not be given Zynteglo if
 any of the 

 platelet count has recovered to normal levels.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'| CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Your doctor will tell you when your
2021-05-20 00:24:46,945 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Other medicines and Zynteglo' | Qrd txt :- 'Other medicines and Zynteglo' | Matched :- 'True'
2021-05-20 00:24:46,957 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23009' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23007'
2021-05-20 00:24:46,966 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23009' | prevHeadingCurrId :- '23007' | prevHeadingF

 TDT, administering bone marrow transplants, and using gene therapy medicines.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'n_ PACKAGE LEAFLET.json | Doc txt :- 'Zynteglo
2021-05-20 00:24:56,644 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|170.83|(14, 17, 26)|0.537| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4.       Possible side effects' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-20 00:24:56,680 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4. Possible side effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'
2021-05-20 00:24:56,693 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- 

2021-05-20 00:25:04,599 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|440.0|(13, 30, 38)|0.566| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·back pain' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
 pain' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ailed : SpecialCase3|210.0|(17, 30, 29)|0.483| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·bone or muscle
 pain' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'pecialCase1|260.0|(8, 18, 27)|0.458| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·bone or muscle
 discomfort' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False': SpecialCase3|164.0|(29, 40, 45)|0.522| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·chest pain or
 di

 medicine' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'alCase1|238.1|(18, 28, 32)|0.485| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Chemotherapy
 receiving chemotherapy medicine, but can also develop much later.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False' | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Tell
 receiving chemotherapy medicine, but can also develop much later.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'nteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Tell
 more than 1 in 10 people)' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'87.76|(6, 10, 31)|0.555| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Very
 more than 1 in 10 people)' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'

      appetite' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'pecialCase3|200.0|(38, 44, 40)|0.622| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'decreased
      appetite' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'e1|283.33|(21, 39, 38)|0.519| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'decreased
2021-05-20 00:25:17,181 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|500.0|(7, 25, 28)|0.54| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'tiredness' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-20 00:25:17,293 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|450.0|(10, 43, 43)|0.566| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'itchy skin' | Qrd txt :- 'Pregn

 sensation of feeling off balance or like the room is spinning' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False' | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·dizziness, or
 ' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ch Failed : SpecialCase3|275.0|(15, 19, 40)|0.47| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·memory problems
 ' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False' : SpecialCase1|337.5|(11, 25, 34)|0.381| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·memory problems
 for Aspergillus (lung disease caused by fungus)' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'5| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·positive test
 for Aspergillus (lung disease caused by fungus)' | Qrd txt :- 'This leaflet was last 

2021-05-20 00:25:26,468 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|358.33|(23, 42, 40)|0.432| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·gum disease' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-20 00:25:26,598 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase1|450.0|(11, 42, 40)|0.476| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·gum disease' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
 (haemorrhoids)' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ecialCase3|214.29|(8, 21, 29)|0.388| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·piles
 (haemorrhoids)' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'1|238.1|(25, 38, 33)|0.453| | 

 lesions' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'ed : SpecialCase3|186.36|(22, 39, 37)|0.456| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·inflamed skin
 lesions' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'ialCase1|213.64|(30, 36, 34)|0.578| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·inflamed skin
 abrasion/scrape' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'cialCase3|190.48|(20, 29, 38)|0.473| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·skin
 abrasion/scrape' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'|242.86|(15, 33, 36)|0.459| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '·skin
 disorder' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'd : S

      blood platelets, which may reduce the ability of blood to clot' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'| 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'low level of
      blood platelets, which may reduce the ability of blood to clot' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'teglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'low level of
      white blood cells' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'e3|136.67|(8, 17, 20)|0.479| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'low level of
      white blood cells' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'(22, 33, 30)|0.542| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'low level of
 breath' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'l

2021-05-20 00:25:42,951 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23022' | prevHeadingFoundId :- '23022'
 Zynteglo contains ' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'lCase3|186.36|(19, 23, 27)|0.52| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What
2021-05-20 00:25:43,058 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What Zynteglo contains' | Qrd txt :- 'What Zynteglo contains' | Matched :- 'True'
2021-05-20 00:25:43,068 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23024' | prevHeadingCurrId :- '23023' | prevHeadingFoundId :- '23023'
 cells (blood stem cells) pe

2021-05-20 00:25:47,834 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase1|450.0|(17, 42, 38)|0.423| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Manufacturer' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'
2021-05-20 00:25:47,924 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase3|117.65|(21, 29, 31)|0.576| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Minaris Regenerative Medicine GmbH ' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'
2021-05-20 00:25:48,023 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_W : Match Failed : SpecialCase1|144.12|(28, 35, 29)|0.515| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'Minaris Regenerative Medicine GmbH ' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 

----------------------------------
RemovedByStyle
----------------------------------


 evidence to come about this medicine. ' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False', 33)|0.534| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'This
 evidence to come about this medicine. ' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False'1| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'This
 every year and this leaflet will be updated as necessary.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'The
 every year and this leaflet will be updated as necessary.' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'False' | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'The
 of MS Agency (link)}>.' | Qrd txt :- 'Pregnancy <and> <,> breast-feeding <and fertility>' | Matched :- 'False'e3|84.21|(17, 28, 86

2021-05-20 00:26:04,627 : Flow Logger HTML_2 : Completed Heading Extraction For File | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,630 : Flow Logger HTML_2 : 3: Heading Extraction,1.6099 Min,1.08169 MB,3.157027 MB,66.5%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,634 : Flow Logger HTML_2 : Starting Document Annotation For File | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,642 : Flow Logger HTML_2 : Completed Document Annotation | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,643 : Flow Logger HTML_2 : 3: Document Annotation,0.0002 Min,0.161992 MB,0.171669 MB,66.5%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,648 : Flow Logger HTML_2 : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-20 00:26:04,655 : ExtractContentBetween_3_3 : Cleaning Match Results | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-20 00:26:04,661 : ExtractContent



Heading Not Found 
 ['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', 'Do not <take> <use> X', 'Children <and adolescents>', 'X with <food> <and> <,> <drink> <and> <alcohol>', 'X contains {name the excipient(s)}', 'How to <take> <use> X ', 'Use in children <and adolescents>', 'If you <take> <use> more X than you should', 'If you forget to <take> <use> X>', 'If you stop <taking> <using> X>', 'Additional side effects in children <and adolescents>', 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.', 'Other sources of information']


dict_keys(['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', '1. What Zynteglo is a

2021-05-20 00:26:04,906 : XmlGeneration_3_j : Writing to File:Zynteglo_clean_ PACKAGE LEAFLET.xml | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-20 00:26:04,910 : Flow Logger HTML_2 : 3: Generate XML,0.0033 Min,1.693886 MB,3.088085 MB,66.5%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 3: Generate XML,0.0033 Min,1.693886 MB,3.088085 MB,66.5%



2021-05-20 00:26:09,914 : XML Submission Logger_3_T : Initiating Submission To FHIR Server | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-20 00:26:09,916 : XML Submission Logger_3_T : Response{"resourceType":"Bundle","id":"89ea110f-6298-4063-841c-a840178dd623","meta":{"versionId":"1","lastUpdated":"2021-05-19T18:56:07.027+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:e70ed86c-e92a-4dc1-ab29-f56758656a93","resource":{"resourceType":"Bundle","id":"70c4c12a-bbc5-4008-a3b4-dfc01d978960","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-19T18:56:04+00:00","entry":[{"fullUr | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-20 00:26:09,923 : XML Submission Logger_3_T : POST sucessful: XML added with id: 89ea110f-6298-4063-841c-a840178dd623 | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-20 00:26:09,929 : Flow Logger HTML_

POST sucessful: XML added with id 89ea110f-6298-4063-841c-a840178dd623
Metrics : 3: Submit FHIR Msg,0.0836 Min,0.221378 MB,1.073556 MB,66.8%

Created XML File For :- Zynteglo_clean_ PACKAGE LEAFLET.json
Metrics : 3: Completed,0.0 Min,0.150221 MB,0.152617 MB,66.8%

Metrics : Final Metrics,3.1923 Min,0.0 MB,10.13895 MB,66.8%



In [20]:
a

Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased,StringLength
0,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;\r margin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r line-height:normal""><b><span lang=""EN-...",False,320efb82-a156-45a3-afdd-0e3a7e54d6da,False,,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;\r\nmargin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r\nline-height:normal,ANNEX\r II,False,True,8
1,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r margin-left:0in;line-height:normal""></p>",False,f79ed8ed-bfc4-4e8b-bd49-ede302291e8a,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r\nmargin-left:0in;line-height:normal,,False,False,0
2,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,7b2ff4b3-f27a-47ca-96a9-7ae24cfdefab,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,A. MANUFACTURER(S) OF\r THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH\r RELEASE,False,True,106
3,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d320b69e-a3c5-4764-aa31-251a32af8a17,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
4,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,e67b4687-f1a0-4608-988a-bcbb07029721,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,B. CONDITIONS OR\r RESTRICTIONS REGARDING SUPPLY AND USE,False,True,54
5,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,a9013513-966a-4638-818e-90def54e0698,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
6,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,f9007321-3055-4120-b8ed-79e72cb470ae,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,C. OTHER CONDITIONS AND\r REQUIREMENTS OF THE MARKETING AUTHORISATION,False,True,67
7,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,1b70d864-ef01-4bad-91f3-54dcbbe56cef,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
8,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,71e9cc22-be86-4f43-b932-f89b799916ba,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,D. CONDITIONS OR\r RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,False,True,96
9,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d7fd1fa1-ea7c-4b20-8b1b-11cbcafde70c,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0


In [39]:
convertCollectionToDataFrame(b)

NameError: name 'b' is not defined