In [1]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

In [2]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames


class FolderNotFoundError(Exception):
    """Raised when a folder the pipeline needs to read from does not exist."""

class Metrics:
    """Record per-step wall-clock time and memory usage to a CSV file.

    Each call to :meth:`getMetric` appends one row describing the step that
    just finished (elapsed time since the previous checkpoint, current and
    peak traced memory in MB, and the system-wide used-RAM percentage), then
    restarts the timer and the memory trace for the next step.
    :meth:`end` appends a summary row and closes the file.

    Args:
        logFileName: Path of the CSV file; rows are appended to it.
        logger: Object exposing ``logFlowCheckpoint(str)``; every row is
            echoed to it.
    """

    CSV_HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so only emit the header when the
        # file is new or empty; otherwise every run would inject another
        # header row into the middle of the CSV.
        needsHeader = not (os.path.exists(self.logFileName)
                           and os.path.getsize(self.logFileName) > 0)

        # Open the file before starting the trace so a failing open() does
        # not leave tracemalloc running.
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self.CSV_HEADER)
        self.start()

    def start(self):
        """Start the step timer and begin tracing memory allocations."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Append a row for the step named ``msg`` and reset the counters."""
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; the rows use megabytes.
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        usedRamPerc = psutil.virtual_memory().percent

        # Running maxima / total for the summary row written by end().
        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(f"{outputString}")

        print(f"Metrics : {outputString}")
        self.writer.write(outputString)
        # Flush so rows already measured survive a crash in a later step.
        self.writer.flush()

        # Stop/start discards the traced state so the next step's peak is
        # measured on its own.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Append the "Final Metrics" summary row, close the file, stop tracing."""
        current, _ = tracemalloc.get_traced_memory()
        current = current / 10**6
        outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
        print(f"Metrics : {outputString}")
        self.logger.logFlowCheckpoint(f"{outputString}")
        self.writer.write(outputString)
        self.writer.close()
        tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string form of its integer value, when it has one.

    Values that ``int()`` cannot convert (``None``, non-numeric strings, NaN,
    infinities, ...) are returned unchanged.
    """
    try:
        return str(int(x))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised id columns.

    The ``parent_id`` and ``id`` columns are passed element-wise through
    ``convertToInt``; every other column is left as pandas created it.
    """
    frame = pd.DataFrame(collection)

    for columnName in ('parent_id', 'id'):
        frame[columnName] = frame[columnName].apply(convertToInt)

    return frame

def getRandomString(N):
    """Return ``N`` random characters drawn from A-Z, 0-9 and a-z."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    picked = []
    for _ in range(N):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML document in ``basePath`` to JSON under ``basePath/outputJSON``.

    Args:
        controlBasePath: Folder handed to ``StyleRulesDictionary``.
        basePath: Folder that contains the input HTML; output goes to its
            ``outputJSON`` sub-folder (created if missing).
        domain, procedureType, languageCode: Passed through to the loggers
            and the style rule dictionary.
        htmlDocName: File name (or glob pattern) of the HTML input, relative
            to ``basePath``.
        fileNameQrd: QRD file name handed to ``StyleRulesDictionary``.
        fileNameLog: Log file path handed to every ``MatchLogger``.

    Returns:
        ``(jsonFileName, styleFilePath)``: the base name of the generated
        JSON file and the full path of the style file (``.txt``).

    Raises:
        FolderNotFoundError: if ``basePath`` does not exist.
        FileNotFoundError: if nothing in ``basePath`` matches ``htmlDocName``.
    """
    module_path = os.path.join(basePath)

    if not os.path.exists(module_path):
        # Logging is best effort: the logger may itself be unable to start
        # (e.g. if the log file lives under the missing folder), and that
        # must not mask the real error raised below.
        try:
            logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                                 domain, procedureType, languageCode, "HTML", fileNameLog)
            logger.logFlowCheckpoint("Folder For Language Code Not Found In Input File")
            logger.logException("Folder For Language Code Not Found In Input File")
        except Exception as logError:
            print(f"Could not log missing folder {module_path}: {logError}")
        raise FolderNotFoundError(module_path + " not found")

    filenames = glob.glob(os.path.join(module_path, htmlDocName))
    if not filenames:
        # Fail with a clear message instead of an unbound-variable error at
        # the return statement.
        raise FileNotFoundError(f"No file matching {htmlDocName} found in {module_path}")

    # Create the outputJSON folder if it doesn't exist
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Swap only the real file extension (.htm/.html) rather than replacing
    # the substring anywhere in the path.
    outputStem = os.path.splitext(os.path.join(output_json_path, htmlDocName))[0]
    style_filepath = outputStem + '.txt'
    output_filename = outputStem + '.json'

    for input_filename in filenames:
        print("-------------", style_filepath, "-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # basename copes with whichever separator os.path.join produced.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition ``basePath/outputJSON/<fileNameJson>`` by QRD section headings.

    Loads the style rule dictionary to obtain the QRD section headings and
    hands them, together with the JSON path, to ``DocTypePartitioner``.

    Returns:
        Whatever ``DocTypePartitioner.partitionHtmls`` returns (the caller
        treats it as an iterable of partition file paths).
    """
    rulesLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)
    rules = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    uberJsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", uberJsonPath)

    splitLogger = MatchLogger(
        f'Partition_{getRandomString(1)}', fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)
    docPartitioner = DocTypePartitioner(splitLogger)

    return docPartitioner.partitionHtmls(rules.qrd_section_headings, uberJsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    The (top, bottom) "headings considered" counts passed to
    ``MatchDocument`` depend on ``documentNumber``; document numbers other
    than 0, 1 and 2 use (5, 10).

    Returns:
        The ``(df, coll, documentType)`` triple produced by
        ``MatchDocument.matchHtmlHeaddingsWithQrd``.
    """
    # documentNumber -> (topHeadingsConsidered, bottomHeadingsConsidered)
    headingWindows = {
        0: (4, 6),
        1: (3, 5),
        2: (5, 15),
    }
    topHeadingsConsidered, bottomHeadingsConsidered = headingWindows.get(documentNumber, (5, 10))

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    logger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    logger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber,
    )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        logger, controlBasePath, basePath, domain, procedureType,
        languageCode, documentNumber, fileNameDoc, fileNameQrd,
        fileNameMatchRuleBook, fileNameDocumentTypeNames,
        topHeadingsConsidered, bottomHeadingsConsidered,
        stopWordFilterLen, stopWordlanguage, isPackageLeaflet, medName)

    matchedFrame, matchedCollection, detectedType = matcher.matchHtmlHeaddingsWithQrd()
    return matchedFrame, matchedCollection, detectedType


def parseDocument(controlBasePath, basePath, htmlDocName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName=None):
    """Run the full pipeline for one HTML document and submit the FHIR XML.

    Steps: HTML -> JSON, split the JSON into per-document-type partitions,
    then for each partition: heading extraction, document annotation,
    content extraction between headings, FHIR XML generation and submission.
    Timing and memory figures for every step are appended to
    ``<basePath>/Metrics.csv`` and the flow is logged to
    ``<basePath>/FinalLog.txt``.

    ``basePath`` must end in
    ``<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``; those
    values are read back from its last five components.

    Args:
        controlBasePath: Folder holding the control files.
        basePath: Working folder of the document (see layout above).
        htmlDocName: File name of the HTML document inside ``basePath``.
        fileNameQrd: QRD template file name.
        fileNameMatchRuleBook: Match rule book file name.
        fileNameDocumentTypeNames: Document type names file name.
        medName: Medicine name; when ``None`` it is taken from ``basePath``.

    Raises:
        IndexError: if ``basePath`` has fewer than five components.
    """
    if "/" in basePath:
        pathSep = "/"
    else:
        pathSep = "\\"

    fileNameLog = os.path.join(basePath, 'FinalLog.txt')

    pathComponents = basePath.split(pathSep)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode = pathComponents[-2]
    # Honour an explicitly passed medName; only fall back to the path when
    # the caller did not provide one.
    if medName is None:
        medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    # NOTE(review): credentials are hard-coded in source; they should come
    # from configuration or a secret store rather than the repository.
    pmsApiKey = 'c20835db4b1b4e108828a8537ff41506'
    pmsApiBaseUrl = 'https://spor-sit.azure-api.net/pms/api/v2/'

    flowLogger = MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath, 'Metrics.csv'), flowLogger)

    # try/finally guarantees the metrics file is closed and memory tracing is
    # stopped even when a pipeline step raises.
    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
        # Convert Html to Json
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-", stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        # Split Uber Json to multiple Jsons for each category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Keep only the file names; basename copes with either separator.
        partitionedJsonPaths = [os.path.basename(path) for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The partition at index 3 is processed as the package leaflet,
            # with a larger stop-word filter length.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)

            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned, pmsApiKey, pmsApiBaseUrl, df, coll)
            try:
                pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
                print(pms_oms_annotation_data)
            except Exception as annotationError:
                # Annotation is best effort: continue without it, but record
                # why it failed instead of hiding the error.
                pms_oms_annotation_data = None
                print("Error Found")
                flowLogger.logException(f"Document annotation failed for {fileNamePartitioned}: {annotationError}")

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger = MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger = MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
            fileNameXml = fileNamePartitioned.replace('.json', '.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger = MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        # Index of the last processed partition (-1 when there were none);
        # computed here so an empty partition list cannot leave the loop
        # variable undefined.
        lastIndex = len(partitionedJsonPaths) - 1
        metrics.getMetric(f"{lastIndex}: Completed")
    finally:
        metrics.end()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vipsharm\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

# Create the Word-to-HTML converter and run the conversion.
# NOTE(review): the recorded output of this cell ends in a COMError
# ("The RPC server is unavailable") after the same input was processed three
# times — presumably Word automation was not reachable; confirm before re-running.
wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

2021-05-25 16:18:01,002 : WordToHtmlLogger_p : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:18:01,002 : WordToHtmlLogger_p : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-48-00Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Word Files in folder:  ['Abilify Maintena~H~CAP~en.doc', 'ABILIFY~H~CAP~en.doc', 'Adakveo~H~CAP~en.docx', 'Adcetris~H~CAP~en.doc']
Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-48-00Z\Abilify Maintena_clean


2021-05-25 16:18:21,947 : WordToHtmlLogger_p : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-25 16:18:24,068 : WordToHtmlLogger_p : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.0999755859375/71.05000305175781)
The selection ends on page 95 of 95 (737.0499877929688/71.05000305175781)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.6499938964844/71.05000305175781)
The selection ends on page 95 of 95 (480.29998779296875/71.05000305175781)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.55000305175781/71.05000305175781)
The selection ends on page 95 of 95 (227.35000610351562/71.05000305175781)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (652.75/71.05000305175781)
The selection ends on page 95 of 95 (56.849998474121094/71.05000305175781)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 96 (494.54998779296875/71.05000305175781)
The selection ends on page 94 of 96 (627.6500244140625/71.05000305175781)
The s

2021-05-25 16:18:50,345 : WordToHtmlLogger_p : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:18:50,350 : WordToHtmlLogger_p : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:18:51,722 : WordToHtmlLogger_p : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-48-00Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:18:51,727 : WordToHtmlLogger_p : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-48-00Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-48-00Z.zip


2021-05-25 16:19:02,017 : WordToHtmlLogger_p : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-48-00Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:02,022 : WordToHtmlLogger_p : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:02,024 : WordToHtmlLogger_p : Killing Word processes as exception was raised | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:02,036 : WordToHtmlLogger_b : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:02,037 : WordToHtmlLogger_b : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-49-02Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Exception raised
Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-49-02Z\Abilify Maintena_clean


2021-05-25 16:19:04,284 : WordToHtmlLogger_b : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-25 16:19:05,527 : WordToHtmlLogger_b : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.0999755859375/71.05000305175781)
The selection ends on page 95 of 95 (737.0499877929688/71.05000305175781)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.6499938964844/71.05000305175781)
The selection ends on page 95 of 95 (480.29998779296875/71.05000305175781)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.55000305175781/71.05000305175781)
The selection ends on page 95 of 95 (227.35000610351562/71.05000305175781)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (652.75/71.05000305175781)
The selection ends on page 95 of 95 (56.849998474121094/71.05000305175781)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 96 (494.54998779296875/71.05000305175781)
The selection ends on page 94 of 96 (627.6500244140625/71.05000305175781)
The s

2021-05-25 16:19:30,695 : WordToHtmlLogger_b : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:30,695 : WordToHtmlLogger_b : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:31,908 : WordToHtmlLogger_b : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-02Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:31,913 : WordToHtmlLogger_b : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-02Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-02Z.zip


2021-05-25 16:19:42,372 : WordToHtmlLogger_b : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-02Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:42,372 : WordToHtmlLogger_b : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:42,376 : WordToHtmlLogger_b : Killing Word processes as exception was raised | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:42,389 : WordToHtmlLogger_r : Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:19:42,390 : WordToHtmlLogger_r : Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-49-42Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Exception raised
Input file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
Output file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-25T10-49-42Z\Abilify Maintena_clean


2021-05-25 16:19:44,714 : WordToHtmlLogger_r : Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-25 16:19:45,995 : WordToHtmlLogger_r : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.0999755859375/71.05000305175781)
The selection ends on page 95 of 95 (737.0499877929688/71.05000305175781)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.6499938964844/71.05000305175781)
The selection ends on page 95 of 95 (480.29998779296875/71.05000305175781)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.55000305175781/71.05000305175781)
The selection ends on page 95 of 95 (227.35000610351562/71.05000305175781)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (652.75/71.05000305175781)
The selection ends on page 95 of 95 (56.849998474121094/71.05000305175781)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 96 (494.54998779296875/71.05000305175781)
The selection ends on page 94 of 96 (627.6500244140625/71.05000305175781)
The s

2021-05-25 16:20:11,047 : WordToHtmlLogger_r : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:20:11,049 : WordToHtmlLogger_r : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:20:12,893 : WordToHtmlLogger_r : Zip file created: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-42Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:20:12,896 : WordToHtmlLogger_r : Uploading to Azure Storage as blob:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-42Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading File to  Azure Storage:
	F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-42Z.zip


2021-05-25 16:20:22,638 : WordToHtmlLogger_r : Uploaded F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-25T10-49-42Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:20:22,638 : WordToHtmlLogger_r : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-25 16:20:22,643 : WordToHtmlLogger_r : Killing Word processes as exception was raised | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Exception raised


COMError: (-2147023174, 'The RPC server is unavailable.', (None, None, None, 0, None))

In [15]:
# Name of the input archive to process.
inputZipFileName = "Zynteglo~H~CAP~en~2021-05-25T07-52-54Z.zip"
# Archives are read from "<parent of the working directory>/inputblob".
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')

In [16]:
# Control file names and the mount point used outside the local environment.
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'

# The zip name encodes "<medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip".
info = inputZipFileName.split("~")

try:
    medName, domain, procedureType, languageCode, timestamp = info[:5]
except ValueError as err:
    # Fewer than five "~"-separated fields. A real exception type is required
    # here: raising a plain string is itself a TypeError.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}") from err
timestamp = timestamp.replace(".zip", "")

# A backslash in the working directory is taken to mean a local Windows run;
# otherwise everything lives under the mounted file share.
if "\\" in os.getcwd():
    localEnv = True
    rootFolderPath = os.path.abspath(os.path.join('..'))
else:
    localEnv = False
    rootFolderPath = fsMountName

# NOTE: when inputZipFolderPath is already absolute, os.path.join returns it
# unchanged and rootFolderPath is ignored for it.
inputZipFolderPath = os.path.join(rootFolderPath, inputZipFolderPath)
outputFolderPath = os.path.join(rootFolderPath, 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
controlFolderPath = os.path.join(rootFolderPath, 'control')

print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# NOTE(review): 0o666 has no execute bit, which POSIX needs to enter a
# directory — confirm this is the intended mode for the mounted environment.
mode = 0o666

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/", "\\")
    outputFolderPath = outputFolderPath.replace("/", "\\")
    controlFolderPath = controlFolderPath.replace("/", "\\")

# Create each folder independently: with a single try block around all three
# calls, an already-existing first folder prevented the others from being made.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)

# Only the files directly inside the output folder are considered.
_, _, fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if ".htm" in fileName]
if not htmlFileNames:
    raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



Zynteglo~H~CAP~en~2021-05-25T07-52-54Z.zip F:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z F:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
Zynteglo_clean.htm


In [17]:
# Run the whole pipeline on the extracted HTML file, using the folders and
# control file names set up in the previous cells.
parseDocument(controlFolderPath, outputFolderPath, htmlFileName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName)

2021-05-25 16:43:06,017 : Flow Logger HTML_l : Starting HTML Conversion To Json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:06,024 : Style Dictionary_x : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:06,066 : Style Dictionary_x : Qrd Section Keys Retrieved For Style Dictionary: ANNEX I, ANNEX II, ANNEX III, B. PACKAGE LEAFLET | H | CAP |  en | HTML | Zynteglo_clean.htm


['F:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'Zynteglo', 'en', '2021-05-25T07-52-54Z'] Zynteglo_clean.htm
2021-05-25T07-52-54Z en Zynteglo CAP H
------------- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.txt -----------------
F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\Zynteglo_clean.htm F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.json


2021-05-25 16:43:06,832 : Parser_L : Style Information Stored In File: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.txt | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:11,165 : Parser_L : Writing to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:11,435 : Flow Logger HTML_l : Completed HTML Conversion To Json | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:11,437 : Flow Logger HTML_l : HTML Conversion To Json,0.0903 Min,5.377755 MB,9.985314 MB,48.8%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:11,447 : Flow Logger HTML_l : Starting Json Split | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:43:11,453 : Style Dictionary_6 : Reading style dictionary in file: rule_dictionary_en.json | H | CAP |  en | Json | Zynteglo_

stylePath:- F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.txt
Metrics : HTML Conversion To Json,0.0903 Min,5.377755 MB,9.985314 MB,48.8%

PathJson F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\outputJSON\Zynteglo_clean.json


2021-05-25 16:43:11,679 : Partition_x : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-25 16:43:11,679 : Partition_x : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-25 16:43:11,760 : Partition_x : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json | H | CAP |  en | Json | Zynteglo_clean.json
2021-05-25 16:43:11,760 : Partition_x : Writing partition to file: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json | H | CAP |  en | Json | Zynteglo

Metrics : Split Json,0.0071 Min,0.272836 MB,9.339947 MB,48.8%

Starting Heading Extraction For File :- Zynteglo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json
--------------------------------------------
SmPC


2021-05-25 16:43:12,317 : Heading Extraction Zynteglo_clean_SmPC.json_B : Started Extracting Heading | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:43:12,578 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Qrd txt :- 'SUMMARY OF PRODUCT CHARACTERISTICS' | Matched :- 'True'
2021-05-25 16:43:12,583 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Passed As This The First Heading | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-25 16:43:12,827 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed : checkLowerCase|2.88|(99, 100, 99)|0.919| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'medicinal product subject additional monitoring. allow quick identification new safety information. healthcare professionals asked report suspected adverse reactions


OriginalCheck

----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:43:12,936 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-25 16:43:12,946 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20003' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20001'
2021-05-25 16:43:12,951 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20003' | prevHeadingCurrId :- '20001' | prevHeadingFoundId :- '20001'
2021-05-25 16:43:13,493 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '2. QUALITATIVE AND QUANTITATIVE COMPOSITION' | Qrd txt :- '2. QUALITATIVE AND QUANTITATIVE COMPOSITION' | Matched :- 'True'
2021-05-25 16:43:13,502


OriginalCheck



2021-05-25 16:43:14,294 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed : <=4|11.11|(95, 89, 95)|0.989| | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Excipient with known effect' | Qrd txt :- 'Excipient(s) with known effect' | Matched :- 'True'
2021-05-25 16:43:14,310 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20007' | prevHeadingCurrId :- '20006' | prevHeadingFoundId :- '20006'
2021-05-25 16:43:15,442 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '3. PHARMACEUTICAL FORM' | Qrd txt :- '3. PHARMACEUTICAL FORM' | Matched :- 'True'
2021-05-25 16:43:15,452 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20008' | prevHeadingCurrId :- '' | 

2021-05-25 16:43:26,518 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '4.3 Contraindications' | Qrd txt :- '4.3 Contraindications' | Matched :- 'True'
2021-05-25 16:43:26,529 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20016' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20011'
2021-05-25 16:43:26,541 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20016' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-05-25 16:43:28,116 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20017' | prevHeadingCurrId :- '20016' | prevHeadingFoundId :- '20016'
2021-05-25 16:43:28,197 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H 

2021-05-25 16:43:45,031 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-25 16:43:45,042 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20027'
2021-05-25 16:43:45,053 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20031' | prevHeadingCurrId :- '20027' | prevHeadingFoundId :- '20027'
2021-05-25 16:43:45,081 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- 'Paediatric population' | Qrd txt :- 'Paediatric population' | Matched :- 'True'
2021-05-25 16:43:45

2021-05-25 16:43:59,194 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Failed As Current H3 Heading Is Not Part Of Valid H3 Headings in Previous H2 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20037' | prevHeadingCurrId :- '20033' | prevHeadingFoundId :- '20033'
2021-05-25 16:44:00,051 : Heading Extraction Zynteglo_clean_SmPC.json_B : Match Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | Doc txt :- '5.2 Pharmacokinetic properties' | Qrd txt :- '5.2 Pharmacokinetic properties' | Matched :- 'True'
2021-05-25 16:44:00,074 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Flow Is Broken | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20038' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20033'
2021-05-25 16:44:00,085 : Heading Extraction Zynteglo_clean_SmPC.json_B : Validation Passed | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json | currHeadId :- '20038' | prevHeadingCurrId :- '20033' | prevHeadingFoundId :- '20033'
202

2021-05-25 16:44:21,441 : Flow Logger HTML_l : Starting Document Annotation For File | H | CAP |  en | HTML | Zynteglo_clean.htm




Heading Not Found 
 ['qThis medicinal product is subject to additional monitoring. This will allow quick identification of new safety information. Healthcare professionals are asked to report any suspected adverse reactions. See section 4.8 for how to report adverse reactions.', 'Excipient(s) with known effect', 'Posology', 'Paediatric population', 'Method of administration ', 'Precautions to be taken before handling or administering the medicinal product', 'Traceability', 'Paediatric population', 'Paediatric population', 'Pregnancy', 'Breast-feeding', 'Fertility', 'Paediatric population', 'Reporting of suspected adverse reactions', 'Paediatric population', 'Mechanism of action', 'Pharmacodynamic effects', 'Clinical efficacy and safety', 'Paediatric population', 'Absorption', 'Distribution', 'Biotransformation', 'Elimination', 'Linearity/non-linearity', 'Pharmacokinetic/pharmacodynamic relationship(s)', 'Environmental risk assessment (ERA)', 'Use in the paediatric population', 'DOSIM

2021-05-25 16:44:22,698 : Flow Logger HTML_l : Completed Document Annotation | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:22,701 : Flow Logger HTML_l : 0: Document Annotation,0.021 Min,0.168741 MB,0.224301 MB,49.4%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:22,704 : Flow Logger HTML_l : Starting Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:22,712 : ExtractContentBetween_0_0 : Cleaning Match Results | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:22,727 : ExtractContentBetween_0_0 : Finished Cleaning Match Results | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:22,862 : Flow Logger HTML_l : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:22,864 : Flow Logger HTML_l : 0: Content Extraction,0.0027 Min,0.677174 MB,4.070631 MB,49.4%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Error Found
Completed Document Annotation
Metrics : 0: Document Annotation,0.021 Min,0.168741 MB,0.224301 MB,49.4%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_SmPC.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_SmPC.json
--------------------------------------------
Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.0027 Min,0.677174 MB,4.070631 MB,49.4%

Already Exists


2021-05-25 16:44:22,942 : XmlGeneration_0_J : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:22,945 : XmlGeneration_0_J : Initiating XML Generation | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:23,529 : XmlGeneration_0_J : Writing to File:Zynteglo_clean_SmPC.xml | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:23,537 : Flow Logger HTML_l : 0: Generate XML,0.0112 Min,2.770887 MB,5.321824 MB,49.4%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 0: Generate XML,0.0112 Min,2.770887 MB,5.321824 MB,49.4%



2021-05-25 16:44:27,523 : XML Submission Logger_0_i : Initiating Submission To FHIR Server | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:27,527 : XML Submission Logger_0_i : Response{"resourceType":"Bundle","id":"74601927-cc47-4452-9002-7575edd5c4e0","meta":{"versionId":"1","lastUpdated":"2021-05-25T11:14:26.37+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:67c173c8-821d-42e9-a6eb-b763fe62de79","resource":{"resourceType":"Bundle","id":"28d45b62-dd44-44ff-979b-d7e1d944fd6c","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-25T11:14:22+00:00","entry":[{"fullUrl | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:27,541 : XML Submission Logger_0_i : POST sucessful: XML added with id: 74601927-cc47-4452-9002-7575edd5c4e0 | H | CAP |  en | 0 | Zynteglo_clean_SmPC.json
2021-05-25 16:44:27,544 : Flow Logger HTML_l : 0: Submit FHIR Msg,0.0666 Min,0.

POST sucessful: XML added with id 74601927-cc47-4452-9002-7575edd5c4e0
Metrics : 0: Submit FHIR Msg,0.0666 Min,0.0515 MB,1.977845 MB,49.8%

Created XML File For :- Zynteglo_clean_SmPC.json
Starting Heading Extraction For File :- Zynteglo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json
--------------------------------------------
AnnexII


2021-05-25 16:44:28,115 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Started Extracting Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:28,126 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'ANNEX II' | Qrd txt :- 'ANNEX II' | Matched :- 'True'
2021-05-25 16:44:28,136 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed As This The First Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'nteglo_clean_ANNEX II.json | Doc txt :- 'A. MANUFACTURER(S) OF
2021-05-25 16:44:28,224 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '2100


OriginalCheck



2021-05-25 16:44:29,264 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Flow Is Broken | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21013' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21009'
2021-05-25 16:44:29,273 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21013' | prevHeadingCurrId :- '21009' | prevHeadingFoundId :- '21009'
 RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'nteglo_clean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURER(S) OF THE
2021-05-25 16:44:29,347 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21002' | prevHeadingCurrId :- '21001' | prevHeadingFoundId :- '21013'
 RELEASE' | Qrd txt :- 'Name


OriginalCheck


OriginalCheck



2021-05-25 16:44:29,889 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : End Of Sub Section | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
 RELEASE' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'True'nteglo_clean_ANNEX II.json | Doc txt :- 'A.      MANUFACTURER(S) OF THE
2021-05-25 16:44:29,950 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed As This The First Heading | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21002' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
 biological active substance(s)' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'ean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) of the
2021-05-25 16:44:30,062 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo

OriginalCheck



2021-05-25 16:44:30,078 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21003' | prevHeadingCurrId :- '21002' | prevHeadingFoundId :- '21002'
 for batch release' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False' Zynteglo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible
2021-05-25 16:44:30,780 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'True'
2021-05-25 16:44:30,790 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21004' | p


OriginalCheck



2021-05-25 16:44:31,440 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Qrd txt :- 'B. CONDITIONS OR RESTRICTIONS REGARDING SUPPLY AND USE' | Matched :- 'True'
2021-05-25 16:44:31,456 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21005' | prevHeadingCurrId :- '21004' | prevHeadingFoundId :- '21004'
 Summary of Product Characteristics, section 4.2).' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'| Doc txt :- 'Medicinal



OriginalCheck



2021-05-25 16:44:32,037 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'True'
2021-05-25 16:44:32,052 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Flow Is Broken | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '21005'
2021-05-25 16:44:32,061 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21007' | prevHeadingCurrId :- '21005' | prevHeadingFoundId :- '21005'
 Safety Update Reports (PSURs)' | Qrd txt :- 'Periodic safety update reports (PSURs)' | Matched :- 'True'1, 92, 100)|0.917| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Periodic
20


OriginalCheck


OriginalCheck



2021-05-25 16:44:32,843 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-25 16:44:32,853 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21009' | prevHeadingCurrId :- '21008' | prevHeadingFoundId :- '21008'
 management plan (RMP)' | Qrd txt :- 'Risk management plan (RMP)' | Matched :- 'True'assed : <=4|3.7|(98, 100, 100)|0.993| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Risk
2021-05-25 16:44:33,033 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21010' | prevHeadingCu


OriginalCheck



2021-05-25 16:44:35,463 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : >7|164.1|(13, 22, 86)|0.331| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oThe Summary of Product Characteristics ' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:36,665 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|202.74|(8, 8, 86)|0.367| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The Guide for healthcare professionals shall contain the following key elements:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 of patient’s guide' | Qrd txt :- 'A. <MANUFACTURER(S) OF THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND> MANUFACTURER(S) RESPONSIBLE FOR BATCH RELEASE' | Matched :- 'False'Zynteglo_clean_ANNEX II.json | Doc txt :- '–content
 of patient’s guide' | Qrd txt :- 'Name and address of the manufacturer(s) of the biological active substance(s)' | Matched :- 'False'7| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–content
 of patient’s guide' | Qrd txt :- 'Name and address of the manufacturer(s) responsible for batch release' | Matched :- 'False'86)|0.527| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–content



OriginalCheck


OriginalCheck


OriginalCheck



2021-05-25 16:44:40,711 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|214.49|(8, 10, 86)|0.396| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–the need to carry the patient alert card and show it to every healthcare professional' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck


OriginalCheck



 in the drug product Registry.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'_clean_ANNEX II.json | Doc txt :- '–enrolment
 in the drug product Registry.' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORns<>|377.5|(6, 10, 86)|0.359| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '–enrolment
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:41,829 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|141.35|(7, 9, 86)|0.377| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The Guide to handling and method of administration for healthcare professionals shall contain the following key elements:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:42,425 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|351.16|(6, 12, 86)|0.337| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oInstructions about the thawing of Zynteglo' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:42,948 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|343.18|(6, 11, 86)|0.336| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'The patient information pack should contain:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:43,822 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : Contains<>|240.32|(7, 11, 86)|0.352| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The patient/carer guide shall contain the following key messages:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



2021-05-25 16:44:46,492 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Match Failed In Lowercase : >7|162.5|(5, 15, 86)|0.33| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oEnrolment in the drug product Registry.' | Qrd txt :- 'D. CONDITIONS OR RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT' | Matched :- 'False'



OriginalCheck


OriginalCheck



 in the drug product Registry.' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORns<>|375.0|(7, 12, 86)|0.375| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'oEnrolment
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'
 shall contain the following key messages:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FOR(8, 11, 86)|0.365| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·The
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 to conduct post-authorisation measures ' | Qrd txt :- 'Obligation to conduct post-authorisation measures' | Matched :- 'True' | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- '·Obligation
2021-05-25 16:44:49,489 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21012' | prevHeadingCurrId :- '21011' | prevHeadingFoundId :- '21011'
 MAH shall complete, within the stated timeframe, the below measures:' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'The
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'False'



OriginalCheck



 AUTHORISATION' | Qrd txt :- 'C. OTHER CONDITIONS AND REQUIREMENTS OF THE MARKETING AUTHORISATION' | Matched :- 'False', 77, 86)|0.683| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO
 AUTHORISATION' | Qrd txt :- 'E. SPECIFIC OBLIGATION TO COMPLETE POST-AUTHORISATION MEASURES FORtains<>|65.26|(75, 95, 86)|0.93| | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | Doc txt :- 'E.    SPECIFIC OBLIGATION TO
<THE CONDITIONAL MARKETING AUTHORISATION> <THE MARKETING AUTHORISATION UNDER EXCEPTIONAL CIRCUMSTANCES>' | Matched :- 'True'



OriginalCheck



2021-05-25 16:44:50,534 : Heading Extraction Zynteglo_clean_ANNEX II.json_R : Validation Passed | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json | currHeadId :- '21013' | prevHeadingCurrId :- '21012' | prevHeadingFoundId :- '21012'
2021-05-25 16:44:51,109 : Flow Logger HTML_l : Completed Heading Extraction For File | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:51,112 : Flow Logger HTML_l : 1: Heading Extraction,0.3927 Min,0.469113 MB,3.060004 MB,49.7%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:51,120 : Flow Logger HTML_l : Starting Document Annotation For File | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:51,147 : Flow Logger HTML_l : Completed Document Annotation | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:51,152 : Flow Logger HTML_l : 1: Document Annotation,0.0006 Min,0.156515 MB,0.16676 MB,49.7%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:44:51,156 : Flow Logger HTML_l : Starting Extracting Content Be



Heading Not Found 
 ['Official batch release']


dict_keys([])
Completed Heading Extraction For File
Metrics : 1: Heading Extraction,0.3927 Min,0.469113 MB,3.060004 MB,49.7%

Starting Document Annotation For File :- Zynteglo_clean_ANNEX II.json
Error Found
Completed Document Annotation
Metrics : 1: Document Annotation,0.0006 Min,0.156515 MB,0.16676 MB,49.7%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_ANNEX II.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX II.json
--------------------------------------------
Completed Extracting Content Between Heading
Metrics : 1: Content Extraction,0.0016 Min,0.168473 MB,0.631027 MB,49.7%

Already Exists


2021-05-25 16:44:51,324 : XmlGeneration_1_G : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:51,326 : XmlGeneration_1_G : Initiating XML Generation | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:51,562 : XmlGeneration_1_G : Writing to File:Zynteglo_clean_ANNEX II.xml | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:51,570 : Flow Logger HTML_l : 1: Generate XML,0.0052 Min,0.861677 MB,1.928049 MB,49.8%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 1: Generate XML,0.0052 Min,0.861677 MB,1.928049 MB,49.8%



2021-05-25 16:44:53,724 : XML Submission Logger_1_C : Initiating Submission To FHIR Server | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:53,726 : XML Submission Logger_1_C : Response{"resourceType":"Bundle","id":"25c941e7-4b0e-4dd3-a65a-61d529cc84ce","meta":{"versionId":"1","lastUpdated":"2021-05-25T11:14:53.118+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:06323792-c8eb-4bb3-8fe8-6b7723df83b7","resource":{"resourceType":"Bundle","id":"cefa5a7d-db7e-4796-aa97-572c5283fc57","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-25T11:14:51+00:00","entry":[{"fullUr | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:53,731 : XML Submission Logger_1_C : POST sucessful: XML added with id: 25c941e7-4b0e-4dd3-a65a-61d529cc84ce | H | CAP |  en | 1 | Zynteglo_clean_ANNEX II.json
2021-05-25 16:44:53,733 : Flow Logger HTML_l : 1: Submit FHIR Msg,0

POST sucessful: XML added with id 25c941e7-4b0e-4dd3-a65a-61d529cc84ce
Metrics : 1: Submit FHIR Msg,0.036 Min,0.194823 MB,0.903287 MB,49.6%

Created XML File For :- Zynteglo_clean_ANNEX II.json
Starting Heading Extraction For File :- Zynteglo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX III.json
--------------------------------------------
Labelling


2021-05-25 16:44:54,119 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Started Extracting Heading | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
 LABELLING' | Qrd txt :- 'LABELLING ' | Matched :- 'True'an_ANNEX III.json_C : Match Passed : <=1|25.0|(86, 100, 95)|0.921| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- 'A.
2021-05-25 16:44:54,737 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed As This The First Heading | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-25 16:44:55,186 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Qrd txt :- '1. NAME OF THE MEDICINAL PRODUCT' | Matched :- 'True'
2021-05-25 16:44:55,195 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Flow Is Broken | H | CAP |  en | 2 | Zynteglo_c

2021-05-25 16:45:05,896 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22017' | prevHeadingCurrId :- '22016' | prevHeadingFoundId :- '22016'
2021-05-25 16:45:06,094 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '16. INFORMATION IN BRAILLE' | Qrd txt :- '16. INFORMATION IN BRAILLE' | Matched :- 'True'
2021-05-25 16:45:06,105 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22018' | prevHeadingCurrId :- '22017' | prevHeadingFoundId :- '22017'
2021-05-25 16:45:06,748 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Qrd txt :- '17. UNIQUE IDENTIFIER – 2D BARCODE' | Matched :- 'True'
2021-05-25 16:45


OriginalCheck



2021-05-25 16:45:10,661 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '2. METHOD OF ADMINISTRATION' | Qrd txt :- '2. METHOD OF ADMINISTRATION' | Matched :- 'True'
2021-05-25 16:45:10,682 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22029' | prevHeadingCurrId :- '22028' | prevHeadingFoundId :- '22028'
2021-05-25 16:45:11,372 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed : <=4|7.14|(93, 93, 93)|0.971| | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '3.       EXPIRY DATE' | Qrd txt :- '8. EXPIRY DATE' | Matched :- 'True'
2021-05-25 16:45:11,391 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Failed As Previous Heading Found is not matching | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22010' | prevHeadingCurrId :- '22009' | prevH

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-05-25 16:45:17,995 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Match Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | Doc txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Qrd txt :- '2. STATEMENT OF ACTIVE SUBSTANCE(S)' | Matched :- 'True'
2021-05-25 16:45:18,018 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22004' | prevHeadingCurrId :- '22003' | prevHeadingFoundId :- '22003'
2021-05-25 16:45:25,465 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Flow Is Broken | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22009' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '22004'
2021-05-25 16:45:25,473 : Heading Extraction Zynteglo_clean_ANNEX III.json_C : Validation Passed | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json | currHeadId :- '22009' | prevHeadingCurrId :- '22004' | prevHeadingFoundId :- '22004'
 CONDITIONS' | Qrd txt 



Heading Not Found 
 ['PARTICULARS TO APPEAR ON <THE OUTER PACKAGING> <AND> <THE IMMEDIATE PACKAGING>', 'MINIMUM PARTICULARS TO APPEAR ON BLISTERS OR STRIPS', 'NAME OF THE MARKETING AUTHORISATION HOLDER']


dict_keys([])
Completed Heading Extraction For File
Metrics : 2: Heading Extraction,0.6033 Min,0.609705 MB,3.142645 MB,49.1%

Starting Document Annotation For File :- Zynteglo_clean_ANNEX III.json
Error Found
Completed Document Annotation
Metrics : 2: Document Annotation,0.0002 Min,0.158628 MB,0.172305 MB,49.1%

Starting Extracting Content Between Heading For File :- Zynteglo_clean_ANNEX III.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ANNEX III.json
--------------------------------------------


2021-05-25 16:45:30,178 : Flow Logger HTML_l : Completed Extracting Content Between Heading | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:45:30,179 : Flow Logger HTML_l : 2: Content Extraction,0.0038 Min,0.324378 MB,0.982644 MB,49.0%
 | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:45:30,235 : XmlGeneration_2_s : PMS/OMS Annotation Information Not Retrieved | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-25 16:45:30,237 : XmlGeneration_2_s : Initiating XML Generation | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json


Completed Extracting Content Between Heading
Metrics : 2: Content Extraction,0.0038 Min,0.324378 MB,0.982644 MB,49.0%

Already Exists


2021-05-25 16:45:30,418 : XmlGeneration_2_s : Writing to File:Zynteglo_clean_ANNEX III.xml | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-25 16:45:30,421 : Flow Logger HTML_l : 2: Generate XML,0.004 Min,0.998705 MB,1.94995 MB,49.1%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 2: Generate XML,0.004 Min,0.998705 MB,1.94995 MB,49.1%



2021-05-25 16:45:32,244 : XML Submission Logger_2_l : Initiating Submission To FHIR Server | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-25 16:45:32,247 : XML Submission Logger_2_l : Response{"resourceType":"OperationOutcome","id":"650e2bd2-965e-46e4-9205-250100a13d9b","issue":[{"severity":"error","code":"exception","diagnostics":"There was an error processing your request."}]} | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
2021-05-25 16:45:32,248 : XML Submission Logger_2_l : HTTP error occurred: 500 Server Error: Internal Server Error for url: https://ema-dap-epi-dev-fhir-api.azurewebsites.net/Bundle | H | CAP |  en | 2 | Zynteglo_clean_ANNEX III.json
Traceback (most recent call last):
  File "F:\Projects\EMA\Repository\EMA EPI PoC\function_code\code\fhirService\fhirService.py", line 60, in submitFhirXml
    response.raise_for_status()
  File "C:\Users\vipsharm\AppData\Local\Continuum\anaconda3\envs\py38\lib\site-packages\requests\models.py", line 943, in raise_for

HTTP error occurred: 500 Server Error: Internal Server Error for url: https://ema-dap-epi-dev-fhir-api.azurewebsites.net/Bundle
Error log: There was an error processing your request.
Metrics : 2: Submit FHIR Msg,0.0306 Min,0.212373 MB,0.224602 MB,49.1%

Created XML File For :- Zynteglo_clean_ANNEX III.json
Starting Heading Extraction For File :- Zynteglo_clean_ PACKAGE LEAFLET.json
File being processed: F:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\Zynteglo\en\2021-05-25T07-52-54Z\partitionedJSONs\Zynteglo_clean_ PACKAGE LEAFLET.json
--------------------------------------------
Package leaflet


2021-05-25 16:45:32,892 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Started Extracting Heading | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
 PACKAGE LEAFLET' | Qrd txt :- 'PACKAGE LEAFLET' | Matched :- 'True' LEAFLET.json_B : Match Passed : <=4|16.67|(91, 100, 95)|0.913| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'B.
2021-05-25 16:45:32,943 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed As This The First Heading | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-05-25 16:45:34,765 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end

----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:45:41,387 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'What is in this leaflet' | Qrd txt :- 'What is in this leaflet' | Matched :- 'True'
2021-05-25 16:45:41,398 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23003' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23001'
2021-05-25 16:45:41,403 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23003' | prevHeadingCurrId :- '23001' | prevHeadingFoundId :- '23001'
2021-05-25 16:45:41,483 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '1. What Zynteglo is and what it is used for' | Qrd txt :- '1. What Zy

----------------------------------
RemovedByStyle
----------------------------------


 you need to know before you are given Zynteglo' | Qrd txt :- '2. What you need to know before you <take> <use> Zynteglo ' | Matched :- 'True'| CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.     What
2021-05-25 16:45:42,017 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:42,026 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:42,035 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Failed By Style | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'


----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:45:43,117 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '4. Possible side effects' | Qrd txt :- '4. Possible side effects' | Matched :- 'True'
2021-05-25 16:45:43,129 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,135 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,143 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Failed By Style | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23019' | prevHeadingCurrId :- '' | prevHeadingFoun

----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:45:43,492 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '5. How to store Zynteglo' | Qrd txt :- '5. How to store Zynteglo' | Matched :- 'True'
2021-05-25 16:45:43,503 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,509 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,520 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Failed By Style | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23022' | prevHeadingCurrId :- '' | prevHeadingFoun

----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:45:43,964 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '6. Contents of the pack and other information' | Qrd txt :- '6. Contents of the pack and other information' | Matched :- 'True'
2021-05-25 16:45:43,974 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,979 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
2021-05-25 16:45:43,985 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Failed By Style | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23023' |

----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:45:44,169 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '1. What Zynteglo is and what it is used for' | Qrd txt :- '1. What Zynteglo is and what it is used for' | Matched :- 'True'
2021-05-25 16:45:44,180 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23004' | prevHeadingCurrId :- '23003' | prevHeadingFoundId :- '23003'
 before you are given Zynteglo' | Qrd txt :- '2. What you need to know before you <take> <use> Zynteglo ' | Matched :- 'True', 88)|0.961| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- '2.       What you need to know
2021-05-25 16:45:44,309 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23005' | prevHeadingCurrId :- '2

2021-05-25 16:47:37,810 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23027' | prevHeadingCurrId :- '23026' | prevHeadingFoundId :- '23026'
 leaflet was last revised in <{MM/YYYY}>' | Qrd txt :- 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.' | Matched :- 'True'97| | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'This
2021-05-25 16:47:38,146 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23028' | prevHeadingCurrId :- '23027' | prevHeadingFoundId :- '23027'
2021-05-25 16:47:38,153 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Failed By Style | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23028' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23027'


----------------------------------
RemovedByStyle
----------------------------------


2021-05-25 16:47:43,496 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Match Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | Doc txt :- 'The following information is intended for healthcare professionals only:' | Qrd txt :- 'The following information is intended for healthcare professionals only:' | Matched :- 'True'
2021-05-25 16:47:43,510 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Flow Is Broken | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23030' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '23027'
2021-05-25 16:47:43,517 : Heading Extraction Zynteglo_clean_ PACKAGE LEAFLET.json_B : Validation Passed | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json | currHeadId :- '23030' | prevHeadingCurrId :- '23027' | prevHeadingFoundId :- '23027'
2021-05-25 16:47:55,657 : Flow Logger HTML_l : Completed Heading Extraction For File | H | CAP |  en | HTML | Zynteglo_clean.htm
2021-05-25 16:



Heading Not Found 
 ['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', 'Do not <take> <use> X', 'Children <and adolescents>', 'X with <food> <and> <,> <drink> <and> <alcohol>', 'X contains {name the excipient(s)}', 'How to <take> <use> X ', 'Use in children <and adolescents>', 'If you <take> <use> more X than you should', 'If you forget to <take> <use> X>', 'If you stop <taking> <using> X>', 'Additional side effects in children <and adolescents>', 'This leaflet was last revised in <{MM/YYYY}><{month YYYY}>.', 'Other sources of information']


dict_keys(['q This medicine is subject to additional monitoring. This will allow quick identification of new safety information. You can help by reporting any side effects you may get. See the end of section 4 for how to report side effects.', '1. What Zynteglo is a

2021-05-25 16:47:56,041 : XmlGeneration_3_B : Writing to File:Zynteglo_clean_ PACKAGE LEAFLET.xml | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-25 16:47:56,046 : Flow Logger HTML_l : 3: Generate XML,0.0045 Min,1.5821 MB,2.935574 MB,49.6%
 | H | CAP |  en | HTML | Zynteglo_clean.htm


Metrics : 3: Generate XML,0.0045 Min,1.5821 MB,2.935574 MB,49.6%



2021-05-25 16:47:58,300 : XML Submission Logger_3_o : Initiating Submission To FHIR Server | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-25 16:47:58,301 : XML Submission Logger_3_o : Response{"resourceType":"Bundle","id":"e5b0aeba-6a37-479d-852e-5d7153914c42","meta":{"versionId":"1","lastUpdated":"2021-05-25T11:17:57.684+00:00"},"type":"collection","entry":[{"fullUrl":"urn:uuid:3635f829-248b-4267-8312-2097db165cda","resource":{"resourceType":"Bundle","id":"a54d7ee0-c8e1-48f5-83e0-9b449daa611f","identifier":{"system":"http://ema.europa.eu/fhir/identifier/documentid","value":"${instance.bundle[n].Identifier}"},"type":"document","timestamp":"2021-05-25T11:17:55+00:00","entry":[{"fullUr | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-25 16:47:58,305 : XML Submission Logger_3_o : POST sucessful: XML added with id: e5b0aeba-6a37-479d-852e-5d7153914c42 | H | CAP |  en | 3 | Zynteglo_clean_ PACKAGE LEAFLET.json
2021-05-25 16:47:58,306 : Flow Logger HTML_

POST sucessful: XML added with id e5b0aeba-6a37-479d-852e-5d7153914c42
Metrics : 3: Submit FHIR Msg,0.0376 Min,0.044813 MB,0.992785 MB,49.5%

Created XML File For :- Zynteglo_clean_ PACKAGE LEAFLET.json
Metrics : 3: Completed,0.0 Min,0.150151 MB,0.152496 MB,49.5%

Metrics : Final Metrics,4.8699 Min,0.0 MB,9.985314 MB,49.8%



In [20]:
a

Unnamed: 0,Bold,Classes,Element,HasBorder,ID,Indexed,IsHeadingType,IsListItem,IsPossibleHeading,Italics,ParentId,Styles,Text,Underlined,Uppercased,StringLength
0,True,['MsoNormal'],"<p align=""center"" class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;\r margin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r line-height:normal""><b><span lang=""EN-...",False,320efb82-a156-45a3-afdd-0e3a7e54d6da,False,,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;\r\nmargin-bottom:0in;margin-left:27.0pt;text-align:center;text-indent:-27.0pt;\r\nline-height:normal,ANNEX\r II,False,True,8
1,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r margin-left:0in;line-height:normal""></p>",False,f79ed8ed-bfc4-4e8b-bd49-ede302291e8a,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:-.1pt;margin-bottom:0in;\r\nmargin-left:0in;line-height:normal,,False,False,0
2,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,7b2ff4b3-f27a-47ca-96a9-7ae24cfdefab,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,A. MANUFACTURER(S) OF\r THE BIOLOGICAL ACTIVE SUBSTANCE(S) AND MANUFACTURER(S) RESPONSIBLE FOR BATCH\r RELEASE,False,True,106
3,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d320b69e-a3c5-4764-aa31-251a32af8a17,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
4,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,e67b4687-f1a0-4608-988a-bcbb07029721,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,B. CONDITIONS OR\r RESTRICTIONS REGARDING SUPPLY AND USE,False,True,54
5,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,a9013513-966a-4638-818e-90def54e0698,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
6,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,f9007321-3055-4120-b8ed-79e72cb470ae,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,C. OTHER CONDITIONS AND\r REQUIREMENTS OF THE MARKETING AUTHORISATION,False,True,67
7,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,1b70d864-ef01-4bad-91f3-54dcbbe56cef,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0
8,True,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""><b><span lang=""EN-GB"" style='font-family:""Times New ...",False,71e9cc22-be86-4f43-b932-f89b799916ba,True,L1,False,True,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,D. CONDITIONS OR\r RESTRICTIONS WITH REGARD TO THE SAFE AND EFFECTIVE USE OF THE MEDICINAL PRODUCT,False,True,96
9,False,['MsoNormal'],"<p class=""MsoNormal"" style=""margin-top:0in;margin-right:70.8pt;margin-bottom:\r 0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal""></p>",False,d7fd1fa1-ea7c-4b20-8b1b-11cbcafde70c,False,,False,False,False,e337b19e-80aa-4ffa-ac3b-5d9fa03a7fe4,margin-top:0in;margin-right:70.8pt;margin-bottom:\r\n0in;margin-left:85.05pt;text-indent:-35.4pt;line-height:normal,,False,False,0


In [69]:
convertCollectionToDataFrame(b)

NameError: name 'b' is not defined