In [9]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import tracemalloc
import psutil
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string
import time

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames
from listBundle.addAndUpdateListBundle.addAndUpdateListBundle import ListBundleHandler

class FolderNotFoundError(Exception):
    """Raised when the folder a pipeline step expects to read from does not exist."""

class Metrics:
    """Record per-step elapsed time and memory usage to a CSV file and a logger.

    Each call to ``getMetric`` reports the interval since the previous
    checkpoint (or since construction) and then restarts both the timer and
    ``tracemalloc``, so the current/peak memory figures are per step.
    ``end`` writes a summary row, closes the file and stops ``tracemalloc``.
    """

    _CSV_HEADER = "StepName,Time,Current Memory,Peak Memory,Used Ram Percentage\n"

    def __init__(self, logFileName, logger):
        """Open ``logFileName`` for appending and start the first measurement.

        Args:
            logFileName: Path of the CSV file the metric rows are appended to.
            logger: Object exposing ``logFlowCheckpoint(str)``; every row is
                also sent to it.
        """
        self.logFileName = logFileName
        self.logger = logger
        self.finalPeak = 0
        self.finalTotalTime = 0
        self.finalUsedRamPerc = 0

        # The file is opened in append mode, so the header must only be
        # written when the file is new or empty; otherwise every re-run would
        # insert another header row in the middle of the data.
        needsHeader = (not os.path.exists(self.logFileName)
                       or os.path.getsize(self.logFileName) == 0)
        self.writer = open(self.logFileName, 'a')
        if needsHeader:
            self.writer.write(self._CSV_HEADER)

        # Started last so a failure to open the file does not leave
        # tracemalloc running.
        self.start()

    def start(self):
        """Reset the step timer and begin tracing memory allocations."""
        self.startTime = time.time()
        tracemalloc.start()

    def getMetric(self, msg):
        """Write one row for the step that just finished and start the next one.

        Args:
            msg: Step name, written as the first CSV column.
        """
        self.endTime = time.time()
        self.totalTime = self.endTime - self.startTime

        # tracemalloc reports bytes; the rows are expressed in MB (10**6 bytes).
        current, peak = tracemalloc.get_traced_memory()
        current = current / 10**6
        peak = peak / 10**6

        usedRamPerc = psutil.virtual_memory().percent

        # Running maxima / total used by the summary row written in end().
        self.finalPeak = max(self.finalPeak, peak)
        self.finalUsedRamPerc = max(self.finalUsedRamPerc, usedRamPerc)
        self.finalTotalTime = self.finalTotalTime + self.totalTime

        outputString = f"{msg},{round(self.totalTime/60,4)} Min,{current} MB,{peak} MB,{usedRamPerc}%\n"

        self.logger.logFlowCheckpoint(outputString)
        print(f"Metrics : {outputString}")
        self.writer.write(outputString)

        # Restarting tracemalloc resets the traced peak for the next step.
        tracemalloc.stop()
        tracemalloc.start()
        self.startTime = time.time()

    def end(self):
        """Write the ``Final Metrics`` summary row and release all resources."""
        current, _ = tracemalloc.get_traced_memory()
        current = current / 10**6
        outputString = f"Final Metrics,{round(self.finalTotalTime/60,4)} Min,{current} MB,{self.finalPeak} MB,{self.finalUsedRamPerc}%\n"
        try:
            print(f"Metrics : {outputString}")
            self.logger.logFlowCheckpoint(outputString)
            self.writer.write(outputString)
        finally:
            # Always close the file and stop tracing, even if logging fails.
            self.writer.close()
            tracemalloc.stop()
        
        


def convertToInt(x):
    """Return ``x`` as the string form of its integer value when possible.

    Args:
        x: Any value.

    Returns:
        ``str(int(x))`` if ``int(x)`` succeeds, otherwise ``x`` unchanged.
    """
    try:
        return str(int(x))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "leave the value alone"; a bare
        # except would also swallow KeyboardInterrupt / SystemExit.
        return x


def convertCollectionToDataFrame(collection):
    """Build a DataFrame from ``collection`` with normalised id columns.

    The ``parent_id`` and ``id`` columns are passed element-wise through
    ``convertToInt``; every other column is left as pandas created it.

    Args:
        collection: Any input accepted by ``pd.DataFrame`` that yields
            ``parent_id`` and ``id`` columns.

    Returns:
        The resulting ``pd.DataFrame``.
    """
    hierarchyFrame = pd.DataFrame(collection)

    for columnName in ('parent_id', 'id'):
        hierarchyFrame[columnName] = hierarchyFrame[columnName].apply(convertToInt)

    return hierarchyFrame

def getRandomString(N):
    """Return ``N`` characters picked with ``random.choice`` from ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase

    pickedChars = []
    for _ in range(N):
        pickedChars.append(random.choice(alphabet))

    return ''.join(pickedChars)


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """Convert the HTML file(s) matching ``htmlDocName`` under ``basePath`` to JSON.

    The JSON and the accompanying style text file are written to
    ``<basePath>/outputJSON``; their names are ``htmlDocName`` with the
    ``.htm``/``.html`` extension replaced by ``.json`` and ``.txt``.

    Args:
        controlBasePath: Passed to ``StyleRulesDictionary``.
        basePath: Folder containing the HTML input; must already exist.
        domain, procedureType, languageCode: Passed to the loggers and to
            ``StyleRulesDictionary``.
        htmlDocName: File name (or glob pattern) of the HTML input, relative
            to ``basePath``.
        fileNameQrd: Passed to ``StyleRulesDictionary`` as ``fileName``.
        fileNameLog: Passed to every ``MatchLogger`` created here.

    Returns:
        tuple: ``(json_file_name, style_filepath)`` where ``json_file_name``
        is the base name of the JSON output and ``style_filepath`` is the
        full path of the style text file.

    Raises:
        FolderNotFoundError: If ``basePath`` does not exist.
        FileNotFoundError: If nothing under ``basePath`` matches
            ``htmlDocName``.
    """
    module_path = basePath
    output_json_path = os.path.join(basePath, 'outputJSON')

    # Created before the folder check so the missing-folder branch has a
    # logger to report to (previously that branch hit an unbound local).
    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    if not os.path.exists(module_path):
        try:
            raise FolderNotFoundError(module_path + " not found")
        except FolderNotFoundError:
            # Logged inside the handler so logException runs while the
            # exception is active, then the same exception is re-raised.
            logger.logFlowCheckpoint("Folder For Language Code Not Found In Input File")
            logger.logException("Folder For Language Code Not Found In Input File")
            raise

    filenames = glob.glob(os.path.join(module_path, htmlDocName))
    if not filenames:
        # Without this guard the return statement below would fail with an
        # UnboundLocalError that hides the real cause.
        raise FileNotFoundError(
            f"No file matching {htmlDocName} found in {module_path}")

    # exist_ok avoids failing when the folder appears between check and create.
    os.makedirs(output_json_path, exist_ok=True)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # Output paths depend only on htmlDocName, not on the matched input file,
    # so they are computed once.
    output_base = os.path.join(output_json_path, htmlDocName)

    style_filepath = output_base.replace('.html', '.txt')
    style_filepath = style_filepath.replace('.txtl', '.txt')
    style_filepath = style_filepath.replace('.htm', '.txt')

    output_filename = output_base.replace('.html', '.json')
    output_filename = output_filename.replace('.htm', '.json')

    # NOTE: every matched input is written to the same output paths, so when
    # the pattern matches several files only the last one's output survives.
    for input_filename in filenames:
        print("-------------", style_filepath, "-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # basename handles the platform's separators instead of guessing the
    # separator from the contents of basePath.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """Partition ``<basePath>/outputJSON/<fileNameJson>`` using the QRD section headings.

    Loads the style rules for the given language/domain/procedure type and
    hands their ``qrd_section_headings`` plus the JSON path to
    ``DocTypePartitioner.partitionHtmls``.

    Returns:
        Whatever ``partitionHtmls`` returns (the caller treats it as a list
        of partition file paths).
    """
    # Arguments shared by both loggers created in this function.
    loggerContext = (fileNameJson, domain, procedureType, languageCode, "Json", fileNameLog)

    rulesLogger = MatchLogger(f'Style Dictionary_{getRandomString(1)}', *loggerContext)
    styleRules = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    path_json = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", path_json)

    splitLogger = MatchLogger(f'Partition_{getRandomString(1)}', *loggerContext)
    docTypePartitioner = DocTypePartitioner(splitLogger, domain, procedureType)

    return docTypePartitioner.partitionHtmls(styleRules.qrd_section_headings, path_json)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """Match the headings of one partitioned document against the QRD template.

    The number of top/bottom headings handed to ``MatchDocument`` depends on
    ``documentNumber``: 0 -> (4, 6), 1 -> (3, 5), 2 -> (5, 15), anything
    else -> (5, 10).

    Returns:
        tuple: ``(df, coll, documentType, documentTypeForUI)`` exactly as
        produced by ``MatchDocument.matchHtmlHeaddingsWithQrd``.
    """
    # (documentNumber, (top, bottom)) pairs, compared in this order.
    headingWindows = (
        (0, (4, 6)),
        (1, (3, 5)),
        (2, (5, 15)),
    )
    topHeadingsConsidered, bottomHeadingsConsidered = next(
        (window for docNumber, window in headingWindows if documentNumber == docNumber),
        (5, 10),
    )

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    headingLogger = MatchLogger(f"Heading Extraction {fileNameDoc}_{getRandomString(1)}", fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    headingLogger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber,
    )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        headingLogger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName)

    headingsFrame, headingsColl, docType, docTypeForUI = matcher.matchHtmlHeaddingsWithQrd()
    return headingsFrame, headingsColl, docType, docTypeForUI


def _splitPathComponents(path):
    """Split ``path`` on forward slashes and backslashes, ignoring trailing separators."""
    return re.split(r"[\\/]", path.rstrip("\\/"))


def parseDocument(controlBasePath,
                  basePath,
                  htmlDocName,
                  fileNameQrd,
                  fileNameMatchRuleBook,
                  fileNameDocumentTypeNames,
                  jsonTempFileName,
                  listBundleDocumentTypeCodesFileName,
                  apiMmgtBaseUrl,
                  getListApiEndPointUrlSuffix,
                  addUpdateListApiEndPointUrlSuffix,
                  addBundleApiEndPointUrlSuffix,
                  apiMmgtSubsKey,
                  medName = None):
    """Run the whole pipeline for one HTML document.

    Steps, in order: HTML -> JSON conversion, splitting the JSON into
    partitions, and then for every partition: heading extraction, document
    annotation, content extraction between headings, FHIR XML generation and
    submission, and list-bundle add/update. After each step a row is recorded
    through ``Metrics`` in ``<basePath>/Metrics.csv``; log output goes to
    ``<basePath>/FinalLog.txt``.

    ``basePath`` must end with
    ``<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``; these
    five values are read from its last five path components.

    Args:
        controlBasePath: Folder holding control files; also the root under
            which the list-bundle document type codes JSON is looked up.
        basePath: Working folder of the document (see layout above).
        htmlDocName: Name of the HTML file inside ``basePath``.
        fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames:
            Control file names forwarded to the individual steps.
        jsonTempFileName: Forwarded to ``ListBundleHandler``.
        listBundleDocumentTypeCodesFileName: JSON file mapping
            ``[domain][partition index]['listBundleCode']``.
        apiMmgtBaseUrl, getListApiEndPointUrlSuffix,
        addUpdateListApiEndPointUrlSuffix, addBundleApiEndPointUrlSuffix,
        apiMmgtSubsKey: Forwarded to ``FhirService`` / ``ListBundleHandler``.
        medName: Currently ignored; the value taken from ``basePath`` is used
            instead (see note in the body).

    Raises:
        IndexError: If ``basePath`` has fewer than five path components.
    """
    listRegulatedAuthCodesAccrossePI = []

    fileNameLog = os.path.join(basePath, 'FinalLog.txt')

    # Split on both separators so mixed or trailing separators do not shift
    # the components read below.
    pathComponents = _splitPathComponents(basePath)
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode = pathComponents[-2]
    # NOTE(review): this overwrites the medName argument with the value from
    # the path. Kept as-is because callers may rely on it; confirm whether an
    # explicitly passed medName should take precedence.
    medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    flowLogger = MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    metrics = Metrics(os.path.join(basePath, 'Metrics.csv'), flowLogger)

    # try/finally guarantees metrics.end() runs (closing the CSV and stopping
    # tracemalloc) even when one of the steps raises.
    try:
        flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")

        # Convert HTML to JSON.
        fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

        print("stylePath:-", stylesFilePath)
        flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")
        metrics.getMetric("HTML Conversion To Json")

        flowLogger.logFlowCheckpoint("Starting Json Split")

        # Split the combined JSON into one JSON per category.
        partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

        # Downstream steps expect bare file names, not full paths.
        partitionedJsonPaths = [_splitPathComponents(path)[-1] for path in partitionedJsonPaths]
        flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

        flowLogger.logFlowCheckpoint("Completed Json Split")
        metrics.getMetric("Split Json")

        flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")
        # Annotation data of the most recent partition that produced any; it
        # is reused for partitions whose annotation step returns None.
        previous_pms_oms_annotation_data = None
        for index, fileNamePartitioned in enumerate(partitionedJsonPaths):
            print("Index", index)

            flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

            # The fourth partition (index 3) is processed as the package
            # leaflet with a larger stop-word filter length.
            if index == 3:
                stopWordFilterLen = 100
                isPackageLeaflet = True
            else:
                stopWordFilterLen = 6
                isPackageLeaflet = False

            df, coll, documentType, documentTypeForUI = extractAndValidateHeadings(controlBasePath,
                                        basePath,
                                        domain,
                                        procedureType,
                                        languageCode,
                                        index,
                                        fileNamePartitioned,
                                        fileNameQrd,
                                        fileNameMatchRuleBook,
                                        fileNameDocumentTypeNames,
                                        fileNameLog,
                                        stopWordFilterLen=stopWordFilterLen,
                                        isPackageLeaflet=isPackageLeaflet,
                                        medName=medName)
            print("Completed Heading Extraction For File")
            flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")
            metrics.getMetric(f"{index}: Heading Extraction")

            print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
            # NOTE(security): the two hex literals below look like API
            # subscription keys committed to source. They should be moved to
            # configuration / environment and rotated — TODO confirm with the
            # owner of the SPOR API account.
            documentAnnotationObj = DocumentAnnotation(fileNamePartitioned,'c20835db4b1b4e108828a8537ff41506','f54d15d1e39543d5a4843f4a6ed6c328',"https://spor-sit.azure-api.net","/pms/api/v2/","/sms/api/v2/",df,coll, index)
            pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
            print(pms_oms_annotation_data)

            # Identity check: '== None' can misbehave for objects that
            # overload equality.
            if pms_oms_annotation_data is None:
                pms_oms_annotation_data = previous_pms_oms_annotation_data
            else:
                previous_pms_oms_annotation_data = pms_oms_annotation_data

            print("Completed Document Annotation")
            flowLogger.logFlowCheckpoint("Completed Document Annotation")
            metrics.getMetric(f"{index}: Document Annotation")

            print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
            flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

            extractContentlogger = MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
            dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

            print("Completed Extracting Content Between Heading")
            flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")
            metrics.getMetric(f"{index}: Content Extraction")

            xmlLogger = MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            # The codes file lives in a folder named after the file's stem:
            # <controlBasePath>/<stem>/<file name>.
            listBundleDocumentTypeCodesFilePath = os.path.join(controlBasePath,
                                                               listBundleDocumentTypeCodesFileName.split(".")[0],
                                                               listBundleDocumentTypeCodesFileName)
            with open(listBundleDocumentTypeCodesFilePath, encoding='utf-8') as f:
                listBundleDocumentTypeCodes = json.load(f)

            bundleDocumentTypeCode = listBundleDocumentTypeCodes[domain][str(index)]['listBundleCode']
            bundleMetaData = {'pmsOmsAnnotationData': pms_oms_annotation_data,
                              'documentTypeCode': bundleDocumentTypeCode,
                              'documentType': documentTypeForUI,
                              'languageCode': languageCode,
                              'medName': medName}

            fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, bundleMetaData, stylesFilePath)
            fileNameXml = fileNamePartitioned.replace('.json', '.xml')
            generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

            metrics.getMetric(f"{index}: Generate XML")

            fhirServiceLogger = MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

            fhirServiceObj = FhirService(fhirServiceLogger, apiMmgtBaseUrl, addBundleApiEndPointUrlSuffix, apiMmgtSubsKey, basePath, generatedXml)
            fhirServiceObj.submitFhirXml()

            metrics.getMetric(f"{index}: Submit FHIR Msg")

            print(f"Created XML File For :- {fileNamePartitioned}")

            flowLogger.logFlowCheckpoint("Starting list bundle update/addition")
            # Identifiers accumulate across partitions; each list bundle call
            # receives everything collected so far.
            regulatedAuthIdentifiers = documentAnnotationObj.listRegulatedAuthorizationIdentifiers
            if regulatedAuthIdentifiers is not None:
                listRegulatedAuthCodesAccrossePI.extend(regulatedAuthIdentifiers)
            listBundleLogger = MatchLogger(f'List Bundle Creation Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
            print("\nlistRegulatedAuthCodesAccrossePI", listRegulatedAuthCodesAccrossePI)
            listBundleHandler = ListBundleHandler(listBundleLogger,
                     domain,
                     procedureType,
                     index,
                     documentType,
                     documentTypeForUI,
                     languageCode,
                     medName,
                     controlBasePath,
                     jsonTempFileName,
                     listBundleDocumentTypeCodesFileName,
                     fileNameDocumentTypeNames,
                     listRegulatedAuthCodesAccrossePI,
                     apiMmgtBaseUrl,
                     getListApiEndPointUrlSuffix,
                     addUpdateListApiEndPointUrlSuffix,
                     apiMmgtSubsKey)

            listBundleXml = listBundleHandler.addOrUpdateDocumentItem(str(fhirServiceObj.SubmittedFhirMsgRefId), pms_oms_annotation_data)
            print(listBundleXml)
            listBundleHandler.submitListXmLToServer(listBundleXml)

            flowLogger.logFlowCheckpoint("Completed list bundle update/addition")
            metrics.getMetric(f"{index}: Update/Add List Bundle")

        flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")
        # The loop variable does not exist when there were no partitions, so
        # the label is derived from the list length instead.
        if partitionedJsonPaths:
            completedLabel = f"{len(partitionedJsonPaths) - 1}: Completed"
        else:
            completedLabel = "Completed"
        metrics.getMetric(completedLabel)
    finally:
        metrics.end()

In [None]:
# Optional pre-processing cell (saved without an execution count, i.e. it had
# not been run in this session). Judging by the names, it converts Word input
# to HTML before the parsing cells below are used — TODO confirm.
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor

# The converter is built and invoked without arguments, so any input/output
# locations must come from inside WordToHtmlConvertor itself.
wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

In [11]:
def runAllTest(inputList):
    """Unzip and process every input archive named in ``inputList``.

    Each name must look like
    ``<medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip``.
    The archive is read from the ``inputblob`` folder, extracted to
    ``work/<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``
    and the first extracted file whose name contains ``.htm`` is passed to
    ``parseDocument``.

    Args:
        inputList: Iterable of zip file names.

    Raises:
        ValueError: If a name has fewer than five ``~``-separated fields.
        FileNotFoundError: If the extracted archive contains no ``.htm`` file.
    """
    for inputDoc in inputList:
        parentDir = os.path.abspath(os.path.join('..'))
        inputZipFolderPath = os.path.join(parentDir, 'inputblob')
        inputZipFileName = inputDoc

        fileNameQrd = 'qrd_canonical_model.csv'
        fileNameMatchRuleBook = 'ruleDict.json'
        fileNameDocumentTypeNames = 'documentTypeNames.json'
        fsMountName = '/mounted'
        jsonTempFileName = 'listBundleJsonTemplate.json'
        listBundleDocumentTypeCodesFileName = 'listBundleDocumentTypeCodes.json'
        apiMmgtBaseUrl = "https://ema-dap-epi-tst-fhir-apim.azure-api.net"
        getListApiEndPointUrlSuffix = "/epi/v1/List"
        addUpdateListApiEndPointUrlSuffix = "/epi-w/v1/List"
        addBundleApiEndPointUrlSuffix = "/epi-w/v1/Bundle"
        # NOTE(security): credential committed to source. It should be read
        # from the environment or a secret store, and this key rotated.
        apiMmgtSubsKey = "9e9d47b8a08148f9833e17462d90574a"

        info = inputZipFileName.split("~")
        if len(info) < 5:
            # A plain string cannot be raised; use a real exception type.
            raise ValueError(f"Missing required info in the zip file name {inputZipFileName}")
        medName, domain, procedureType, languageCode, timestamp = info[:5]
        timestamp = timestamp.replace(".zip", "")

        # A backslash in the working directory is used as the marker for a
        # local Windows run; anything else uses the mounted file share.
        # NOTE(review): inputZipFolderPath is already absolute, and
        # os.path.join discards earlier components when a later one is
        # absolute, so the joins below leave it unchanged in both branches
        # (the mount prefix is not applied to it). Confirm the intended
        # location.
        if "\\" in os.getcwd():
            localEnv = True
            inputZipFolderPath = os.path.join(parentDir, inputZipFolderPath)
            outputFolderPath = os.path.join(parentDir, 'work', domain, procedureType, medName, languageCode, timestamp)
            controlFolderPath = os.path.join(parentDir, 'control')
        else:
            localEnv = False
            inputZipFolderPath = os.path.join(fsMountName, inputZipFolderPath)
            outputFolderPath = os.path.join(fsMountName, 'work', domain, procedureType, medName, languageCode, timestamp)
            controlFolderPath = os.path.join(fsMountName, 'control')

        print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

        if localEnv is True:
            inputZipFolderPath = inputZipFolderPath.replace("/", "\\")
            outputFolderPath = outputFolderPath.replace("/", "\\")
            controlFolderPath = controlFolderPath.replace("/", "\\")

        # Each folder is created independently: with a single try block an
        # already existing first folder skipped the creation of the others.
        # The default mode is used because 0o666 lacks the search bit a
        # directory needs on POSIX.
        for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
            os.makedirs(folderPath, exist_ok=True)

        with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
            zip_ref.extractall(outputFolderPath)

        # Only files directly inside the output folder are considered.
        _, _, fileNames = next(os.walk(outputFolderPath))
        htmlFileNames = [fileName for fileName in fileNames if ".htm" in fileName]
        if not htmlFileNames:
            raise FileNotFoundError(f"No .htm/.html file found in {outputFolderPath}")
        htmlFileName = htmlFileNames[0]

        print(htmlFileName)

        parseDocument(controlFolderPath,
                      outputFolderPath,
                      htmlFileName,
                      fileNameQrd,
                      fileNameMatchRuleBook,
                      fileNameDocumentTypeNames,
                      jsonTempFileName,
                      listBundleDocumentTypeCodesFileName,
                      apiMmgtBaseUrl,
                      getListApiEndPointUrlSuffix,
                      addUpdateListApiEndPointUrlSuffix,
                      addBundleApiEndPointUrlSuffix,
                      apiMmgtSubsKey,
                      medName)


In [8]:

# Archives to process. runAllTest splits each name on '~' into
# medName, domain, procedureType, languageCode and timestamp (the '.zip'
# suffix is stripped from the timestamp).
inputList = [
            'GONAL-f~H~CAP~el~2021-06-06T10-02-52Z.zip',
             'Ovaleap~H~CAP~el~2021-06-07T09-07-39Z.zip',
            'Karvea~H~CAP~el~2021-05-21T11-44-12Z.zip']
# Runs the full pipeline for every archive above, one after another.
runAllTest(inputList)


GONAL-f~H~CAP~el~2021-06-06T10-02-52Z.zip D:\Projects\EMA\Repository\EMA EPI PoC\function_code\inputblob D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z D:\Projects\EMA\Repository\EMA EPI PoC\function_code\control
Already Present
gonal-f_clean.htm

2021-06-25 03:08:25,158 : Flow Logger HTML_d : Starting HTML Conversion To Json | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:25,173 : Style Dictionary_1 : Reading style dictionary in file: rule_dictionary_el.json | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:25,220 : Style Dictionary_1 : Qrd Section Keys Retrieved For Style Dictionary: ΠΕΡΙΛΗΨΗ ΤΩΝ ΧΑΡΑΚΤΗΡΙΣΤΙΚΩΝ ΤΟΥ ΠΡΟΪΟΝΤΟΣ, ΠΑΡΑΡΤΗΜΑ ΙΙ, Α. ΕΠΙΣΗΜΑΝΣΗ, Β. ΦΥΛΛΟ ΟΔΗΓΙΩΝ ΧΡΗΣΗΣ | H | CAP |  el | HTML | gonal-f_clean.htm



['D:', 'Projects', 'EMA', 'Repository', 'EMA EPI PoC', 'function_code', 'work', 'H', 'CAP', 'GONAL-f', 'el', '2021-06-06T10-02-52Z'] gonal-f_clean.htm
2021-06-06T10-02-52Z el GONAL-f CAP H
------------- D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.txt -----------------
D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\gonal-f_clean.htm D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.json


2021-06-25 03:08:30,683 : Parser_N : Style Information Stored In File: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.txt | H | CAP |  el | HTML | gonal-f_clean.htm


!!!!!!!!!!! IN TABLE !!!!!!!!!!!!!!!!!
!!!!!!!!!!! IN TABLE !!!!!!!!!!!!!!!!!
!!!!!!!!!!! IN TABLE !!!!!!!!!!!!!!!!!
!!!!!!!!!!! IN TABLE !!!!!!!!!!!!!!!!!
!!!!!!!!!!! IN TABLE !!!!!!!!!!!!!!!!!


2021-06-25 03:08:48,976 : Parser_N : Writing to file: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.json | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:50,183 : Flow Logger HTML_d : Completed HTML Conversion To Json | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:50,185 : Flow Logger HTML_d : HTML Conversion To Json,0.4171 Min,15.881875 MB,57.408925 MB,85.7%
 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:50,209 : Flow Logger HTML_d : Starting Json Split | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:50,218 : Style Dictionary_5 : Reading style dictionary in file: rule_dictionary_el.json | H | CAP |  el | Json | gonal-f_clean.json
2021-06-25 03:08:50,266 : Style Dictionary_5 : Qrd Section Keys Retrieved For Style Dictionary: ΠΕΡΙΛΗΨΗ ΤΩΝ ΧΑΡΑΚΤΗΡΙΣΤΙΚΩΝ ΤΟΥ ΠΡΟΪΟΝΤΟΣ, ΠΑΡΑΡΤΗΜΑ ΙΙ, Α. ΕΠΙΣΗΜΑΝΣΗ, Β. ΦΥΛΛΟ ΟΔΗΓΙΩΝ ΧΡΗΣΗΣ | H | CAP |  el | Json | gonal-f_clean.json

stylePath:- D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.txt
Metrics : HTML Conversion To Json,0.4171 Min,15.881875 MB,57.408925 MB,85.7%

PathJson D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\outputJSON\gonal-f_clean.json
Finding Heading  SmPC 


textHtml1 | ΠΑΡΑΡΤΗΜΑΙ | textQrd1 | ΠΑΡΑΡΤΗΜΑΙΙ | 0.988
textHtml1 | ΠΑΡΑΡΤΗΜΑΙΙ | textQrd1 | ΠΑΡΑΡΤΗΜΑΙΙ | 1
textHtml1 | ΠΑΡΑΡΤΗΜΑΙΙΙ | textQrd1 | ΠΑΡΑΡΤΗΜΑΙΙ | 0.989


2021-06-25 03:08:52,360 : Partition_B : Writing partition to file: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\partitionedJSONs\gonal-f_clean_SmPC.json | H | CAP |  el | Json | gonal-f_clean.json


endPositions [(24, 0.988), (1033, 1), (1117, 0.989)]
startPos,endPos :  0 1033
startPos,endPos :  0 1009
Finding Heading  ΠΑΡΑΡΤΗΜΑ ΙΙ 


textHtml1 | Α.ΕΠΙΣΗΜΑΝΣΗ | textQrd1 | Α.ΕΠΙΣΗΜΑΝΣΗ | 1


2021-06-25 03:08:54,113 : Partition_B : Writing partition to file: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\partitionedJSONs\gonal-f_clean_ΠΑΡΑΡΤΗΜΑ ΙΙ.json | H | CAP |  el | Json | gonal-f_clean.json


endPositions [(1147, 1)]
startPos,endPos :  1009 1147
startPos,endPos :  1009 1123
Finding Heading  Α. ΕΠΙΣΗΜΑΝΣΗ 


textHtml1 | Β.ΦΥΛΛΟΟΔΗΓΙΩΝΧΡΗΣΗΣ | textQrd1 | Β.ΦΥΛΛΟΟΔΗΓΙΩΝΧΡΗΣΗΣ | 1


2021-06-25 03:08:56,360 : Partition_B : Writing partition to file: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\partitionedJSONs\gonal-f_clean_Α. ΕΠΙΣΗΜΑΝΣΗ.json | H | CAP |  el | Json | gonal-f_clean.json
2021-06-25 03:08:56,416 : Partition_B : Writing partition to file: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\partitionedJSONs\gonal-f_clean_Β. ΦΥΛΛΟ ΟΔΗΓΙΩΝ ΧΡΗΣΗΣ.json | H | CAP |  el | Json | gonal-f_clean.json


endPositions [(2538, 1)]
startPos,endPos :  1123 2538
startPos,endPos :  1123 2514
Finding Heading  Β. ΦΥΛΛΟ ΟΔΗΓΙΩΝ ΧΡΗΣΗΣ 


startPos,endPos :  2514 3597


2021-06-25 03:08:56,584 : Flow Logger HTML_d : ['gonal-f_clean_SmPC.json', 'gonal-f_clean_ΠΑΡΑΡΤΗΜΑ ΙΙ.json', 'gonal-f_clean_Α. ΕΠΙΣΗΜΑΝΣΗ.json', 'gonal-f_clean_Β. ΦΥΛΛΟ ΟΔΗΓΙΩΝ ΧΡΗΣΗΣ.json'] | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:56,586 : Flow Logger HTML_d : Completed Json Split | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:56,589 : Flow Logger HTML_d : Split Json,0.1063 Min,0.371909 MB,68.107763 MB,85.5%
 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:56,594 : Flow Logger HTML_d : Started Processing Partitioned Jsons | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:56,600 : Flow Logger HTML_d : 



||||||||||||||||||||||||||||||||0 ||||| gonal-f_clean_SmPC.json||||||||||||||||||||||||||||||||



 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:08:56,608 : Heading Extraction gonal-f_clean_SmPC.json_S : Starting Heading Extraction | H | CAP |  el | 0 | gonal-f_clean_SmPC.json


Metrics : Split Json,0.1063 Min,0.371909 MB,68.107763 MB,85.5%

Index 0
Starting Heading Extraction For File :- gonal-f_clean_SmPC.json
File being processed: D:\Projects\EMA\Repository\EMA EPI PoC\function_code\work\H\CAP\GONAL-f\el\2021-06-06T10-02-52Z\partitionedJSONs\gonal-f_clean_SmPC.json
--------------------------------------------
SmPC


2021-06-25 03:08:57,432 : Heading Extraction gonal-f_clean_SmPC.json_S : Started Extracting Heading | H | CAP |  el | 0 | gonal-f_clean_SmPC.json
2021-06-25 03:08:58,001 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- 'ΠΕΡΙΛΗΨΗ ΤΩΝ ΧΑΡΑΚΤΗΡΙΣΤΙΚΩΝ ΤΟΥ ΠΡΟΪΟΝΤΟΣ' | Qrd txt :- 'ΠΕΡΙΛΗΨΗ ΤΩΝ ΧΑΡΑΚΤΗΡΙΣΤΙΚΩΝ ΤΟΥ ΠΡΟΪΟΝΤΟΣ' | Matched :- 'True'
2021-06-25 03:08:58,009 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed As This The First Heading | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20001' | prevHeadingCurrId :- '' | prevHeadingFoundId :- ''
2021-06-25 03:08:58,127 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '1. ΟΝΟΜΑΣΙΑ ΤΟΥ ΦΑΡΜΑΚΕΥΤΙΚΟΥ ΠΡΟΪΟΝΤΟΣ' | Qrd txt :- '1. ΟΝΟΜΑΣΙΑ ΤΟΥ ΦΑΡΜΑΚΕΥΤΙΚΟΥ ΠΡΟΪΟΝΤΟΣ' | Matched :- 'True'
2021-06-25 03:08:58,138 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation 


OriginalCheck



2021-06-25 03:09:26,427 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- 'Παιδιατρικός πληθυσμός' | Qrd txt :- 'Παιδιατρικός πληθυσμός' | Matched :- 'True'
2021-06-25 03:09:26,449 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20013' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-06-25 03:09:27,076 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- 'Τρόπος χορήγησης' | Qrd txt :- 'Τρόπος χορήγησης' | Matched :- 'True'
2021-06-25 03:09:27,098 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20014' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'



OriginalCheck



2021-06-25 03:09:31,244 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.3 Αντενδείξεις' | Qrd txt :- '4.3 Αντενδείξεις' | Matched :- 'True'
2021-06-25 03:09:31,255 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20016' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-06-25 03:09:37,783 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.4 Ειδικές προειδοποιήσεις και προφυλάξεις κατά τη χρήση' | Qrd txt :- '4.4 Ειδικές προειδοποιήσεις και προφυλάξεις κατά τη χρήση' | Matched :- 'True'
2021-06-25 03:09:37,794 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20017' | prevHeadingCurrId :- '20016' | prevHeadingFoundId :- '20016'
2021-06-25 03:09:37,967 : Heading Extr


OriginalCheck



2021-06-25 03:09:54,884 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.7 Επιδράσεις στην ικανότητα οδήγησης και χειρισμού μηχανημάτων' | Qrd txt :- '4.7 Επιδράσεις στην ικανότητα οδήγησης και χειρισμού μηχανημάτων' | Matched :- 'True'
2021-06-25 03:09:54,897 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20026' | prevHeadingCurrId :- '20022' | prevHeadingFoundId :- '20022'
2021-06-25 03:09:55,580 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.8 Ανεπιθύμητες ενέργειες' | Qrd txt :- '4.8 Ανεπιθύμητες ενέργειες' | Matched :- 'True'
2021-06-25 03:09:55,593 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20027' | prevHeadingCurrId :- '20026' | prevHeadingFoundId :- '20026'
2021


OriginalCheck



2021-06-25 03:10:20,379 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '5. ΦΑΡΜΑΚΟΛΟΓΙΚΕΣ ΙΔΙΟΤΗΤΕΣ' | Qrd txt :- '5. ΦΑΡΜΑΚΟΛΟΓΙΚΕΣ ΙΔΙΟΤΗΤΕΣ' | Matched :- 'True'
2021-06-25 03:10:20,387 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20032' | prevHeadingCurrId :- '20030' | prevHeadingFoundId :- '20030'
2021-06-25 03:10:20,758 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '5.1 Φαρμακοδυναμικές ιδιότητες' | Qrd txt :- '5.1 Φαρμακοδυναμικές ιδιότητες' | Matched :- 'True'
2021-06-25 03:10:20,771 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20033' | prevHeadingCurrId :- '20032' | prevHeadingFoundId :- '20032'
2021-06-25 03:10:21,736 : Heading Extraction gonal-f_clean_SmPC.json_S

2021-06-25 03:11:05,170 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20057' | prevHeadingCurrId :- '20056' | prevHeadingFoundId :- '20056'
2021-06-25 03:11:07,479 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '10. ΗΜΕΡΟΜΗΝΙΑ ΑΝΑΘΕΩΡΗΣΗΣ ΤΟΥ ΚΕΙΜΕΝΟΥ' | Qrd txt :- '10. ΗΜΕΡΟΜΗΝΙΑ ΑΝΑΘΕΩΡΗΣΗΣ ΤΟΥ ΚΕΙΜΕΝΟΥ' | Matched :- 'True'
2021-06-25 03:11:07,490 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20058' | prevHeadingCurrId :- '20057' | prevHeadingFoundId :- '20057'
2021-06-25 03:11:08,285 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '1. ΟΝΟΜΑΣΙΑ ΤΟΥ ΦΑΡΜΑΚΕΥΤΙΚΟΥ ΠΡΟΪΟΝΤΟΣ' | Qrd txt :- '1. ΟΝΟΜΑΣΙΑ ΤΟΥ ΦΑΡΜΑΚΕΥΤΙΚΟΥ ΠΡΟΪΟΝΤΟΣ' | Matched :- 'True'
2021-06-25 03:11:08,298 : He

oooooooooooooooooooooooooooooooooooooooo END OF Sub Section oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo


2021-06-25 03:11:13,080 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '2. ΠΟΙΟΤΙΚΗ ΚΑΙ ΠΟΣΟΤΙΚΗ ΣΥΝΘΕΣΗ' | Qrd txt :- '2. ΠΟΙΟΤΙΚΗ ΚΑΙ ΠΟΣΟΤΙΚΗ ΣΥΝΘΕΣΗ' | Matched :- 'True'
2021-06-25 03:11:13,091 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20004' | prevHeadingCurrId :- '20003' | prevHeadingFoundId :- '20003'
2021-06-25 03:11:18,546 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '3. ΦΑΡΜΑΚΟΤΕΧΝΙΚΗ ΜΟΡΦΗ' | Qrd txt :- '3. ΦΑΡΜΑΚΟΤΕΧΝΙΚΗ ΜΟΡΦΗ' | Matched :- 'True'
2021-06-25 03:11:18,568 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Flow Is Broken | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20008' | prevHeadingCurrId :- '' | prevHeadingFoundId :- '20004'
2021-06-25 03:11:18,578 : Heading Extraction gonal-f_clean_SmPC.json_S 


OriginalCheck



2021-06-25 03:11:32,353 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- 'Παιδιατρικός πληθυσμός' | Qrd txt :- 'Παιδιατρικός πληθυσμός' | Matched :- 'True'
2021-06-25 03:11:32,372 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20013' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-06-25 03:11:32,915 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- 'Τρόπος χορήγησης' | Qrd txt :- 'Τρόπος χορήγησης' | Matched :- 'True'
2021-06-25 03:11:32,931 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20014' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'



OriginalCheck



2021-06-25 03:11:34,165 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.3 Αντενδείξεις' | Qrd txt :- '4.3 Αντενδείξεις' | Matched :- 'True'
2021-06-25 03:11:34,179 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20016' | prevHeadingCurrId :- '20011' | prevHeadingFoundId :- '20011'
2021-06-25 03:11:40,036 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.4 Ειδικές προειδοποιήσεις και προφυλάξεις κατά τη χρήση' | Qrd txt :- '4.4 Ειδικές προειδοποιήσεις και προφυλάξεις κατά τη χρήση' | Matched :- 'True'
2021-06-25 03:11:40,047 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20017' | prevHeadingCurrId :- '20016' | prevHeadingFoundId :- '20016'
2021-06-25 03:11:40,187 : Heading Extr


OriginalCheck



2021-06-25 03:11:56,584 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.7 Επιδράσεις στην ικανότητα οδήγησης και χειρισμού μηχανημάτων' | Qrd txt :- '4.7 Επιδράσεις στην ικανότητα οδήγησης και χειρισμού μηχανημάτων' | Matched :- 'True'
2021-06-25 03:11:56,593 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20026' | prevHeadingCurrId :- '20022' | prevHeadingFoundId :- '20022'
2021-06-25 03:11:57,211 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '4.8 Ανεπιθύμητες ενέργειες' | Qrd txt :- '4.8 Ανεπιθύμητες ενέργειες' | Matched :- 'True'
2021-06-25 03:11:57,220 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20027' | prevHeadingCurrId :- '20026' | prevHeadingFoundId :- '20026'
2021


OriginalCheck



2021-06-25 03:12:27,021 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '5. ΦΑΡΜΑΚΟΛΟΓΙΚΕΣ ΙΔΙΟΤΗΤΕΣ' | Qrd txt :- '5. ΦΑΡΜΑΚΟΛΟΓΙΚΕΣ ΙΔΙΟΤΗΤΕΣ' | Matched :- 'True'
2021-06-25 03:12:27,037 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20032' | prevHeadingCurrId :- '20030' | prevHeadingFoundId :- '20030'
2021-06-25 03:12:27,538 : Heading Extraction gonal-f_clean_SmPC.json_S : Match Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | Doc txt :- '5.1 Φαρμακοδυναμικές ιδιότητες' | Qrd txt :- '5.1 Φαρμακοδυναμικές ιδιότητες' | Matched :- 'True'
2021-06-25 03:12:27,545 : Heading Extraction gonal-f_clean_SmPC.json_S : Validation Passed | H | CAP |  el | 0 | gonal-f_clean_SmPC.json | currHeadId :- '20033' | prevHeadingCurrId :- '20032' | prevHeadingFoundId :- '20032'
2021-06-25 03:12:28,473 : Heading Extraction gonal-f_clean_SmPC.json_S

2021-06-25 03:13:08,121 : Flow Logger HTML_d : Completed Heading Extraction For File | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:08,123 : Flow Logger HTML_d : 0: Heading Extraction,4.1921 Min,10.940367 MB,38.401202 MB,84.7%
 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:08,129 : Flow Logger HTML_d : Starting Document Annotation For File | H | CAP |  el | HTML | gonal-f_clean.htm




Heading Not Found 
 ['nan,qΤο φάρμακο αυτό τελεί υπό συμπληρωματική παρακολούθηση. Αυτό θα επιτρέψει το γρήγορο προσδιορισμό νέων πληροφοριών ασφάλειας. Ζητείται από τους επαγγελματίες υγείας να αναφέρουν οποιεσδήποτε πιθανολογούμενες ανεπιθύμητες ενέργειες. Βλ. παράγραφο 4.8 για τον τρόπο αναφοράς ανεπιθύμητων ενεργειών.,2', '2.1,Γενική περιγραφή,5', '2.2,Ποιοτική και ποσοτική σύνθεση,6', 'nan,Προφυλάξεις που πρέπει να ληφθούν πριν από τον χειρισμό ή τη χορήγηση του φαρμακευτικού προϊόντος,15', 'nan,Παιδιατρικός πληθυσμός,19', 'nan,Παιδιατρικός πληθυσμός,21', 'nan,Κύηση,23', 'nan,Παιδιατρικός πληθυσμός,28', 'nan,Παιδιατρικός πληθυσμός,31', 'nan,Μηχανισμός δράσης,34', 'nan,Φαρμακοδυναμικές επιδράσεις,35', 'nan,Παιδιατρικός πληθυσμός,37', 'nan,Απορρόφηση,39', 'nan,Κατανομή,40', 'nan,Βιομετασχηματισμός,41', 'nan,Αποβολή,42', 'nan,Γραμμικότητα/μη γραμμικότητα,43', 'nan,Φαρμακοκινητικές/φαρμακοδυναμικές σχέσεις,44', 'nan,Αξιολόγηση περιβαλλοντικού κινδύνου,46', 'nan,Χρήση στον παιδιατρικ

2021-06-25 03:13:39,307 : Flow Logger HTML_d : Completed Document Annotation | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:39,310 : Flow Logger HTML_d : 0: Document Annotation,0.5197 Min,0.20462 MB,0.351028 MB,84.4%
 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:39,312 : Flow Logger HTML_d : Starting Extracting Content Between Heading | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:39,320 : ExtractContentBetween_0_x : Cleaning Match Results | H | CAP |  el | 0 | gonal-f_clean_SmPC.json
2021-06-25 03:13:39,331 : ExtractContentBetween_0_x : Finished Cleaning Match Results | H | CAP |  el | 0 | gonal-f_clean_SmPC.json


Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
Found entry with code 220000000061
{'Author Value': 'Merck Europe B.V.', 'Author Reference': 'http://spor.ema.europa.eu/v1/locations/LOC-100013871', 'Medicinal Product Definitions': [('600000003828', 'GONAL-f 75 IU (5.5 µg) - Powder and solvent for solution for injection', []), ('600000002830', 'GONAL-f 1050 IU/1.75 ml (77 µg/1.75 ml) - Powder and solvent for solution for injection', []), ('600000002832', 'GONAL-f 450 IU/0.75 ml (33 µg/0.75 ml) - Powder and solvent for solution for injection', []), ('600000002834', 'GONAL-f 300 IU/0.5 ml (22 µg/0.5 ml) - Solution for injection in pre-filled pen', []), ('600000003451', 'GONAL-f 450 IU/0.75 ml (33 µg/0.75 ml) - Solution for inje

2021-06-25 03:13:39,744 : Flow Logger HTML_d : Completed Extracting Content Between Heading | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:39,746 : Flow Logger HTML_d : 0: Content Extraction,0.0072 Min,0.554062 MB,38.432201 MB,84.3%
 | H | CAP |  el | HTML | gonal-f_clean.htm
2021-06-25 03:13:39,800 : XmlGeneration_0_T : Initiating XML Generation | H | CAP |  el | 0 | gonal-f_clean_SmPC.json


Completed Extracting Content Between Heading
Metrics : 0: Content Extraction,0.0072 Min,0.554062 MB,38.432201 MB,84.3%

Already Exists


2021-06-25 03:13:40,201 : XmlGeneration_0_T : Writing to File:gonal-f_clean_SmPC.xml | H | CAP |  el | 0 | gonal-f_clean_SmPC.json
2021-06-25 03:13:40,207 : Flow Logger HTML_d : 0: Generate XML,0.0076 Min,3.0793 MB,4.951415 MB,84.4%
 | H | CAP |  el | HTML | gonal-f_clean.htm


Metrics : 0: Generate XML,0.0076 Min,3.0793 MB,4.951415 MB,84.4%



KeyboardInterrupt: 