In [1]:
import os
import zipfile
%load_ext autoreload

%autoreload 2

In [2]:
import pprint
import pandas as pd
import uuid
import json
import os
import glob
import re
import sys
from bs4 import NavigableString, BeautifulSoup
from collections import defaultdict
import random
import string

from utils.config import config
from utils.logger.logger import loggerCreator

# ePI Modules
from parse.rulebook.rulebook import StyleRulesDictionary

from parse.extractor.parser import parserExtractor
from match.matchDocument.matchDocument import MatchDocument
from documentAnnotation.documentAnnotation import DocumentAnnotation
from htmlDocTypePartitioner.partition import DocTypePartitioner
from extractContentBetweenHeadings.dataBetweenHeadingsExtractor import DataBetweenHeadingsExtractor
from fhirXmlGenerator.fhirXmlGenerator import FhirXmlGenerator
from fhirService.fhirService import FhirService
from utils.logger.matchLogger import MatchLogger
from languageInfo.documentTypeNames.documentTypeNames import DocumentTypeNames
from wordToHtmlConvertor.wordToHtmlConvertor import WordToHtmlConvertor


class FolderNotFoundError(Exception):
    """Raised when a folder the pipeline needs to read from does not exist."""


def getRandomString(N):
    """Return a string of N characters picked at random from ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    pickedChars = []
    for _ in range(N):
        pickedChars.append(random.choice(alphabet))
    return ''.join(pickedChars)


def _replaceHtmlExtension(path, newExtension):
    """Return ``path`` with a trailing .htm/.html extension swapped for ``newExtension``.

    Only the final extension is touched, so a ".htm" that appears inside a
    directory name or in the middle of the file name is left alone. Paths with
    any other extension are returned unchanged.
    """
    root, extension = os.path.splitext(path)
    if extension.lower() in ('.htm', '.html'):
        return root + newExtension
    return path


def convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog):
    """
    Convert the HTML file(s) matching ``htmlDocName`` inside ``basePath`` to JSON.

    The JSON and the accompanying style file are written to the ``outputJSON``
    sub-folder of ``basePath`` (created if missing), named after ``htmlDocName``
    with its .htm/.html extension replaced by .json / .txt.

    Returns:
        tuple: (file name of the generated JSON without its folder,
                full path of the generated style file).

    Raises:
        FolderNotFoundError: if ``basePath`` does not exist.
        FileNotFoundError: if nothing in ``basePath`` matches ``htmlDocName``.
    """
    # Fail fast, before any logger exists: the previous version tried to log
    # through a logger that had not been created yet in this branch, which hid
    # the real error behind an UnboundLocalError.
    if not os.path.exists(basePath):
        raise FolderNotFoundError(basePath + " not found")

    filenames = glob.glob(os.path.join(basePath, htmlDocName))
    if not filenames:
        # Without this guard the function would reach its return statement
        # with the output names never assigned.
        raise FileNotFoundError(f"No file matching {htmlDocName} found in {basePath}")

    # Generate output folder path; create it if it doesn't exist
    output_json_path = os.path.join(basePath, 'outputJSON')
    os.makedirs(output_json_path, exist_ok=True)

    logger = MatchLogger(f'Parser_{getRandomString(1)}', htmlDocName,
                         domain, procedureType, languageCode, "HTML", fileNameLog)

    styleLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    styleRulesObj = StyleRulesDictionary(logger=styleLogger,
                                         controlBasePath=controlBasePath,
                                         language=languageCode,
                                         fileName=fileNameQrd,
                                         domain=domain,
                                         procedureType=procedureType
                                         )

    parserObj = parserExtractor(config, logger, styleRulesObj.styleRuleDict,
                                styleRulesObj.styleFeatureKeyList,
                                styleRulesObj.qrd_section_headings)

    # The output names depend only on htmlDocName, so compute them once
    # instead of on every loop iteration.
    output_target = os.path.join(output_json_path, htmlDocName)
    style_filepath = _replaceHtmlExtension(output_target, '.txt')
    output_filename = _replaceHtmlExtension(output_target, '.json')

    for input_filename in filenames:
        print("-------------",style_filepath,"-----------------")
        print(input_filename, output_filename)
        parserObj.createPIJsonFromHTML(input_filepath=input_filename,
                                       output_filepath=output_filename,
                                       style_filepath=style_filepath,
                                       img_base64_dict=parserObj.convertImgToBase64(input_filename)
                                       )

    # basename works whatever separator style basePath was written with,
    # unlike splitting on a separator guessed from the path's content.
    return os.path.basename(output_filename), style_filepath


def splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog):
    """
    Partition the JSON file ``fileNameJson`` (located in ``basePath``/outputJSON).

    Loads the style rules for the given language/domain/procedure type and hands
    their QRD section headings, together with the JSON path, to
    ``DocTypePartitioner.partitionHtmls``.

    Returns:
        Whatever ``partitionHtmls`` returns (the caller treats it as an
        iterable of paths of the partitioned JSON files).
    """
    rulesLogger = MatchLogger(
        f'Style Dictionary_{getRandomString(1)}', fileNameJson, domain,
        procedureType, languageCode, "Json", fileNameLog)
    rulebook = StyleRulesDictionary(
        logger=rulesLogger,
        controlBasePath=controlBasePath,
        language=languageCode,
        fileName=fileNameQrd,
        domain=domain,
        procedureType=procedureType,
    )

    uberJsonPath = os.path.join(basePath, 'outputJSON', fileNameJson)
    print("PathJson", uberJsonPath)

    splitLogger = MatchLogger(
        f'Partition_{getRandomString(1)}', fileNameJson, domain,
        procedureType, languageCode, "Json", fileNameLog)
    docTypeSplitter = DocTypePartitioner(splitLogger)

    return docTypeSplitter.partitionHtmls(rulebook.qrd_section_headings, uberJsonPath)


def extractAndValidateHeadings(controlBasePath,
                                basePath,
                                domain,
                                procedureType,
                                languageCode,
                                documentNumber,
                                fileNameDoc,
                                fileNameQrd,
                                fileNameMatchRuleBook,
                                fileNameDocumentTypeNames,
                                fileNameLog,
                                stopWordFilterLen=6,
                                isPackageLeaflet=False,
                                medName=None
                                ):
    """
    Match the headings of one partitioned document against the QRD template.

    The number of top/bottom headings passed to ``MatchDocument`` is chosen
    from ``documentNumber`` (0, 1 and 2 have dedicated values; anything else
    uses 5 and 10).

    Returns:
        tuple: the two values produced by
        ``MatchDocument.matchHtmlHeaddingsWithQrd`` (named ``df`` and ``coll``
        by the callers).
    """
    # (documentNumber, (topHeadingsConsidered, bottomHeadingsConsidered))
    headingWindows = (
        (0, (4, 6)),
        (1, (3, 5)),
        (2, (5, 15)),
    )
    topHeadingsConsidered, bottomHeadingsConsidered = next(
        (window for docNumber, window in headingWindows if documentNumber == docNumber),
        (5, 10),
    )

    print(f"Starting Heading Extraction For File :- {fileNameDoc}")
    headingLogger = MatchLogger(
        f"Heading Extraction {fileNameDoc}_{getRandomString(1)}",
        fileNameDoc, domain, procedureType, languageCode, documentNumber, fileNameLog)
    headingLogger.logFlowCheckpoint("Starting Heading Extraction")

    documentTypeNames = DocumentTypeNames(
        controlBasePath=controlBasePath,
        fileNameDocumentTypeNames=fileNameDocumentTypeNames,
        languageCode=languageCode,
        domain=domain,
        procedureType=procedureType,
        documentNumber=documentNumber,
    )
    stopWordlanguage = documentTypeNames.extractStopWordLanguage()

    matcher = MatchDocument(
        headingLogger,
        controlBasePath,
        basePath,
        domain,
        procedureType,
        languageCode,
        documentNumber,
        fileNameDoc,
        fileNameQrd,
        fileNameMatchRuleBook,
        fileNameDocumentTypeNames,
        topHeadingsConsidered,
        bottomHeadingsConsidered,
        stopWordFilterLen,
        stopWordlanguage,
        isPackageLeaflet,
        medName)
    matchedDf, matchedColl = matcher.matchHtmlHeaddingsWithQrd()

    return matchedDf, matchedColl


def parseDocument(controlBasePath, basePath ,htmlDocName, fileNameQrd, fileNameMatchRuleBook, fileNameDocumentTypeNames, medName = None):
    """
    Run the whole pipeline for one HTML document.

    Steps, in order: HTML -> JSON, split of that JSON into partitioned JSONs,
    then for every partitioned JSON: heading matching, document annotation,
    extraction of the content between headings, FHIR XML generation and
    submission of the XML.

    ``basePath`` must end with the five components
    ``<domain>/<procedureType>/<medName>/<languageCode>/<timestamp>``; domain,
    procedure type and language code are read from there.

    Args:
        controlBasePath: folder holding the control files.
        basePath: working folder of the document (see layout above).
        htmlDocName: name of the HTML file inside ``basePath``.
        fileNameQrd: file name of the QRD template.
        fileNameMatchRuleBook: file name of the match rule book.
        fileNameDocumentTypeNames: file name of the document type names file.
        medName: medicine name; when omitted it is taken from ``basePath``.
    """
    fileNameLog = os.path.join(basePath,'FinalLog.txt')

    # Split on both separator styles and drop empty parts, so mixed separators
    # or a trailing separator do not shift the positions read below.
    pathComponents = [part for part in re.split(r'[\\/]', basePath) if part]
    print(pathComponents, htmlDocName)
    timestamp = pathComponents[-1]
    languageCode =  pathComponents[-2]
    # Honour an explicitly supplied medName; previously the argument was
    # always overwritten by the path component.
    if medName is None:
        medName = pathComponents[-3]
    procedureType = pathComponents[-4]
    domain = pathComponents[-5]

    print(timestamp, languageCode, medName, procedureType, domain)

    flowLogger =  MatchLogger(f"Flow Logger HTML_{getRandomString(1)}", htmlDocName, domain, procedureType, languageCode, "HTML", fileNameLog)

    flowLogger.logFlowCheckpoint("Starting HTML Conversion To Json")
    ###Convert Html to Json
    fileNameJson, stylesFilePath = convertHtmlToJson(controlBasePath, basePath, domain, procedureType, languageCode, htmlDocName, fileNameQrd, fileNameLog)

    print("stylePath:-",stylesFilePath)
    flowLogger.logFlowCheckpoint("Completed HTML Conversion To Json")

    flowLogger.logFlowCheckpoint("Starting Json Split")

    ###Split Uber Json to multiple Jsons for each category.
    partitionedJsonPaths = splitJson(controlBasePath, basePath, domain, procedureType, languageCode, fileNameJson, fileNameQrd, fileNameLog)

    # Keep only the file names; later stages are handed basePath separately.
    partitionedJsonPaths = [re.split(r'[\\/]', path)[-1] for path in partitionedJsonPaths]
    flowLogger.logFlowCheckpoint(str(partitionedJsonPaths))

    flowLogger.logFlowCheckpoint("Completed Json Split")

    flowLogger.logFlowCheckpoint("Started Processing Partitioned Jsons")

    # NOTE(review): the first literal looks like an API subscription key that is
    # committed to source. It is left unchanged here to preserve behaviour, but
    # it should be moved to configuration / a secret store and rotated.
    annotationApiKey = 'c20835db4b1b4e108828a8537ff41506'
    annotationApiBaseUrl = 'https://spor-sit.azure-api.net/pms/api/v2/'

    for index, fileNamePartitioned in enumerate(partitionedJsonPaths):

        flowLogger.logFlowCheckpoint(f"\n\n\n\n||||||||||||||||||||||||||||||||{str(index)} ||||| {str(fileNamePartitioned)}||||||||||||||||||||||||||||||||\n\n\n\n")

        # The partition at index 3 is handled as the package leaflet and uses
        # a much larger stop word filter length.
        if index == 3:
            stopWordFilterLen = 100
            isPackageLeaflet = True
        else:
            stopWordFilterLen = 6
            isPackageLeaflet = False

        df, coll = extractAndValidateHeadings(controlBasePath,
                                    basePath,
                                    domain,
                                    procedureType,
                                    languageCode,
                                    index,
                                    fileNamePartitioned,
                                    fileNameQrd,
                                    fileNameMatchRuleBook,
                                    fileNameDocumentTypeNames,
                                    fileNameLog,
                                    stopWordFilterLen=stopWordFilterLen,
                                    isPackageLeaflet=isPackageLeaflet,
                                    medName=medName)

        print("Completed Heading Extraction For File")
        flowLogger.logFlowCheckpoint("Completed Heading Extraction For File")

        print(f"Starting Document Annotation For File :- {fileNamePartitioned}")
        flowLogger.logFlowCheckpoint("Starting Document Annotation For File")
        documentAnnotationObj = DocumentAnnotation(fileNamePartitioned, annotationApiKey, annotationApiBaseUrl, df, coll)
        try:
            pms_oms_annotation_data = documentAnnotationObj.processRegulatedAuthorizationForDoc()
            print(pms_oms_annotation_data)
        except Exception as annotationError:
            # Annotation is best effort: the pipeline continues without the
            # annotation data, but the reason is recorded instead of dropped.
            # Catching Exception (not a bare except) lets Ctrl-C through.
            pms_oms_annotation_data = None
            print("Error Found")
            flowLogger.logFlowCheckpoint(f"Document Annotation Failed: {annotationError!r}")

        print("Completed Document Annotation")
        flowLogger.logFlowCheckpoint("Completed Document Annotation")

        print(f"Starting Extracting Content Between Heading For File :- {fileNamePartitioned}")
        flowLogger.logFlowCheckpoint("Starting Extracting Content Between Heading")

        extractContentlogger =  MatchLogger(f'ExtractContentBetween_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
        extractorObj = DataBetweenHeadingsExtractor(extractContentlogger, basePath, coll)
        dfExtractedHierRR = extractorObj.extractContentBetweenHeadings(fileNamePartitioned)

        print("Completed Extracting Content Between Heading")
        flowLogger.logFlowCheckpoint("Completed Extracting Content Between Heading")

        xmlLogger =  MatchLogger(f'XmlGeneration_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)
        fhirXmlGeneratorObj = FhirXmlGenerator(xmlLogger, controlBasePath, basePath, pms_oms_annotation_data, stylesFilePath, medName)
        fileNameXml = fileNamePartitioned.replace('.json','.xml')
        generatedXml = fhirXmlGeneratorObj.generateXml(dfExtractedHierRR, fileNameXml)

        fhirServiceLogger =  MatchLogger(f'XML Submission Logger_{index}_{getRandomString(1)}', fileNamePartitioned, domain, procedureType, languageCode, index, fileNameLog)

        fhirServiceObj = FhirService(fhirServiceLogger, basePath, generatedXml)
        fhirServiceObj.submitFhirXml()
        print(f"Created XML File For :- {fileNamePartitioned}")

    flowLogger.logFlowCheckpoint("Completed Processing Partitioned Jsons")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\psaga\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# Run the Word -> HTML conversion step. Judging by the captured output of this
# cell, it picks up the Word files from the Ingest folder, cleans each document,
# zips the result into the inputblob folder, uploads the zip to Azure Storage and
# deletes the input Word file.
wordToHtmlConvertorObj = WordToHtmlConvertor()
wordToHtmlConvertorObj.convertWordToHTML()

2021-05-13 20:50:08,853 : WordToHtmlLoggerS : Input file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:50:08,856 : WordToHtmlLoggerS : Output file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-13T15-20-08Z\ABASAGLAR_clean | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Word Files in folder:  ['ABASAGLAR~H~CAP~en.docx', 'Abilify Maintena~H~CAP~en.doc', '~$ilify Maintena~H~CAP~en.doc']
Input file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx
('ABASAGLAR~H~CAP~en', '.doc', 'x')
ABASAGLAR~H~CAP~en.docx
Output file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\work\H\CAP\ABASAGLAR\en\2021-05-13T15-20-08Z\ABASAGLAR_clean


2021-05-13 20:50:12,704 : WordToHtmlLoggerS : Opened file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx


Opened file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\ABASAGLAR~H~CAP~en.docx


2021-05-13 20:50:13,899 : WordToHtmlLoggerS : Starting document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Checking table 20
The selection starts on page 105 of 106 (69.75/71.0)
The selection ends on page 105 of 106 (384.75/71.0)
The selection contains
* overlay images

Checking table 19
The selection starts on page 103 of 106 (579.5/71.0)
The selection ends on page 105 of 106 (56.75/71.0)
The selection contains
* overlay images

Checking table 18
The selection starts on page 102 of 105 (564.75/215.0)
The selection ends on page 103 of 105 (421.75/71.0)
The selection contains
* overlay images
* overlay shapes

Checking table 17
The selection starts on page 102 of 105 (56.75/71.0)
The selection ends on page 102 of 105 (476.25/71.0)
The selection contains
* overlay images

Checking table 16
The selection starts on page 100 of 105 (578.0/71.0)
The selection ends on page 101 of 105 (484.5/71.0)
The selection contains
* overlay images

Checking table 15
The selection starts on page 100 of 105 (196.75/71.0)
The selection ends on page 100 of 105 (331.75/71.0)
The selection contains
* overlay image

2021-05-13 20:51:08,369 : WordToHtmlLoggerS : Completed document cleaning process | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:51:08,371 : WordToHtmlLoggerS : Preparing zip file | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:51:10,179 : WordToHtmlLoggerS : Zip file created: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-13T15-20-08Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:51:10,183 : WordToHtmlLoggerS : Uploading to Azure Storage as blob:
	C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-13T15-20-08Z.zip | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx



Uploading to Azure Storage as blob:
	C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-13T15-20-08Z.zip


2021-05-13 20:51:14,440 : WordToHtmlLoggerS : UploadedC:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\ABASAGLAR~H~CAP~en~2021-05-13T15-20-08Z.zipsuccessfully | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:51:14,443 : WordToHtmlLoggerS : Deleting input word file: ABASAGLAR~H~CAP~en.docx | H | CAP |  en | .docx | ABASAGLAR~H~CAP~en.docx
2021-05-13 20:51:14,482 : WordToHtmlLoggerJ : Input file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-13 20:51:14,484 : WordToHtmlLoggerJ : Output file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-13T15-21-14Z\Abilify Maintena_clean | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Input file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc
('Abilify Maintena~H~CAP~en', '.doc', '')
Abilify Maintena~H~CAP~en.doc
Output file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\work\H\CAP\Abilify Maintena\en\2021-05-13T15-21-14Z\Abilify Maintena_clean


2021-05-13 20:51:16,606 : WordToHtmlLoggerJ : Opened file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


Opened file: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\data\Ingest\Abilify Maintena~H~CAP~en.doc


2021-05-13 20:51:18,264 : WordToHtmlLoggerJ : Starting document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Checking table 23
The selection starts on page 95 of 95 (619.25/71.0)
The selection ends on page 95 of 95 (737.0/71.0)
The selection contains
* inline images

Checking table 22
The selection starts on page 95 of 95 (303.5/71.0)
The selection ends on page 95 of 95 (480.25/71.0)
The selection contains
* inline images

Checking table 21
The selection starts on page 95 of 95 (107.25/71.0)
The selection ends on page 95 of 95 (227.5/71.0)
The selection contains
* inline images

Checking table 20
The selection starts on page 94 of 95 (652.75/71.0)
The selection ends on page 95 of 95 (56.75/71.0)
The selection contains
* inline images

Checking table 19
The selection starts on page 94 of 96 (494.25/71.0)
The selection ends on page 94 of 96 (627.5/71.0)
The selection contains
* inline images

Checking table 18
The selection starts on page 94 of 96 (353.25/71.0)
The selection ends on page 94 of 96 (456.5/71.0)
The selection contains

Checking table 17
The selection starts on page 94 of 96 (145.

2021-05-13 20:51:54,714 : WordToHtmlLoggerJ : Completed document cleaning process | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-13 20:51:54,718 : WordToHtmlLoggerJ : Preparing zip file | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-13 20:51:55,869 : WordToHtmlLoggerJ : Zip file created: C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-13T15-21-14Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-13 20:51:55,872 : WordToHtmlLoggerJ : Uploading to Azure Storage as blob:
	C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-13T15-21-14Z.zip | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc



Uploading to Azure Storage as blob:
	C:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-13T15-21-14Z.zip


2021-05-13 20:52:09,002 : WordToHtmlLoggerJ : UploadedC:\Users\psaga\source\repos\EMA\EMA%20EPI%20PoC\function_code\inputblob\Abilify Maintena~H~CAP~en~2021-05-13T15-21-14Z.zipsuccessfully | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc
2021-05-13 20:52:09,004 : WordToHtmlLoggerJ : Deleting input word file: Abilify Maintena~H~CAP~en.doc | H | CAP |  en | .doc | Abilify Maintena~H~CAP~en.doc


In [None]:
# Location and name of the zip to process: the zip is expected in the
# "inputblob" folder that sits next to the current working directory.
inputZipFileName = "ABASAGLAR~H~CAP~en~2021-05-13T10-00-18Z.zip"
inputZipFolderPath = os.path.join(os.path.abspath('..'), 'inputblob')

In [None]:
# Names of the control files used by the pipeline
fileNameQrd = 'qrd_canonical_model.csv'
fileNameMatchRuleBook = 'ruleDict.json'
fileNameDocumentTypeNames = 'documentTypeNames.json'
fsMountName = '/mounted'

# Zip name layout: <medName>~<domain>~<procedureType>~<languageCode>~<timestamp>.zip
info = inputZipFileName.split("~")

try:
    medName = info[0]
    domain = info[1]
    procedureType = info[2]
    languageCode = info[3]
    timestamp = info[4]
    timestamp = timestamp.replace(".zip","")

except IndexError as missingPartError:
    # Raising a plain string is itself a TypeError, so raise a real exception.
    raise ValueError(f"Missing required info in the zip file name {inputZipFileName}") from missingPartError

# A backslash in the working directory is taken to mean a local Windows run;
# otherwise the folders are looked up under the mounted file share.
# NOTE(review): os.path.join discards the prefix when inputZipFolderPath is
# already absolute, so the input folder may stay unchanged in both branches.
if "\\" in os.getcwd():
    localEnv = True
    inputZipFolderPath = os.path.join(os.path.abspath(os.path.join('..')),inputZipFolderPath)
    outputFolderPath = os.path.join(os.path.abspath(os.path.join('..')), 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(os.path.abspath(os.path.join('..')),'control')
else:
    localEnv = False
    inputZipFolderPath = os.path.join(f'{fsMountName}',inputZipFolderPath)
    outputFolderPath = os.path.join(f'{fsMountName}', 'work', f"{domain}", f"{procedureType}", f"{medName}", f"{languageCode}", f"{timestamp}")
    controlFolderPath = os.path.join(f'{fsMountName}','control')


print(inputZipFileName, inputZipFolderPath, outputFolderPath, controlFolderPath)

# Directories need the execute (search) bit to be usable on POSIX; 0o666
# produced folders that could not be entered. The umask still applies.
mode = 0o777

if localEnv is True:
    inputZipFolderPath = inputZipFolderPath.replace("/","\\")
    outputFolderPath = outputFolderPath.replace("/","\\")
    controlFolderPath = controlFolderPath.replace("/","\\")

# Create each folder independently: with a single try around all three calls,
# an already existing first folder prevented the other two from being created.
for folderPath in (inputZipFolderPath, outputFolderPath, controlFolderPath):
    os.makedirs(folderPath, mode, exist_ok=True)

with zipfile.ZipFile(os.path.join(inputZipFolderPath, inputZipFileName), "r") as zip_ref:
    zip_ref.extractall(outputFolderPath)


# Only the files directly inside the output folder are considered.
_,_,fileNames = next(os.walk(outputFolderPath))
htmlFileNames = [fileName for fileName in fileNames if fileName.lower().endswith(('.htm', '.html'))]
if not htmlFileNames:
    raise FileNotFoundError(f"No HTML file found in {outputFolderPath} after extracting {inputZipFileName}")
htmlFileName = htmlFileNames[0]

print(htmlFileName)



In [None]:
# Run the full pipeline on the HTML file extracted from the zip.
parseDocument(
    controlBasePath=controlFolderPath,
    basePath=outputFolderPath,
    htmlDocName=htmlFileName,
    fileNameQrd=fileNameQrd,
    fileNameMatchRuleBook=fileNameMatchRuleBook,
    fileNameDocumentTypeNames=fileNameDocumentTypeNames,
    medName=medName,
)