diff --git a/src/app/services/xml-parsers/index.ts b/src/app/services/xml-parsers/index.ts index ec604ed33..708a8dd0f 100644 --- a/src/app/services/xml-parsers/index.ts +++ b/src/app/services/xml-parsers/index.ts @@ -23,6 +23,10 @@ export class ParserRegister { if (nes.includes(tagName)) { return 'evt-named-entity-parser'; } + const nels = ['listPerson', 'listPlace', 'listOrg', 'listEvent']; + if (nels.includes(tagName)) { + return 'evt-named-entities-list-parser'; + } const crit = ['rdg', 'lem']; if (crit.includes(tagName)) { return 'rdg'; diff --git a/src/app/services/xml-parsers/named-entities-parser.service.ts b/src/app/services/xml-parsers/named-entities-parser.service.ts index 65b6ac014..ad81c6dc9 100644 --- a/src/app/services/xml-parsers/named-entities-parser.service.ts +++ b/src/app/services/xml-parsers/named-entities-parser.service.ts @@ -1,35 +1,27 @@ import { Injectable } from '@angular/core'; -import { parse } from '.'; -import { AppConfig } from '../../app.config'; +import { parse, ParserRegister } from '.'; import { - NamedEntitiesList, NamedEntity, NamedEntityOccurrence, NamedEntityOccurrenceRef, NamedEntityType, Page, XMLElement, + NamedEntitiesList, NamedEntity, NamedEntityOccurrence, NamedEntityOccurrenceRef, Page, XMLElement, } from '../../models/evt-models'; -import { isNestedInElem, xpath } from '../../utils/dom-utils'; +import { isNestedInElem } from '../../utils/dom-utils'; import { Map } from '../../utils/js-utils'; -import { replaceNewLines } from '../../utils/xml-utils'; -import { AttributeMapParser, GenericElemParser } from './basic-parsers'; -import { RelationParser } from './named-entity-parsers'; +import { GenericElemParser } from './basic-parsers'; +import { getListsToParseTagNames, namedEntitiesListsTagNamesMap } from './named-entity-parsers'; import { createParser } from './parser-models'; @Injectable({ providedIn: 'root', }) export class NamedEntitiesParserService { - private neListsConfig = AppConfig.evtSettings.edition.namedEntitiesLists || {}; - private tagNamesMap: { [key: string]: string } = { - persons: 'listPerson', - places: 'listPlace', - organizations: 'listOrg', - events: 'listEvent', - occurrences: 'persName[ref], placeName[ref], orgName[ref], geogName[ref], event[ref]', - }; + private tagNamesMap = namedEntitiesListsTagNamesMap; public parseLists(document: XMLElement) { - const listsToParse = this.getListsToParseTagNames(); + const listsToParse = getListsToParseTagNames(); + const listParser = ParserRegister.get('evt-named-entities-list-parser'); // We consider only first level lists; inset lists will be considered const lists = Array.from(document.querySelectorAll(listsToParse.toString())) .filter((list) => !isNestedInElem(list, list.tagName)) - .map((l) => this.parseList(l)); + .map((l) => listParser.parse(l) as NamedEntitiesList); return { lists, @@ -38,58 +30,6 @@ export class NamedEntitiesParserService { }; } - private parseList(list: XMLElement) { - const attributeParser = createParser(AttributeMapParser, parse); - const parsedList: NamedEntitiesList = { - type: NamedEntitiesList, - id: list.getAttribute('xml:id') || xpath(list), - label: '', - namedEntityType: this.getListType(list.tagName), - content: [], - sublists: [], - originalEncoding: list, - relations: [], - description: [], - attributes: attributeParser.parse(list), - }; - - const relationParse = createParser(RelationParser, parse); - list.childNodes.forEach((child: XMLElement) => { - if (child.nodeType === 1) { - switch (child.tagName.toLowerCase()) { - case 'head': - parsedList.label = replaceNewLines(child.textContent); - break; - case 'desc': - parsedList.description.push(parse(child)); - break; - case 'relation': - if (this.neListsConfig.relations.enabled) { - parsedList.relations.push(relationParse.parse(child)); - } - break; - case 'listrelation': - if (this.neListsConfig.relations.enabled) { - child.querySelectorAll('relation').forEach(r => parsedList.relations.push(relationParse.parse(r))); - } - break; - default: - if (this.getListsToParseTagNames().indexOf(child.tagName) >= 0) { - const parsedSubList = this.parseList(child); - parsedList.sublists.push(parsedSubList); - parsedList.content = parsedList.content.concat(parsedSubList.content); - parsedList.relations = parsedList.relations.concat(parsedSubList.relations); - } else { - parsedList.content.push(parse(child) as NamedEntity); - } - } - } - }); - parsedList.label = parsedList.label || list.getAttribute('type') || `List of ${parsedList.namedEntityType}`; - - return parsedList; - } - public getResultsByType(lists: NamedEntitiesList[], entities: NamedEntity[], type: string[]) { return { lists: lists.filter(list => type.indexOf(list.namedEntityType) >= 0), @@ -97,16 +37,6 @@ export class NamedEntitiesParserService { }; } - private getListsToParseTagNames() { - return Object.keys(this.neListsConfig) - .map((i) => this.neListsConfig[i].enabled ? this.tagNamesMap[i] : undefined) - .filter(ne => !!ne); - } - - private getListType(tagName): NamedEntityType { - return tagName.replace('list', '').toLowerCase(); - } - public parseNamedEntitiesOccurrences(pages: Page[]) { return pages.map(p => this.getNamedEntitiesOccurrencesInPage(p)) .reduce( diff --git a/src/app/services/xml-parsers/named-entity-parsers.ts b/src/app/services/xml-parsers/named-entity-parsers.ts index 1091d9868..83be0759f 100644 --- a/src/app/services/xml-parsers/named-entity-parsers.ts +++ b/src/app/services/xml-parsers/named-entity-parsers.ts @@ -1,6 +1,7 @@ -import { xmlParser } from '.'; +import { AppConfig } from 'src/app/app.config'; +import { ParserRegister, xmlParser } from '.'; import { - GenericElement, NamedEntity, NamedEntityInfo, NamedEntityLabel, + GenericElement, NamedEntitiesList, NamedEntity, NamedEntityInfo, NamedEntityLabel, NamedEntityRef, NamedEntityType, Relation, XMLElement, } from '../../models/evt-models'; import { xpath } from '../../utils/dom-utils'; @@ -8,6 +9,83 @@ import { replaceNewLines } from '../../utils/xml-utils'; import { AttributeMapParser, AttributeParser, EmptyParser, GenericElemParser, TextParser } from './basic-parsers'; import { createParser, parseChildren, Parser } from './parser-models'; +export const namedEntitiesListsTagNamesMap: { [key: string]: string } = { + persons: 'listPerson', + places: 'listPlace', + organizations: 'listOrg', + events: 'listEvent', + occurrences: 'persName[ref], placeName[ref], orgName[ref], geogName[ref], event[ref]', +}; + +export function getListType(tagName): NamedEntityType { + return tagName.replace('list', '').toLowerCase(); +} + +export function getListsToParseTagNames() { + const neListsConfig = AppConfig.evtSettings.edition.namedEntitiesLists || {}; + + return Object.keys(neListsConfig) + .map((i) => neListsConfig[i].enabled ? namedEntitiesListsTagNamesMap[i] : undefined) + .filter(ne => !!ne); +} + +@xmlParser('evt-named-entities-list-parser', NamedEntitiesListParser) +export class NamedEntitiesListParser extends EmptyParser implements Parser { + private neListsConfig = AppConfig.evtSettings.edition.namedEntitiesLists || {}; + attributeParser = createParser(AttributeParser, this.genericParse); + parse(xml: XMLElement): NamedEntitiesList { + const parsedList: NamedEntitiesList = { + type: NamedEntitiesList, + id: xml.getAttribute('xml:id') || xpath(xml), + label: '', + namedEntityType: getListType(xml.tagName), + content: [], + sublists: [], + originalEncoding: xml, + relations: [], + description: [], + attributes: this.attributeParser.parse(xml), + }; + + const relationParse = createParser(RelationParser, this.genericParse); + xml.childNodes.forEach((child: XMLElement) => { + if (child.nodeType === 1) { + switch (child.tagName.toLowerCase()) { + case 'head': + parsedList.label = replaceNewLines(child.textContent); + break; + case 'desc': + parsedList.description.push(this.genericParse(child)); + break; + case 'relation': + if (this.neListsConfig.relations.enabled) { + parsedList.relations.push(relationParse.parse(child)); + } + break; + case 'listrelation': + if (this.neListsConfig.relations.enabled) { + child.querySelectorAll('relation').forEach(r => parsedList.relations.push(relationParse.parse(r))); + } + break; + default: + if (getListsToParseTagNames().indexOf(child.tagName) >= 0) { + const subListParser = ParserRegister.get('evt-named-entities-list-parser'); + const parsedSubList = subListParser.parse(child) as NamedEntitiesList; + parsedList.sublists.push(parsedSubList); + parsedList.content = parsedList.content.concat(parsedSubList.content); + parsedList.relations = parsedList.relations.concat(parsedSubList.relations); + } else { + parsedList.content.push(this.genericParse(child) as NamedEntity); + } + } + } + }); + parsedList.label = parsedList.label || xml.getAttribute('type') || `List of ${parsedList.namedEntityType}`; + + return parsedList; + } +} + @xmlParser('evt-named-entity-parser', NamedEntityRefParser) export class NamedEntityRefParser extends EmptyParser implements Parser { elementParser = createParser(GenericElemParser, this.genericParse);