Skip to content

Commit

Permalink
Merge pull request evt-project#124 from evt-project/refactor/named-entities-parser
Browse files Browse the repository at this point in the history

Separate named entities list parser from service
  • Loading branch information
szenzaro committed Feb 20, 2021
2 parents 7861ab7 + 8432993 commit 6bcbd51
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 81 deletions.
4 changes: 4 additions & 0 deletions src/app/services/xml-parsers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ export class ParserRegister {
if (nes.includes(tagName)) {
return 'evt-named-entity-parser';
}
const nels = ['listPerson', 'listPlace', 'listOrg', 'listEvent'];
if (nels.includes(tagName)) {
return 'evt-named-entities-list-parser';
}
const crit = ['rdg', 'lem'];
if (crit.includes(tagName)) {
return 'rdg';
Expand Down
88 changes: 9 additions & 79 deletions src/app/services/xml-parsers/named-entities-parser.service.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,27 @@
import { Injectable } from '@angular/core';
import { parse } from '.';
import { AppConfig } from '../../app.config';
import { parse, ParserRegister } from '.';
import {
NamedEntitiesList, NamedEntity, NamedEntityOccurrence, NamedEntityOccurrenceRef, NamedEntityType, Page, XMLElement,
NamedEntitiesList, NamedEntity, NamedEntityOccurrence, NamedEntityOccurrenceRef, Page, XMLElement,
} from '../../models/evt-models';
import { isNestedInElem, xpath } from '../../utils/dom-utils';
import { isNestedInElem } from '../../utils/dom-utils';
import { Map } from '../../utils/js-utils';
import { replaceNewLines } from '../../utils/xml-utils';
import { AttributeMapParser, GenericElemParser } from './basic-parsers';
import { RelationParser } from './named-entity-parsers';
import { GenericElemParser } from './basic-parsers';
import { getListsToParseTagNames, namedEntitiesListsTagNamesMap } from './named-entity-parsers';
import { createParser } from './parser-models';

@Injectable({
providedIn: 'root',
})
export class NamedEntitiesParserService {
private neListsConfig = AppConfig.evtSettings.edition.namedEntitiesLists || {};
private tagNamesMap: { [key: string]: string } = {
persons: 'listPerson',
places: 'listPlace',
organizations: 'listOrg',
events: 'listEvent',
occurrences: 'persName[ref], placeName[ref], orgName[ref], geogName[ref], event[ref]',
};
private tagNamesMap = namedEntitiesListsTagNamesMap;

public parseLists(document: XMLElement) {
const listsToParse = this.getListsToParseTagNames();
const listsToParse = getListsToParseTagNames();
const listParser = ParserRegister.get('evt-named-entities-list-parser');
// We consider only first-level lists; nested lists are parsed as sublists of their parent list
const lists = Array.from(document.querySelectorAll<XMLElement>(listsToParse.toString()))
.filter((list) => !isNestedInElem(list, list.tagName))
.map((l) => this.parseList(l));
.map((l) => listParser.parse(l) as NamedEntitiesList);

return {
lists,
Expand All @@ -38,75 +30,13 @@ export class NamedEntitiesParserService {
};
}

/**
 * Parses a named-entities list element into a `NamedEntitiesList`.
 *
 * Direct element children are dispatched by lower-cased tag name:
 * - `head` provides the label (new lines normalised via `replaceNewLines`);
 * - `desc` is parsed and appended to the description;
 * - `relation` and the `relation` descendants of `listRelation` are parsed
 *   only when relations are enabled in the configuration;
 * - a child whose tag name is one of the enabled list tag names is parsed
 *   recursively, and its content and relations are merged into this list;
 * - any other element is parsed as an entity of the list.
 *
 * @param list the list element to parse
 * @returns the parsed list; the label falls back to the `type` attribute, then
 *          to a generic "List of <type>" string
 */
private parseList(list: XMLElement) {
  const attributeParser = createParser(AttributeMapParser, parse);
  const parsedList: NamedEntitiesList = {
    type: NamedEntitiesList,
    id: list.getAttribute('xml:id') || xpath(list),
    label: '',
    namedEntityType: this.getListType(list.tagName),
    content: [],
    sublists: [],
    originalEncoding: list,
    relations: [],
    description: [],
    attributes: attributeParser.parse(list),
  };

  const relationParse = createParser(RelationParser, parse);
  // The configuration may be the `{}` fallback or lack a `relations` entry:
  // treat that as "relations disabled" instead of throwing a TypeError.
  const relationsConfig = this.neListsConfig.relations;
  const relationsEnabled = !!(relationsConfig && relationsConfig.enabled);
  // Computed once per list rather than once per child node.
  const listTagNames = this.getListsToParseTagNames();

  list.childNodes.forEach((child: XMLElement) => {
    // Only element nodes (nodeType 1) are relevant.
    if (child.nodeType !== 1) {
      return;
    }
    switch (child.tagName.toLowerCase()) {
      case 'head':
        parsedList.label = replaceNewLines(child.textContent);
        break;
      case 'desc':
        parsedList.description.push(parse(child));
        break;
      case 'relation':
        if (relationsEnabled) {
          parsedList.relations.push(relationParse.parse(child));
        }
        break;
      case 'listrelation':
        if (relationsEnabled) {
          child.querySelectorAll<XMLElement>('relation').forEach(r => parsedList.relations.push(relationParse.parse(r)));
        }
        break;
      default:
        if (listTagNames.indexOf(child.tagName) >= 0) {
          const parsedSubList = this.parseList(child);
          parsedList.sublists.push(parsedSubList);
          parsedList.content = parsedList.content.concat(parsedSubList.content);
          parsedList.relations = parsedList.relations.concat(parsedSubList.relations);
        } else {
          parsedList.content.push(parse(child) as NamedEntity);
        }
    }
  });
  parsedList.label = parsedList.label || list.getAttribute('type') || `List of ${parsedList.namedEntityType}`;

  return parsedList;
}

/**
 * Keeps only the lists and entities whose `namedEntityType` is one of the
 * requested types.
 *
 * @param lists parsed lists to filter
 * @param entities parsed entities to filter
 * @param type accepted named-entity types
 * @returns an object holding the filtered `lists` and `entities`
 */
public getResultsByType(lists: NamedEntitiesList[], entities: NamedEntity[], type: string[]) {
  const matchingLists = lists.filter(({ namedEntityType }) => type.includes(namedEntityType));
  const matchingEntities = entities.filter(({ namedEntityType }) => type.includes(namedEntityType));

  return { lists: matchingLists, entities: matchingEntities };
}

/**
 * Collects the tag names mapped to the configuration entries that are enabled.
 * Enabled entries with no mapped tag name are left out.
 */
private getListsToParseTagNames() {
  const enabledTagNames: string[] = [];
  Object.keys(this.neListsConfig).forEach((configKey) => {
    const tagName = this.neListsConfig[configKey].enabled ? this.tagNamesMap[configKey] : undefined;
    if (tagName) {
      enabledTagNames.push(tagName);
    }
  });

  return enabledTagNames;
}

/**
 * Derives the named-entity type from a list tag name by removing the first
 * occurrence of `list` and lower-casing the rest (e.g. `listPerson` -> `person`).
 *
 * @param tagName tag name of the list element
 * @returns the lower-cased remainder, typed as `NamedEntityType`
 */
private getListType(tagName: string): NamedEntityType {
  return tagName.replace('list', '').toLowerCase();
}

public parseNamedEntitiesOccurrences(pages: Page[]) {
return pages.map(p => this.getNamedEntitiesOccurrencesInPage(p))
.reduce(
Expand Down
82 changes: 80 additions & 2 deletions src/app/services/xml-parsers/named-entity-parsers.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,91 @@
import { xmlParser } from '.';
import { AppConfig } from 'src/app/app.config';
import { ParserRegister, xmlParser } from '.';
import {
GenericElement, NamedEntity, NamedEntityInfo, NamedEntityLabel,
GenericElement, NamedEntitiesList, NamedEntity, NamedEntityInfo, NamedEntityLabel,
NamedEntityRef, NamedEntityType, Relation, XMLElement,
} from '../../models/evt-models';
import { xpath } from '../../utils/dom-utils';
import { replaceNewLines } from '../../utils/xml-utils';
import { AttributeMapParser, AttributeParser, EmptyParser, GenericElemParser, TextParser } from './basic-parsers';
import { createParser, parseChildren, Parser } from './parser-models';

/**
 * Maps a named-entities configuration key to the XML tag name (or, for
 * `occurrences`, the selector list) it refers to.
 * The first four entries are list element tag names; `occurrences` is a
 * comma-separated selector of name elements that carry a `ref` attribute.
 */
export const namedEntitiesListsTagNamesMap: Record<string, string> = {
  persons: 'listPerson',
  places: 'listPlace',
  organizations: 'listOrg',
  events: 'listEvent',
  occurrences: 'persName[ref], placeName[ref], orgName[ref], geogName[ref], event[ref]',
};

/**
 * Derives the named-entity type from a list tag name by removing the first
 * occurrence of `list` and lower-casing the rest (e.g. `listPerson` -> `person`).
 *
 * @param tagName tag name of the list element
 * @returns the lower-cased remainder, typed as `NamedEntityType`
 */
export function getListType(tagName: string): NamedEntityType {
  return tagName.replace('list', '').toLowerCase();
}

/**
 * Returns the tag names of the named-entity lists that are enabled in the
 * application configuration.
 *
 * Every key of the `namedEntitiesLists` configuration whose entry is enabled
 * is translated through `namedEntitiesListsTagNamesMap`; keys with no mapped
 * tag name are dropped. A missing configuration yields an empty array.
 */
export function getListsToParseTagNames() {
  const listsSettings = AppConfig.evtSettings.edition.namedEntitiesLists || {};
  const enabledTagNames: string[] = [];

  for (const settingKey of Object.keys(listsSettings)) {
    const tagName = listsSettings[settingKey].enabled ? namedEntitiesListsTagNamesMap[settingKey] : undefined;
    if (tagName) {
      enabledTagNames.push(tagName);
    }
  }

  return enabledTagNames;
}

/**
 * Parser for named-entity list elements, registered under the name
 * 'evt-named-entities-list-parser'.
 *
 * Direct element children of the list are dispatched by lower-cased tag name:
 * - `head` provides the label (new lines normalised via `replaceNewLines`);
 * - `desc` is parsed with the generic parser and appended to the description;
 * - `relation` and the `relation` descendants of `listRelation` are parsed
 *   only when relations are enabled in the configuration;
 * - a child whose tag name is one of the enabled list tag names is parsed as a
 *   sublist, and its content and relations are merged into the parent list;
 * - any other element is parsed with the generic parser as an entity.
 */
@xmlParser('evt-named-entities-list-parser', NamedEntitiesListParser)
export class NamedEntitiesListParser extends EmptyParser implements Parser<XMLElement> {
  private neListsConfig = AppConfig.evtSettings.edition.namedEntitiesLists || {};
  attributeParser = createParser(AttributeParser, this.genericParse);

  /**
   * Parses a list element into a `NamedEntitiesList`.
   *
   * @param xml the list element to parse
   * @returns the parsed list; the label falls back to the `type` attribute,
   *          then to a generic "List of <type>" string
   */
  parse(xml: XMLElement): NamedEntitiesList {
    const parsedList: NamedEntitiesList = {
      type: NamedEntitiesList,
      id: xml.getAttribute('xml:id') || xpath(xml),
      label: '',
      namedEntityType: getListType(xml.tagName),
      content: [],
      sublists: [],
      originalEncoding: xml,
      relations: [],
      description: [],
      attributes: this.attributeParser.parse(xml),
    };

    const relationParse = createParser(RelationParser, this.genericParse);
    // The configuration may be the `{}` fallback or lack a `relations` entry:
    // treat that as "relations disabled" instead of throwing a TypeError.
    const relationsConfig = this.neListsConfig.relations;
    const relationsEnabled = !!(relationsConfig && relationsConfig.enabled);
    // Computed once per list rather than once per child node.
    const listTagNames = getListsToParseTagNames();

    xml.childNodes.forEach((child: XMLElement) => {
      // Only element nodes (nodeType 1) are relevant.
      if (child.nodeType !== 1) {
        return;
      }
      switch (child.tagName.toLowerCase()) {
        case 'head':
          parsedList.label = replaceNewLines(child.textContent);
          break;
        case 'desc':
          parsedList.description.push(this.genericParse(child));
          break;
        case 'relation':
          if (relationsEnabled) {
            parsedList.relations.push(relationParse.parse(child));
          }
          break;
        case 'listrelation':
          if (relationsEnabled) {
            child.querySelectorAll<XMLElement>('relation').forEach(r => parsedList.relations.push(relationParse.parse(r)));
          }
          break;
        default:
          if (listTagNames.indexOf(child.tagName) >= 0) {
            // Nested list: parse it through the registered list parser and
            // merge its entities and relations into the parent list.
            const subListParser = ParserRegister.get('evt-named-entities-list-parser');
            const parsedSubList = subListParser.parse(child) as NamedEntitiesList;
            parsedList.sublists.push(parsedSubList);
            parsedList.content = parsedList.content.concat(parsedSubList.content);
            parsedList.relations = parsedList.relations.concat(parsedSubList.relations);
          } else {
            parsedList.content.push(this.genericParse(child) as NamedEntity);
          }
      }
    });
    parsedList.label = parsedList.label || xml.getAttribute('type') || `List of ${parsedList.namedEntityType}`;

    return parsedList;
  }
}

@xmlParser('evt-named-entity-parser', NamedEntityRefParser)
export class NamedEntityRefParser extends EmptyParser implements Parser<XMLElement> {
elementParser = createParser(GenericElemParser, this.genericParse);
Expand Down

0 comments on commit 6bcbd51

Please sign in to comment.