From 3fd55b938294d5714e80a813474501b750b8413d Mon Sep 17 00:00:00 2001 From: Sylvain Perron Date: Tue, 30 Jul 2019 13:37:08 -0400 Subject: [PATCH] fix(nlu): known slots infinite loop --- modules/nlu/src/backend/engine.ts | 18 ++++------ .../backend/pipelines/slots/pre-processor.ts | 36 +++++++------------ 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/modules/nlu/src/backend/engine.ts b/modules/nlu/src/backend/engine.ts index b9ae9e5f70f..a1413c84f9d 100644 --- a/modules/nlu/src/backend/engine.ts +++ b/modules/nlu/src/backend/engine.ts @@ -18,8 +18,8 @@ import { sanitize } from './pipelines/language/sanitizer' import CRFExtractor from './pipelines/slots/crf_extractor' import { generateTrainingSequence } from './pipelines/slots/pre-processor' import Storage from './storage' -import { makeTokens } from './tools/token-utils' import { allInRange } from './tools/math' +import { makeTokens } from './tools/token-utils' import { LanguageProvider, NluMlRecommendations, TrainingSequence } from './typings' import { Engine, @@ -248,20 +248,14 @@ export default class ScopedEngine implements Engine { .value() private getTrainingSets = async (intentDefs: sdk.NLU.IntentDefinition[], lang: string): Promise => - await Promise.all( - _.chain(intentDefs) - .flatMap(await this.generateTrainingSequenceFromIntent(lang)) - .value() - ).reduce(this.flatMapIdendity, []) + _.flatten(await Promise.map(intentDefs, intentDef => this.generateTrainingSequenceFromIntent(lang, intentDef))) - private generateTrainingSequenceFromIntent = (lang: string) => async ( + private generateTrainingSequenceFromIntent = async ( + lang: string, intent: sdk.NLU.IntentDefinition ): Promise => - Promise.all( - (intent.utterances[lang] || []).map( - async utterance => - await this.scopedGenerateTrainingSequence(utterance, lang, intent.slots, intent.name, intent.contexts) - ) + await Promise.map(intent.utterances[lang] || [], utterance => + this.scopedGenerateTrainingSequence(utterance, lang, intent.slots, intent.name, intent.contexts) ) protected async loadModels(intents: sdk.NLU.IntentDefinition[], modelHash: string) { diff --git a/modules/nlu/src/backend/pipelines/slots/pre-processor.ts b/modules/nlu/src/backend/pipelines/slots/pre-processor.ts index 47e90e4ef2e..3913d0f19c2 100644 --- a/modules/nlu/src/backend/pipelines/slots/pre-processor.ts +++ b/modules/nlu/src/backend/pipelines/slots/pre-processor.ts @@ -1,9 +1,9 @@ import * as sdk from 'botpress/sdk' import _ from 'lodash' -import { makeTokens, mergeSpecialCharactersTokens } from '../../tools/token-utils' import { allInRange } from '../../tools/math' -import { LanguageProvider, KnownSlot, TrainingSequence } from '../../typings' +import { makeTokens, mergeSpecialCharactersTokens } from '../../tools/token-utils' +import { KnownSlot, LanguageProvider, TrainingSequence } from '../../typings' import { BIO, Sequence, Token } from '../../typings' const ALL_SLOTS_REGEX = /\[(.+?)\]\(([\w_\.-]+)\)/gi @@ -19,30 +19,20 @@ export function keepEntityValues(text: string): string { export function getKnownSlots(text: string, slotDefinitions: sdk.NLU.SlotDefinition[]): KnownSlot[] { const slots = [] as KnownSlot[] + const localSlotsRegex = /\[(.+?)\]\(([\w_\.-]+)\)/gi // local because it is stateful let regResult: RegExpExecArray | null - let cursor = 0 - do { - const textCpy = text.substring(cursor) - regResult = ITTERATIVE_SLOTS_REGEX.exec(textCpy) - if (regResult) { - const rawMatch = regResult[0] - const source = regResult[1] as string - const slotName = regResult[2] as string - - const previousSlot = _.last(slots) - const start = (previousSlot ? previousSlot.end : 0) + regResult.index - const end = start + source.length - - cursor = start + rawMatch.length - - const slotDef = slotDefinitions.find(sd => sd.name === slotName) - - if (slotDef) { - slots.push({ ...slotDef, start, end, source }) - } + while ((regResult = localSlotsRegex.exec(text))) { + const rawMatch = regResult[0] + const source = regResult[1] as string + const slotName = regResult[2] as string + + const slotDef = slotDefinitions.find(sd => sd.name === slotName) + + if (slotDef) { + slots.push({ ...slotDef, start: regResult.index, end: regResult.index + rawMatch.length, source }) } - } while (regResult) + } return slots }