Skip to content

Commit

Permalink
fix(nlu): known slots infinite loop
Browse files Browse the repository at this point in the history
  • Loading branch information
slvnperron committed Jul 30, 2019
1 parent f5af51c commit 3fd55b9
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 35 deletions.
18 changes: 6 additions & 12 deletions modules/nlu/src/backend/engine.ts
Expand Up @@ -18,8 +18,8 @@ import { sanitize } from './pipelines/language/sanitizer'
import CRFExtractor from './pipelines/slots/crf_extractor'
import { generateTrainingSequence } from './pipelines/slots/pre-processor'
import Storage from './storage'
import { makeTokens } from './tools/token-utils'
import { allInRange } from './tools/math'
import { makeTokens } from './tools/token-utils'
import { LanguageProvider, NluMlRecommendations, TrainingSequence } from './typings'
import {
Engine,
Expand Down Expand Up @@ -248,20 +248,14 @@ export default class ScopedEngine implements Engine {
.value()

// Builds the training sequences for every intent definition in `lang` and returns them as one flat array.
// NOTE(review): this span comes from a diff view rendered without +/- markers, so the removed body and
// its replacement both appear below and the text is not valid TypeScript as it stands.
private getTrainingSets = async (intentDefs: sdk.NLU.IntentDefinition[], lang: string): Promise<TrainingSequence[]> =>
// Appears to be the removed body: passes the curried generateTrainingSequenceFromIntent(lang) to a lodash
// chain's flatMap, then reduces the Promise.all result with this.flatMapIdendity.
await Promise.all(
_.chain(intentDefs)
.flatMap(await this.generateTrainingSequenceFromIntent(lang))
.value()
).reduce(this.flatMapIdendity, [])
// Appears to be the replacement body: one generateTrainingSequenceFromIntent(lang, intentDef) call per
// intent definition, with the per-intent arrays merged by _.flatten.
// Promise.map is not a native Promise method — presumably Bluebird's; confirm against the project setup.
_.flatten(await Promise.map(intentDefs, intentDef => this.generateTrainingSequenceFromIntent(lang, intentDef)))

// Produces one training sequence per utterance that `intent` declares for `lang`; an intent with no
// utterances for that language yields an empty array (the `|| []` fallback).
// NOTE(review): this span comes from a diff view rendered without +/- markers. The curried signature and
// the two-argument signature are both present, as are both bodies, so the text is not valid TypeScript
// as it stands.
// Appears to be the removed signature: curried, (lang) => async (intent) => ...
private generateTrainingSequenceFromIntent = (lang: string) => async (
// Appears to be the replacement signature: a single async function taking (lang, intent).
private generateTrainingSequenceFromIntent = async (
lang: string,
intent: sdk.NLU.IntentDefinition
): Promise<TrainingSequence[]> =>
// Appears to be the removed body: native Promise.all over an array of async callbacks.
Promise.all(
(intent.utterances[lang] || []).map(
async utterance =>
await this.scopedGenerateTrainingSequence(utterance, lang, intent.slots, intent.name, intent.contexts)
)
// Appears to be the replacement body: same per-utterance call, driven by Promise.map.
// Promise.map is not a native Promise method — presumably Bluebird's; confirm against the project setup.
await Promise.map(intent.utterances[lang] || [], utterance =>
this.scopedGenerateTrainingSequence(utterance, lang, intent.slots, intent.name, intent.contexts)
)

protected async loadModels(intents: sdk.NLU.IntentDefinition[], modelHash: string) {
Expand Down
36 changes: 13 additions & 23 deletions modules/nlu/src/backend/pipelines/slots/pre-processor.ts
@@ -1,9 +1,9 @@
import * as sdk from 'botpress/sdk'
import _ from 'lodash'

import { makeTokens, mergeSpecialCharactersTokens } from '../../tools/token-utils'
import { allInRange } from '../../tools/math'
import { LanguageProvider, KnownSlot, TrainingSequence } from '../../typings'
import { makeTokens, mergeSpecialCharactersTokens } from '../../tools/token-utils'
import { KnownSlot, LanguageProvider, TrainingSequence } from '../../typings'
import { BIO, Sequence, Token } from '../../typings'

const ALL_SLOTS_REGEX = /\[(.+?)\]\(([\w_\.-]+)\)/gi
Expand All @@ -19,30 +19,20 @@ export function keepEntityValues(text: string): string {

// Scans `text` for slot annotations written as `[source](slotName)` and returns one KnownSlot for every
// annotation whose name matches an entry of `slotDefinitions`; annotations naming an unknown slot are dropped.
// NOTE(review): this span comes from a diff view rendered without +/- markers. The removed do/while loop and
// the added while loop are interleaved below, so the text is not valid TypeScript as it stands.
export function getKnownSlots(text: string, slotDefinitions: sdk.NLU.SlotDefinition[]): KnownSlot[] {
const slots = [] as KnownSlot[]
// A regex with the `g` flag keeps its `lastIndex` between exec() calls, so each invocation of this
// function builds its own instance instead of sharing one.
const localSlotsRegex = /\[(.+?)\]\(([\w_\.-]+)\)/gi // local because it is stateful

let regResult: RegExpExecArray | null
// `cursor` is only used by the do/while variant below.
let cursor = 0
// Appears to be the removed implementation: each pass re-ran ITTERATIVE_SLOTS_REGEX (declared outside this
// function) against a fresh substring of `text` and advanced `cursor` by hand.
// NOTE(review): the commit title attributes an infinite loop to the known-slots code — presumably this loop.
do {
const textCpy = text.substring(cursor)
regResult = ITTERATIVE_SLOTS_REGEX.exec(textCpy)
if (regResult) {
const rawMatch = regResult[0]
const source = regResult[1] as string
const slotName = regResult[2] as string

const previousSlot = _.last(slots)
// Offsets here were derived from the previous slot's end plus the match index inside the substring.
const start = (previousSlot ? previousSlot.end : 0) + regResult.index
const end = start + source.length

cursor = start + rawMatch.length

const slotDef = slotDefinitions.find(sd => sd.name === slotName)

if (slotDef) {
slots.push({ ...slotDef, start, end, source })
}
// Appears to be the added implementation: exec() on the local `g` regex resumes from its lastIndex, so the
// loop visits each annotation in `text` in turn and stops when exec() returns null.
while ((regResult = localSlotsRegex.exec(text))) {
// regResult[0] is the whole `[source](slotName)` annotation, [1] the bracketed text, [2] the slot name.
const rawMatch = regResult[0]
const source = regResult[1] as string
const slotName = regResult[2] as string

const slotDef = slotDefinitions.find(sd => sd.name === slotName)

if (slotDef) {
// start/end are offsets into the raw `text` and span the entire annotation (brackets and name included),
// not just `source`.
slots.push({ ...slotDef, start: regResult.index, end: regResult.index + rawMatch.length, source })
}
} while (regResult)
}

return slots
}
Expand Down

0 comments on commit 3fd55b9

Please sign in to comment.