Skip to content

Commit

Permalink
Merge pull request #2019 from botpress/f_ambiguity
Browse files Browse the repository at this point in the history
feat(NLU): Ambiguity
  • Loading branch information
EFF committed Jun 28, 2019
2 parents 1c63f0d + e52a851 commit c6f9395
Show file tree
Hide file tree
Showing 12 changed files with 80 additions and 26 deletions.
Expand Up @@ -10,6 +10,10 @@
padding: 15px;
background: #f5f5f5;
color: #353535;
.title {
display: flex;
justify-content: space-between;
}
.subSection {
margin: 10px 5px;
h5 {
Expand All @@ -20,6 +24,7 @@
text-transform: capitalize;
}
ul {
margin: 5px 0;
padding-left: 15px;
li {
line-height: 1.5em;
Expand Down
Expand Up @@ -12,6 +12,7 @@ interface CssExports {
'splash': string;
'subSection': string;
'summaryTable': string;
'title': string;
}
declare var cssExports: CssExports;
export = cssExports;
@@ -1,4 +1,4 @@
import { H4 } from '@blueprintjs/core'
import { Colors, H4, Icon, Position, Tooltip } from '@blueprintjs/core'
import * as sdk from 'botpress/sdk'
import _ from 'lodash'
import React, { SFC } from 'react'
Expand All @@ -16,7 +16,25 @@ const NLU: SFC<{ nluData: sdk.IO.EventUnderstanding }> = ({ nluData }) => {

return (
<div className={style.block}>
<H4>Understanding</H4>
<div className={style.title}>
<H4>Understanding</H4>
{nluData.ambiguous && (
<Tooltip
position={Position.TOP}
content={
<span>
Predicted intents are very close. You can account for it checking the{' '}
<strong>event.nlu.ambiguous</strong> variable.
</span>
}
>
<span style={{ color: Colors.GRAY1 }}>
<Icon icon="warning-sign" color={Colors.GRAY1} />
&nbsp;Ambiguous
</span>
</Tooltip>
)}
</div>
<Intents intents={nluData.intents} intent={nluData.intent} />
{nluData.entities.length > 0 && <Entities entities={nluData.entities} />}
{nluData.slots && !_.isEmpty(nluData.slots) && <Slots slots={nluData.slots} />}
Expand Down
25 changes: 20 additions & 5 deletions modules/nlu/src/backend/engine.ts
Expand Up @@ -7,7 +7,7 @@ import ms from 'ms'

import { Config } from '../config'

import { PipelineManager } from './pipelinemanager'
import { PipelineManager } from './pipeline-manager'
import { DucklingEntityExtractor } from './pipelines/entities/duckling_extractor'
import PatternExtractor from './pipelines/entities/pattern_extractor'
import { getTextWithoutEntities } from './pipelines/entities/util'
Expand All @@ -18,6 +18,7 @@ import { sanitize } from './pipelines/language/sanitizer'
import CRFExtractor from './pipelines/slots/crf_extractor'
import { generateTrainingSequence } from './pipelines/slots/pre-processor'
import Storage from './storage'
import { allInRange } from './tools/math'
import { LanguageProvider } from './typings'
import {
Engine,
Expand All @@ -37,6 +38,7 @@ const debugEntities = debugExtract.sub('entities')
const debugSlots = debugExtract.sub('slots')
const debugLang = debugExtract.sub('lang')
const MIN_NB_UTTERANCES = 3
const AMBIGUITY_RANGE = 0.05 // +- 5% away from perfect median leads to ambiguity

export default class ScopedEngine implements Engine {
public readonly storage: Storage
Expand All @@ -61,7 +63,7 @@ export default class ScopedEngine implements Engine {
) => Promise<Sequence>

// move this into a functional util file?
private readonly flatMapIdentityReducer = (a, b) => a.concat(b)
private readonly flatMapIdendity = (a, b) => a.concat(b)

private retryPolicy = {
interval: 100,
Expand Down Expand Up @@ -192,11 +194,12 @@ export default class ScopedEngine implements Engine {

try {
const runner = this.pipelineManager.withPipeline(this._pipeline).initFromText(text, includedContexts)
const nluResults = await retry(() => runner.run(), this.retryPolicy)
const nluResults = (await retry(() => runner.run(), this.retryPolicy)) as NLUStructure
res = _.pick(
nluResults,
'intent',
'intents',
'ambiguous',
'language',
'detectedLanguage',
'entities',
Expand Down Expand Up @@ -234,7 +237,7 @@ export default class ScopedEngine implements Engine {
_.chain(intentDefs)
.flatMap(await this.generateTrainingSequenceFromIntent(lang))
.value()
).reduce(this.flatMapIdentityReducer, [])
).reduce(this.flatMapIdendity, [])

private generateTrainingSequenceFromIntent = (lang: string) => async (
intent: sdk.NLU.IntentDefinition
Expand Down Expand Up @@ -325,6 +328,7 @@ export default class ScopedEngine implements Engine {
protected async trainModels(intentDefs: sdk.NLU.IntentDefinition[], modelHash: string, confusionVersion = undefined) {
// TODO use the same data structure to train intent and slot models
// TODO generate single training set here and filter

for (const lang of this.languages) {
try {
const trainableIntents = intentDefs.filter(i => (i.utterances[lang] || []).length >= MIN_NB_UTTERANCES)
Expand Down Expand Up @@ -372,6 +376,17 @@ export default class ScopedEngine implements Engine {
return ds
}

/**
 * Pipeline step that flags the prediction as ambiguous when every intent's confidence
 * lies within AMBIGUITY_RANGE of the even split (1 / number of intents).
 *
 * At least two intents are required: a single candidate leaves nothing to disambiguate,
 * and with no intent at all `1 / 0` makes the bounds meaningless while the empty list
 * would vacuously pass the range check.
 *
 * @param ds NLU structure carrying the predicted intents; its `ambiguous` flag is set in place
 * @returns a promise (to comply with PipelineStep) resolving to the same structure
 */
private _setAmbiguity = (ds: NLUStructure): Promise<NLUStructure> => {
  const nbIntents = ds.intents.length

  if (nbIntents < 2) {
    ds.ambiguous = false
    return Promise.resolve(ds)
  }

  const perfectConfusion = 1 / nbIntents
  const lower = perfectConfusion - AMBIGUITY_RANGE
  const upper = perfectConfusion + AMBIGUITY_RANGE

  ds.ambiguous = allInRange(ds.intents.map(i => i.confidence), lower, upper)

  return Promise.resolve(ds)
}

private _extractIntents = async (ds: NLUStructure): Promise<NLUStructure> => {
const exactMatcher = this._exactIntentMatchers[ds.language]
const exactIntent = exactMatcher && exactMatcher.exactMatch(ds.sanitizedText, ds.includedContexts)
Expand All @@ -387,7 +402,6 @@ export default class ScopedEngine implements Engine {
return ds
}

// TODO add disambiguation flag if intent 1 & intent 2
const intents = await this.intentClassifiers[ds.language].predict(ds.tokens, ds.includedContexts)

// alter ctx with the given predictions in case where no ctx were provided
Expand Down Expand Up @@ -453,6 +467,7 @@ export default class ScopedEngine implements Engine {
this._extractEntities,
this._setTextWithoutEntities,
this._extractIntents,
this._setAmbiguity,
this._extractSlots
]
}
@@ -1,20 +1,20 @@
import * as sdk from 'botpress/sdk'

import { NLUStructure, PipelineProcessManager } from './typings'
import { NLUStructure } from './typings'

/** A single pipeline stage: takes the NLU structure and resolves to an NLU structure. */
export type PipelineStep = (ds: NLUStructure) => Promise<NLUStructure>

export class PipelineManager implements PipelineProcessManager {
export class PipelineManager {
private _nluds!: NLUStructure
private _pipeline!: PipelineStep[]

public withPipeline(pipeline: PipelineStep[]): PipelineProcessManager {
public withPipeline(pipeline: PipelineStep[]): PipelineManager {
this._pipeline = pipeline
return this
}

public initFromText(text: string, includedContexts: string[]): PipelineProcessManager {
this._nluds = initNLUDS(text, includedContexts)
public initFromText(text: string, includedContexts: string[]): PipelineManager {
this._nluds = initNLUStruct(text, includedContexts)
return this
}

Expand All @@ -35,16 +35,18 @@ export class PipelineManager implements PipelineProcessManager {
}
}

export const initNLUDS = (text: string, includedContexts: string[]): NLUStructure => {
// exported for testing purposes
export const initNLUStruct = (text: string, includedContexts: string[]): NLUStructure => {
return {
rawText: text,
lowerText: '',
includedContexts: includedContexts,
detectedLanguage: '',
language: '',
entities: [],
intents: [],
ambiguous: false,
intent: {} as sdk.NLU.Intent,
intents: [],
sanitizedText: '',
tokens: [],
slots: {}
Expand Down
Expand Up @@ -2,9 +2,9 @@ import 'bluebird-global'
import * as sdk from 'botpress/sdk'
import _ from 'lodash'

import { initNLUStruct } from '../../pipeline-manager'
import { LanguageProvider, NLUHealth } from '../../typings'

import { initNLUDS } from './../../pipelinemanager'
import PatternExtractor from './pattern_extractor'

const languageProvider: LanguageProvider = {
Expand Down Expand Up @@ -85,7 +85,7 @@ I'm riding my mercedes-benz to the dealership then I will take my BM to buy an o
const extractor = new PatternExtractor(Toolkit, languageProvider)

const sanitized = userInput.replace('\n', '')
const ds = initNLUDS(sanitized, ['global'])
const ds = initNLUStruct(sanitized, ['global'])
ds.lowerText = sanitized
ds.language = 'en'
ds.tokens = await languageProvider.tokenize(sanitized, 'en')
Expand Down Expand Up @@ -140,7 +140,7 @@ My name is kanye West and I rap like kanye wsest` /*

const extractor = new PatternExtractor(Toolkit, languageProvider)
const sanitized = userInput.replace('\n', '')
const ds = initNLUDS(sanitized, ['global'])
const ds = initNLUStruct(sanitized, ['global'])
ds.lowerText = sanitized
ds.language = 'en'
ds.tokens = await languageProvider.tokenize(sanitized, 'en')
Expand Down Expand Up @@ -177,7 +177,7 @@ My name is kanye West and I rap like kanye wsest` /*

const extractor = new PatternExtractor(Toolkit, languageProvider)
const sanitized = userInput.replace('\n', '')
const ds = initNLUDS(sanitized, ['global'])
const ds = initNLUStruct(sanitized, ['global'])
ds.lowerText = sanitized
ds.language = 'en'
ds.tokens = await languageProvider.tokenize(sanitized, 'en')
Expand Down
12 changes: 11 additions & 1 deletion modules/nlu/src/backend/tools/math.test.ts
@@ -1,4 +1,4 @@
import { computeNorm, scalarMultiply, vectorAdd } from './math'
import { allInRange, computeNorm, scalarMultiply, vectorAdd } from './math'

describe('Math utils', () => {
const vec0 = []
Expand Down Expand Up @@ -26,4 +26,14 @@ describe('Math utils', () => {
expect(computeNorm(vec3)).toBeCloseTo(8.77, 2)
expect(computeNorm([22, 21, 59, 4, -5, 36])).toBeCloseTo(75.78, 2)
})

test('AllInRange', () => {
  type RangeCase = [number[], number, number]

  // [values, lower, upper] where at least one value falls outside the range
  // (a value equal to the upper bound counts as outside)
  const rejected: RangeCase[] = [
    [[0.45, 0.55], 0.45, 0.55],
    [[0.44, 0.55], 0.45, 0.55],
    [[0.45, 0.56], 0.45, 0.55],
    [[0.4, 0.6], 0.45, 0.55]
  ]

  // [values, lower, upper] where every value is inside the range
  // (an empty vector is accepted)
  const accepted: RangeCase[] = [
    [[0.46, 0.54], 0.45, 0.55],
    [[0.32, 0.32, 0.35], 0.3, 0.36],
    [[], 0.3, 0.36]
  ]

  rejected.forEach(([values, lower, upper]) => {
    expect(allInRange(values, lower, upper)).toBeFalsy()
  })

  accepted.forEach(([values, lower, upper]) => {
    expect(allInRange(values, lower, upper)).toBeTruthy()
  })
})
})
4 changes: 4 additions & 0 deletions modules/nlu/src/backend/tools/math.ts
Expand Up @@ -70,3 +70,7 @@ export function scalarMultiply(vec: number[], multiplier: number): number[] {
export function scalarDivide(vec: number[], divider: number): number[] {
return scalarMultiply(vec, 1 / divider)
}

/**
 * Tells whether every value of `vec` falls inside the range delimited by `lower` and `upper`,
 * as decided by lodash `inRange` (lower bound included, upper bound excluded).
 *
 * @param vec values to check; an empty vector yields `true`
 * @param lower start of the range
 * @param upper end of the range (a value equal to it is out of range)
 * @returns `true` when no value lies outside the range
 */
export function allInRange(vec: number[], lower: number, upper: number): boolean {
  // `every` stops at the first out-of-range value and needs no intermediate boolean array
  return vec.every(v => _.inRange(v, lower, upper))
}
7 changes: 1 addition & 6 deletions modules/nlu/src/backend/typings.ts
Expand Up @@ -91,19 +91,14 @@ export interface NLUStructure {
includedContexts: string[]
slots: { [key: string]: sdk.NLU.Slot }
entities: sdk.NLU.Entity[]
ambiguous: boolean
intents: sdk.NLU.Intent[]
intent: sdk.NLU.Intent
tokens: string[]
}

export type Token2Vec = { [token: string]: number[] }

export interface PipelineProcessManager {
withPipeline(pipeline: Function[]): PipelineProcessManager
initFromText(text: string, includedContexts: string[]): PipelineProcessManager
run(): Promise<NLUStructure>
}

/**
 * Maps a language key to a list of entries, each holding a language source, its Axios client,
 * an error count and an optional `disabledUntil` date.
 * NOTE(review): presumably `errors` / `disabledUntil` drive temporary disabling of a failing source — confirm against the caller.
 */
export interface LangsGateway {
[lang: string]: { source: LanguageSource; client: AxiosInstance; errors: number; disabledUntil?: Date }[]
}
Expand Down
1 change: 1 addition & 0 deletions src/bp/core/sdk/impl.ts
Expand Up @@ -77,6 +77,7 @@ export class IOEvent implements sdk.IO.Event {
entities: [],
language: 'n/a',
detectedLanguage: 'n/a',
ambiguous: false,
slots: {},
intent: { name: 'none', confidence: 1, context: 'global' },
intents: [],
Expand Down
1 change: 1 addition & 0 deletions src/bp/core/services/middleware/event-engine.ts
Expand Up @@ -40,6 +40,7 @@ const eventSchema = {
.array()
.items(joi.object())
.optional(),
ambiguous: joi.boolean(),
language: joi.string().optional(),
detectedLanguage: joi.string().optional(),
entities: joi
Expand Down
4 changes: 3 additions & 1 deletion src/bp/sdk/botpress.d.ts
Expand Up @@ -485,6 +485,8 @@ declare module 'botpress/sdk' {

export interface EventUnderstanding {
readonly intent: NLU.Intent
/** True when the predicted intents need disambiguation */
readonly ambiguous: boolean
readonly intents: NLU.Intent[]
/** The language used for prediction. Will be equal to the detected language when it is part of the supported languages; falls back to the default language otherwise */
readonly language: string
Expand All @@ -495,7 +497,7 @@ declare module 'botpress/sdk' {
readonly errored: boolean
readonly includedContexts: string[]
}

export interface IncomingEvent extends Event {
/** Array of possible suggestions that the Decision Engine can take */
readonly suggestions?: Suggestion[]
Expand Down

0 comments on commit c6f9395

Please sign in to comment.