Skip to content

Commit

Permalink
feat(nlu): native NLU has better ranking and false-positive detection
Browse files Browse the repository at this point in the history
  • Loading branch information
slvnperron committed May 4, 2018
1 parent cfe9149 commit 6c8e8c8
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 16 deletions.
3 changes: 2 additions & 1 deletion packages/functionals/botpress-nlu/config.json
Expand Up @@ -2,5 +2,6 @@
"intentsDir": "./intents",
"entitiesDir": "./entities",
"provider": "native",
"debugModeEnabled": false
"debugModeEnabled": false,
"minimumConfidence": "0.3"
}
3 changes: 2 additions & 1 deletion packages/functionals/botpress-nlu/package.json
Expand Up @@ -54,7 +54,8 @@
"sass-loader": "^6.0.6",
"style-loader": "^0.20.1",
"webpack": "^3.11.0",
"webpack-node-externals": "^1.6.0"
"webpack-node-externals": "^1.6.0",
"zscore": "^0.1.0"
},
"dependencies": {
"axios": "^0.17.1",
Expand Down
67 changes: 58 additions & 9 deletions packages/functionals/botpress-nlu/src/index.js
Expand Up @@ -43,7 +43,15 @@ module.exports = {
recastBotSlug: { type: 'string', required: false, default: '', env: 'NLU_RECAST_BOT_SLUG' },

// Debug mode will print NLU information to the console for debugging purposes
debugModeEnabled: { type: 'bool', required: true, default: false, env: 'NLU_DEBUG_ENABLED' }
debugModeEnabled: { type: 'bool', required: true, default: false, env: 'NLU_DEBUG_ENABLED' },

// The minimum confidence required (a ratio between 0 and 1, not a percentage) for an intent to match
// Set to '0' to always match
minimumConfidence: { type: 'string', required: false, default: '0.3', env: 'NLU_MIN_CONFIDENCE' },

// The maximum confidence after which it is considered a statistical error
// Mostly irrelevant except for NLU=native
maximumConfidence: { type: 'string', required: false, default: '1000', env: 'NLU_MAX_CONFIDENCE' }
},

init: async function(bp, configurator) {
Expand All @@ -63,6 +71,18 @@ module.exports = {
throw new Error(`Unknown NLU provider "${config.provider}"`)
}

// Parse the confidence thresholds from the string-typed configuration values.
// An invalid value never aborts initialization: it is replaced by a permissive
// bound, and a warning is logged so the misconfiguration does not go unnoticed.
let MIN_CONFIDENCE = parseFloat(config.minimumConfidence)

// The minimum must be a ratio within [0, 1]; anything else falls back to 0,
// which means every intent passes the lower bound.
if (isNaN(MIN_CONFIDENCE) || MIN_CONFIDENCE < 0 || MIN_CONFIDENCE > 1) {
  bp.logger.warn(
    `[NLU] Invalid minimumConfidence "${config.minimumConfidence}" (expected a number between 0 and 1), falling back to 0`
  )
  MIN_CONFIDENCE = 0
}

let MAX_CONFIDENCE = parseFloat(config.maximumConfidence)

// The maximum only has to be a non-negative number.
// NOTE(review): the fallback (10000) differs from the declared config default ('1000') — confirm which is intended.
if (isNaN(MAX_CONFIDENCE) || MAX_CONFIDENCE < 0) {
  bp.logger.warn(
    `[NLU] Invalid maximumConfidence "${config.maximumConfidence}" (expected a non-negative number), falling back to 10000`
  )
  MAX_CONFIDENCE = 10000
}

provider = new Provider({
logger: bp.logger,
storage: storage,
Expand All @@ -79,9 +99,9 @@ module.exports = {
max_tries: 3
}

async function incomingMiddleware(event, next) {
async function processEvent(event) {
if (['session_reset', 'bp_dialog_timeout'].includes(event.type)) {
return next()
return
}

try {
Expand All @@ -90,32 +110,61 @@ module.exports = {
}

const metadata = await retry(() => provider.extract(event), retryPolicy)

if (metadata) {
Object.assign(event, { nlu: metadata })
}
} catch (err) {
bp.logger.warn('[NLU] Error extracting metadata for incoming text: ' + err.message)
}

/**
 * Tells whether the confidence of the best intent on `event` lies within
 * [MIN_CONFIDENCE, MAX_CONFIDENCE].
 *
 * A missing (or otherwise falsy, non-zero) confidence is treated as 1, as before.
 * A confidence of exactly 0 is kept as 0 instead of being promoted to 1, so a
 * zero-confidence intent no longer passes a positive minimum threshold.
 *
 * @returns {boolean} true when the confidence is inside the accepted range
 */
const intentConfidentEnough = () => {
  const rawConfidence = _.get(event, 'nlu.intent.confidence')
  // `0 || 1` would evaluate to 1, hence the explicit check for zero
  const confidence = rawConfidence === 0 ? 0 : rawConfidence || 1

  return confidence >= MIN_CONFIDENCE && confidence <= MAX_CONFIDENCE
}

/**
 * Case-insensitive prefix test on the name of the best intent.
 * Always false when the intent is not confident enough.
 *
 * @param {string} prefix - the beginning of the intent name to look for
 * @returns {boolean} whether the confident best intent's name starts with `prefix`
 */
const intentStartsWith = prefix => {
  if (!intentConfidentEnough()) {
    return false
  }

  const currentName = (_.get(event, 'nlu.intent.name') || '').toLowerCase()
  const wantedPrefix = prefix && prefix.toLowerCase()

  return currentName.startsWith(wantedPrefix)
}

/**
 * Case-insensitive equality test on the name of the best intent.
 * Always false when the intent is not confident enough.
 *
 * @param {string} intentName - the intent name to compare against
 * @returns {boolean} whether the confident best intent is named `intentName`
 */
const intentIs = intentName => {
  if (!intentConfidentEnough()) {
    return false
  }

  const currentName = (_.get(event, 'nlu.intent.name') || '').toLowerCase()
  const wantedName = intentName && intentName.toLowerCase()

  return currentName === wantedName
}

// Helper functions that get merged into `event.nlu.intent` below, so that
// consumers can call `event.nlu.intent.is(...)`, `.startsWith(...)` and
// `.intentConfidentEnough()`. Every helper takes the confidence range into account.
const bestMatchProps = {
is: intentIs,
startsWith: intentStartsWith,
intentConfidentEnough
}

_.merge(event, {
nlu: {
intent: {
is: intentName =>
(_.get(event, 'nlu.intent.name') || '').toLowerCase() === (intentName && intentName.toLowerCase())
},
intent: bestMatchProps,
intents: {
has: intentName => !!(_.get(event, 'nlu.intents') || []).find(i => i.name === intentName)
}
}
})
next()
}

// Expose the NLU API on the bot instance: `processEvent` runs the extraction
// and attaches the intent helpers to an event, `provider` is the configured
// provider instance created above.
bp.nlu = {
processEvent,
provider
}

bp.middlewares.register({
name: 'nlu.incoming',
module: 'botpress-nlu',
type: 'incoming',
handler: incomingMiddleware,
handler: async (event, next) => {
await processEvent(event)
next()
},
order: 10,
description:
'Process natural language in the form of text. Structured data with an action and parameters for that action is injected in the incoming message event.'
Expand Down
24 changes: 19 additions & 5 deletions packages/functionals/botpress-nlu/src/providers/native.js
@@ -1,6 +1,7 @@
import _ from 'lodash'
import crypto from 'crypto'

import zscore from 'zscore'
import natural from 'natural'

import Provider from './base'
Expand Down Expand Up @@ -109,11 +110,24 @@ export default class NativeProvider extends Provider {
}
}

const classifications = this.classifier.getClassifications(incomingEvent.text)
const intents = _.map(_.orderBy(classifications, ['value'], ['desc']), c => ({
intent: c.label,
score: c.value
}))
// Classify the incoming text and sort the results from highest to lowest raw value.
const classifications = _.orderBy(this.classifier.getClassifications(incomingEvent.text), ['value'], ['desc'])

// Normalize the raw values with `zscore` (presumably standard scores over all
// classifications — confirm against the zscore package), then penalize each
// score by the scores of the three next-ranked candidates (weights 0.5, 0.75
// and 1). Missing neighbours — and any falsy score such as 0 or NaN — count as 0.
// The callback reads `allScores` while `map` is still running, i.e. before the
// reassignment takes effect, so the penalties use the un-penalized z-scores.
// NOTE(review): behavior of `zscore` with fewer than two values is not visible here — verify.
let allScores = zscore(_.map(classifications, c => parseFloat(c.value)))
allScores = allScores.map((s, i) => {
return (
s -
(_.get(allScores, i + 1) || 0) * 0.5 -
(_.get(allScores, i + 2) || 0) * 0.75 -
(_.get(allScores, i + 3) || 0)
)
})

// Pair every classification label with its adjusted score, keeping the
// ordering of `classifications` (sorted by raw value, not by adjusted score).
const intents = _.map(classifications, (c, i) => {
return {
intent: c.label,
score: allScores[i]
}
})

const bestIntent = _.first(intents)

Expand Down
10 changes: 10 additions & 0 deletions packages/functionals/botpress-nlu/yarn.lock
Expand Up @@ -1634,6 +1634,10 @@ cyclist@~0.2.2:
version "0.2.2"
resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-0.2.2.tgz#1b33792e11e914a2fd6d6ed6447464444e5fa640"

d3-array@0.7.1:
version "0.7.1"
resolved "https://registry.yarnpkg.com/d3-array/-/d3-array-0.7.1.tgz#a321c21558459d994eb4ad47b478240e64933942"

d@1:
version "1.0.0"
resolved "https://registry.yarnpkg.com/d/-/d-1.0.0.tgz#754bb5bfe55451da69a58b94d45f4c5b0462d58f"
Expand Down Expand Up @@ -4983,3 +4987,9 @@ yargs@~3.10.0:
cliui "^2.1.0"
decamelize "^1.0.0"
window-size "0.1.0"

zscore@^0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/zscore/-/zscore-0.1.0.tgz#21aa0998243708d2c5bf61bc724dfd7223c983c2"
dependencies:
d3-array "0.7.1"

0 comments on commit 6c8e8c8

Please sign in to comment.