Skip to content

Commit

Permalink
remove old versions of the suggest classifier. fixes #3
Browse files Browse the repository at this point in the history
  • Loading branch information
kurtismgit committed Aug 5, 2016
1 parent a55b908 commit c47936b
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 5 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Steps for adding this to your existing hubot:
export HUBOT_WATSON_NLC_URL=<API URL for Watson Natural Language Classifier>
export HUBOT_WATSON_NLC_USERNAME=<Watson NLC Username>
export HUBOT_WATSON_NLC_PASSWORD=<Watson NLC Password>
export HUBOT_WATSON_NLC_SUGGEST_PREFIX=<Optional prefix to include in NLC classifier name>
```

5. Start up your bot & off to the races!
Expand Down Expand Up @@ -58,6 +59,8 @@ export HUBOT_BLUEMIX_PASSWORD=<Password for the Bluemix use>
2. Download the jquery.min.js library to the `lib` folder of this project. This can be obtained from here: http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js
3. Run `npm run update-config` to generate or update `data/services-data.json`.
4. Review the changes to the `data/services-data.json` file. Load the doc page of each added service to confirm the quality of the doc and what the service is called on the doc pages. If a service is referred to by names other than its doc_name attribute, then add those names to doc_name.
- Optional: Full manual training can be done by adding `class_text` array of strings in `nlc_class_info` objects.
- Optional: To keep using crawler-generated data but also include partial manual training data, add `additional_class_text` instead of `class_text`.
5. Use crawler to produce training data:
- `npm run crawler -- --key=<YOUR_ALCHEMY_API_KEY>`
6. Review output from crawler and copy the generated csv file into the data directory using the next version number of the `data/hubot-service-suggest` .csv file.
Expand Down
1 change: 1 addition & 0 deletions src/lib/env.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const nlc = {
url: process.env.HUBOT_WATSON_NLC_URL,
username: process.env.HUBOT_WATSON_NLC_USERNAME,
password: process.env.HUBOT_WATSON_NLC_PASSWORD,
prefix: process.env.HUBOT_WATSON_NLC_SUGGEST_PREFIX
};

if (!nlc.url) {
Expand Down
46 changes: 43 additions & 3 deletions src/scripts/suggest.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const fs = require('fs');
const path = require('path');
const TAG = path.basename(__filename);
const NLCManager = require('hubot-ibmcloud-cognitive-lib').nlcManager;
const watson = require('watson-developer-cloud');
const palette = require('hubot-ibmcloud-utils').palette;
const activity = require('hubot-ibmcloud-activity-emitter');
const env = require('../lib/env.js');
Expand Down Expand Up @@ -89,8 +90,8 @@ function getTrainingDataInfo(robot) {
if(matches) {
trainingDataInfo = {
path: path.resolve(__dirname, '../../data', matches[0]),
version: matches[1],
classifierName: matches[0].substring(0, matches[0].length - '.csv'.length)
version: parseInt(matches[1]),
classifierName: (env.nlc.prefix ? env.nlc.prefix + '-' : '') + matches[0].substring(0, matches[0].length - '.csv'.length)
};
break;
}
Expand All @@ -110,6 +111,43 @@ function getTrainingDataInfo(robot) {
return trainingDataInfo;
}

/**
 * Finds previous versions of the service suggest classifier and deletes them.
 *
 * Classifier names are expected to have the form `<base>-v<N>`. Every
 * classifier returned by the NLC service whose name is exactly `<base>-v<M>`
 * with M lower than the current version is removed. The deletions are
 * fire-and-forget: outcomes are only logged, nothing is returned.
 *
 * @param {Object} robot - Hubot robot; only `robot.logger` is used here.
 * @param {Object} trainingDataInfo - Description of the current training data.
 * @param {string} trainingDataInfo.classifierName - Current classifier name (`<base>-v<N>`).
 * @param {number|string} trainingDataInfo.version - Current version number N.
 */
function cleanupOldClassifiers(robot, trainingDataInfo) {
  const classifierName = trainingDataInfo.classifierName;
  const versionMarkerIndex = classifierName.lastIndexOf('-v');
  const currentVersion = Number.parseInt(trainingDataInfo.version, 10);

  // Without a '-v' marker the base name would be empty and the pattern below
  // would match every versioned classifier in the account, so bail out
  // instead of risking the deletion of unrelated classifiers.
  if (versionMarkerIndex < 1 || Number.isNaN(currentVersion)) {
    robot.logger.error(`${TAG}: unable to determine base name or version of classifier ${classifierName}; skipping cleanup of old classifiers.`);
    return;
  }

  const classifierBaseName = classifierName.substring(0, versionMarkerIndex);
  // The base name may contain a user-supplied prefix, so escape regex
  // metacharacters. Anchor the pattern so classifiers that merely contain the
  // base name (e.g. the same name with another prefix or a suffix) are spared.
  const escapedBaseName = classifierBaseName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`^${escapedBaseName}-v(\\d+)$`);

  const nlc = watson.natural_language_classifier({
    url: env.nlc.url,
    username: env.nlc.username,
    password: env.nlc.password,
    version: 'v1'
  });

  robot.logger.debug(`${TAG}: checking for older versions of the classifier: ${classifierBaseName}`);
  nlc.list({}, (err, response) => {
    if (err) {
      robot.logger.error(`${TAG}: error retrieving list of potential classifiers to remove. Error: ${JSON.stringify(err)}`);
      return;
    }

    // Tolerate a response without a classifiers array.
    const classifiers = (response && response.classifiers) || [];

    classifiers.forEach((classifier) => {
      const matches = typeof classifier.name === 'string' ? classifier.name.match(regex) : null;

      // Compare versions numerically rather than relying on implicit coercion.
      if (!matches || Number.parseInt(matches[1], 10) >= currentVersion) {
        return;
      }

      robot.logger.info(`${TAG}: Asynch call using nlc library to delete old service suggest classifier: ${classifier.name}`);
      nlc.remove({classifier_id: classifier.classifier_id}, (removeErr) => {
        if (removeErr) {
          robot.logger.error(`${TAG}: error removing old classifier: ${JSON.stringify(classifier)} Error: ${JSON.stringify(removeErr)}`);
        }
        else {
          robot.logger.info(`${TAG}: Successfully deleted old service suggest classifier: ${classifier.name}`);
        }
      });
    });
  });
}

function suggestServices(robot, res, description, nlcManager){

const text = description.trim();
Expand Down Expand Up @@ -187,9 +225,11 @@ module.exports = (robot) => {
training_data: fs.createReadStream(trainingDataInfo.path),
version: 'v1'
};

cleanupOldClassifiers(robot, trainingDataInfo);
nlcManager = new NLCManager(nlcOptions);

robot.logger.info(`${TAG}: checking status of NLC training for service suggest.`);
robot.logger.info(`${TAG}: checking status of NLC training for service suggest classifier: ${trainingDataInfo.classifierName}`);
nlcManager.trainIfNeeded().then((classifier)=>{
robot.logger.debug(`${TAG}: classifier for NLC service suggest: ${JSON.stringify(classifier)}`);

Expand Down
2 changes: 1 addition & 1 deletion test/resources/mock.classifierAvailable.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"name": "hubot-service-suggest-v2",
"language": "en",
"created": "2016-06-29T19:09:59.000Z",
"url": "https://testClassifierUrl",
"url": "https://watson-nlc-api/v1/classifiers/cd02b5x110-nlc-5103",
"status": "Available",
"status_description": "The classifier instance is now available and is ready to take classifier requests."
}
11 changes: 10 additions & 1 deletion test/resources/mock.classifierList.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
{
"classifiers": [
{
"classifier_id": "cd02b5x110-nlc-old",
"note": "This an old version that should be deleted when service suggest starts up",
"url": "https://watson-nlc-api/v1/classifiers/cd02b5x110-nlc-old",
"name": "hubot-service-suggest-v1",
"language": "en",
"created": "2016-06-29T15:24:48.919Z"
},
{
"classifier_id": "cd02b5x110-nlc-5103",
"url": "https://testClassifierUrl",
"note": "This is the current version whose ID will be used for other operations",
"url": "https://watson-nlc-api/v1/classifiers/cd02b5x110-nlc-5103",
"name": "hubot-service-suggest-v2",
"language": "en",
"created": "2016-06-29T15:24:48.919Z"
Expand Down
6 changes: 6 additions & 0 deletions test/service.suggest.mock.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,11 @@ module.exports = {
text: 'error'
})
.reply(500, 'Some 500 error message from the NLC service');

// Mock route for when the old classifier is deleted
nlcScope.delete('/v1/classifiers/cd02b5x110-nlc-old').reply(200, function() {
return {};
});

}
};

0 comments on commit c47936b

Please sign in to comment.