// model.js
(function () {
/*
* Wrapper around the naive bayes classifier.
* Handles turning the data into a format that we
* like, loading it, saving it, etc.
*/
"use strict";
// The naive bayes classifier implementation this module wraps.
var Credulous = require('credulous');

// Node core modules: file access for persisting the model, URL parsing
// for pulling the host out of submission links.
var fs = require('fs');
var url = require('url');

// Current classifier instance. Left undefined here; assigned later by the
// wrapper's createModel or loadModel.
var model;

// Where the serialised model is read from and written to.
var modelPath = __dirname + '/../model/model.json';

// The object that becomes this module's exports; filled in further down.
var modelWrapper;
/*
 * Work out which host a submission points at, for use as a classifier
 * feature.
 *
 * The Hacker News API returns odd values for posts hosted on HN itself
 * (original author's note), so any URL without a parseable host is
 * attributed to news.ycombinator.com. A missing or non-string URL is
 * treated the same way rather than being passed to url.parse, which
 * throws a TypeError for non-string input.
 *
 * @param {string} [urlString] The submission URL; may be absent or relative.
 * @returns {string} The `host` reported by url.parse (this includes the
 *   port when the URL has one), or 'news.ycombinator.com' as a fallback.
 */
function getDomainName(urlString) {
  var fallback = 'news.ycombinator.com';

  if (typeof urlString !== 'string') {
    return fallback;
  }

  // host is null for relative or empty URLs; fall back in that case too.
  return url.parse(urlString).host || fallback;
}
/*
 * Build the three classifier features for a post: the normalised title,
 * the submitter's username and the domain name of the link.
 *
 * Shared by trainModel and classifyPost so that training and
 * classification always prepare their input in exactly the same way.
 */
function extractFeatures(post) {
  // The domain is resolved before the title is touched, matching the order
  // both callers have always used.
  var domainName = getDomainName(post.url);
  // Punctuation stripped from the title after lower-casing.
  var blackList = /[,'":;(){}\[\]\/\-]/g;
  var title = post.title.toLowerCase().replace(blackList, '');

  return {
    title: title,
    submittedBy: post.submittedBy,
    domainName: domainName
  };
}

/*
 * Public API of this module. Every method except createModel and loadModel
 * uses the module-level `model`, so one of those two has to be called first.
 */
modelWrapper = {
  // Path of the JSON file used by loadModel and saveModel.
  modelPath: modelPath,

  /*
   * Read the serialised model from modelPath and make it the current model.
   * Throws whatever fs.readFileSync or JSON.parse throw if the file is
   * missing or malformed.
   */
  loadModel: function() {
    var modelJSON = fs.readFileSync(modelPath, 'utf8');

    // Just some dummy stuff so we can load the real thing: the placeholder
    // labels are presumably replaced by fromJSON (confirm against credulous).
    model = new Credulous({labels: ['1','2','3']});
    model.fromJSON(JSON.parse(modelJSON));
    console.log('length is ', model.trainArgumentsLength);
  },

  /*
   * Serialise the current model and write it to modelPath, replacing any
   * existing file.
   */
  saveModel: function() {
    var modelJSON = model.toJSON();

    fs.writeFileSync(modelPath, JSON.stringify(modelJSON), 'utf8');
  },

  /*
   * Replace the current model with a fresh one using the 'like' and
   * 'dislike' labels. dataLength is 3 to match the three features passed
   * to train and classify below.
   */
  createModel: function() {
    model = new Credulous({labels: ['like', 'dislike'], dataLength: 3});
  },

  /*
   * Train the model based on three criteria:
   * 1. The contents of the title.
   * 2. The username of the submitter.
   * 3. The domain name of the submission.
   *
   * `post` must provide title, submittedBy and url; `label` is the label
   * to associate with it.
   */
  trainModel: function(post, label) {
    var features = extractFeatures(post);

    console.log(features.title, features.submittedBy, features.domainName);
    model.train(features.title, features.submittedBy, features.domainName, label);
  },

  /*
   * Classify a post using the same three features as trainModel and
   * return whatever the underlying model's classify call returns.
   */
  classifyPost: function(post) {
    var features = extractFeatures(post);

    return model.classify(features.title, features.submittedBy, features.domainName);
  }
};
module.exports = modelWrapper;
}());