Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added training function to pull streams from various lat/lons to get language training data
- Loading branch information
1 parent
e2657d2
commit 6ff856c
Showing
3 changed files
with
70 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,48 @@ | ||
{ | ||
"name": "naive-baysian-twitter", | ||
"description": "", | ||
"version": "0.0.0", | ||
"author": "Brendan Nee <me@bn.ee>", | ||
"dependencies": { | ||
"express": ">= 2.5.8" | ||
, "mongoose": ">= 2.5.7" | ||
, "async": ">= 0.1.16" | ||
, "request": ">= 2.9.152" | ||
, "underscore": ">= 1.3.1" | ||
, "ntwitter": ">=0.2.10" | ||
, "socket.io": ">=0.9.1-1" | ||
}, | ||
"main": "index" | ||
var models = require('../models/models') | ||
, async = require('async') | ||
, _ = require('underscore') | ||
, languages = require('./languages'); | ||
|
||
module.exports = function train(app, cb){ | ||
var Tweet = app.set('db').model('Tweet') | ||
, Probability = app.set('db').model('Probability') | ||
, twit = app.set('twit'); | ||
|
||
async.forEachSeries(languages, trainLanguage, cb); | ||
|
||
function trainLanguage(language, cb){ | ||
try{ | ||
console.log('Training for ' + language.name); | ||
//get approx 100 mile bounding box around location | ||
var boxWidth = 100/69; | ||
var sampleTime = 60000 | ||
twit.search('place:' + language.loc, {rpp:100}, function(err, data) { | ||
console.log(data.results.length); | ||
async.forEachSeries(data.results, processTweet, function(e, results){ | ||
cb(); | ||
}); | ||
|
||
}); | ||
} catch(e) { | ||
cb(); | ||
} | ||
|
||
function processTweet(data, cb){ | ||
try{ | ||
//classify tweet based on language | ||
var tweet = new Tweet(data); | ||
|
||
tweet.trained_language = language.code; | ||
tweet.trained = true; | ||
tweet.autotrained = true; | ||
tweet.save(function(e, result){ | ||
cb(); | ||
}); | ||
} catch(e) { | ||
console.log('error'); | ||
cb(); | ||
} | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters