Permalink
Browse files

Remove duplicate analysis code in controller.

  • Loading branch information...
1 parent 71a7e16 commit 73503a8f15c0614e0d9d7424317accb20a13d9c8 @akavlie committed Jun 1, 2012
Showing with 7 additions and 228 deletions.
  1. +1 −226 controllers/AnalysisController.js
  2. +1 −0 models/ActivityItem.js
  3. +5 −2 test/activity-item.js
@@ -46,7 +46,7 @@ exports.controller = function (req, res) {
return finished();;
}
var item = items.shift();
- //console.log("Analyzing item: "+item.message);
+ console.log("Analyzing item: "+item.message);
//item.ratings = ratings;
item.analyzed_at = new Date();
item.save(function (err) {
@@ -69,231 +69,6 @@ exports.controller = function (req, res) {
}
-function analyze_item (_item, cb) {
- console.log('AnalysisController analyze_item()');
-
- var error = null;
- var item = _item;
-
- var ratings = item.ratings || {};
- var characteristics = item.characteristics || [];
- var topics = item.topics || [];
- if (!ratings.overall) {
- ratings.overall = 0;
- }
- var message = item.message.toLowerCase();
-
- var url_pattern = /\(?\bhttps?:\/\/[-A-Za-z0-9+&@#\/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#\/%=~_()|]/gi;
- var urls = message.match(url_pattern);
- var hash_pattern = /#[a-zA-Z_]*/gi;
- var hashtags = message.match(hash_pattern);
-
- urls = urls || [];
- hashtags = hashtags || [];
-
- var keywords = [];
- urls.forEach(function (url) {
- domain = url.substr(url.indexOf("//")+2);
- domain = domain.substr(0, domain.indexOf("/"));
- var found = false;
- keywords.forEach(function (existing) {
- if (existing == domain) {
- found = true;
- }
- });
- if (!found) {
- keywords.push(domain);
- }
- });
- hashtags.forEach(function (tag) {
- tag = tag.substring(1);
- var found = false;
- keywords.forEach(function (existing) {
- if (existing == tag) {
- found = true;
- }
- });
- if (!found) {
- keywords.push(tag);
- }
- });
-
- message = message.remove_urls().remove_hashtags().remove_screen_names().replace_punctuation();
- var words = tokenizer.tokenize(message);//.tokenizeAndStem();
-
-
- //console.log(words);
-
- function lookup_next_word () {
- if (words.length == 0) {
- return add_topics();
- }
- word = words.shift();
- //console.log("> Word: "+word);
-
- var neither = 0;
- var noun = 0;
- var verb = 0;
-
- if (word.length > 2) {
- if (word.substring(0, word.length-3) == "ing") {
- verb = 100;
- classify_word(word, noun, verb, neither);
- } else
- if (word.match(/^[0-9]*$/)) {
- classify_word(word, noun, verb, neither);
- } else {
- wordnet.lookup(word, function(results) {
-
- results.forEach(function(result) {
- if (result.pos == "n") {
- noun++;
- } else
- if (result.pos == "v") {
- verb++;
- } else
- if (result.pos == "a" || result.pos == "r" || result.pos == "s") {
- neither++;
- }
- });
- classify_word(word, noun, verb, neither);
- });
- return;
- }
- } else {
- classify_word(word, noun, verb, neither);
- }
- }
- function classify_word (w, n, v, neither) {
- if ((n == 0 && v == 0) || neither > n+v/2) {
- console.log("I don't know what kind of word '"+w+"' is");
- } else {
- console.log("Looked up '"+w+"' and found it is a ... "+(n >= v ? "noun ("+n+":"+v+")" : "verb ("+v+":"+n+")"));
- var found = false;
- keywords.forEach(function (existing) {
- if (existing == w) {
- found = true;
- }
- });
- if (!found) {
- keywords.push(w);
- }
- }
- lookup_next_word();
- }
- lookup_next_word();
-
- function add_topics () {
- var existing_topics = [];
- console.log("Getting topics");
-
- if (keywords.length > 0) {
- Topic.find({text: {"$in": keywords}}, function (err, t) {
- existing_topics = t;
- add_new_topics();
- });
- } else {
- done_with_topics();
- }
-
- function add_new_topics () {
- var new_topics = [];
- keywords.forEach(function (keyword) {
- var found = false;
- existing_topics.forEach(function (existing) {
- if (existing.text == keyword) {
- found = true;
- }
- });
- if (!found) {
- new_topics.push(keyword);
- }
- });
-
- function add_each_topic () {
- if (new_topics.length == 0) {
- return done_adding();
- }
- topic_text = new_topics.shift();
- var topic = new Topic({text: topic_text, ratings: {overall: 0}});
- topic.save(function (err) {
- add_each_topic();
- });
- }
- add_each_topic();
- }
-
- function done_adding () {
- var topic_ids = [];
- Topic.find({text: {"$in": keywords}}, function (err, t) {
- t.forEach(function (topic) {
- topic_ids.push(topic._id);
- });
- item.topics = topic_ids;
- console.log("ADDED TOPIC IDS! "+item.topics.length);
- item.commit("topics");
-
- done_with_topics();
- });
-
- }
-
- function done_with_topics () {
- console.log("calling back... "+item.topics.length);
-
- rate_that_shit();
- }
-
- function rate_that_shit () {
- var topic_ratings = 0;
- var topic_count = 0;
- var char_ratings = 0;
- var char_count = 0;
-
- ActivityItem.findOne({_id: item.id})
- .populate("characteristics")
- .populate("topics")
- .run(function (err, _item) {
- _item.topics.forEach(function (topic) {
- if (topic.ratings.overall > 0 || topic.ratings.overall < 0) {
- topic_ratings += topic.ratings.overall;
- topic_count++;
- }
- });
- _item.characteristics.forEach(function (ch) {
- char_ratings += parseInt(ch.rating);
- char_count++;
- });
-
- topic_count = topic_count > 1 ? topic_count : 1;
- char_count = char_count > 1 ? char_count : 1;
-
- ratings.topics = topic_ratings;
- ratings.characteristics = char_ratings;
- ratings.overall = 0 + (topic_ratings/topic_count + char_ratings/char_count*2)/3;
- });
- //rate_by_topics();
- //rate_by_characteristics();
- //rate_by_behavior();
-
- //Topic.find({_id: {"$in": item.topics}}, function (
-
- item.ratings = ratings;
- item.commit("ratings");
- cb(error, item);
- }
- }
-}
-
-function get_topics (text, cb) {
- var error = null;
- var my_topics = [];
-
-
-
- cb(error, my_topics);
-}
-
if (!String.prototype.remove_urls) {
String.prototype.remove_urls = function () {
var url_pattern = /\(?\bhttps?:\/\/[-A-Za-z0-9+&@#\/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#\/%=~_()|]/gi;
@@ -499,4 +499,5 @@ if (!String.prototype.replace_punctuation) {
mongoose.model('ActivityItem', ActivityItemSchema);
exports.ActivityItem = ActivityItemSchema;
+exports.analyze_me = analyze_me;
@@ -1,10 +1,13 @@
-var assert = require('assert');
+var assert = require('assert')
+ , analyze_me = require('../models/ActivityItem').analyze_me
+ , mongoose = require('mongoose');
suite('Natural Language', function() {
setup(function() {
+ mongoose.connect('mongodb://localhost/testdb');
});
- suite('something', function() {
+ suite('natural', function() {
});
});

0 comments on commit 73503a8

Please sign in to comment.