Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 58 lines (46 sloc) 1.216 kb
69547af @alexandru added bayes classifier
authored
1
# Naive Bayes text classifier.
#
# See http://en.wikipedia.org/wiki/Naive_Bayes_classifier
#
# This class calls each_word, word_weighted_average, cat_count, total_count
# and categories, none of which are defined here; they are presumably
# provided by StuffClassifier::Base (verify against that class).
class StuffClassifier::Bayes < StuffClassifier::Base
  # Hash of category => threshold factor consulted by #classify. Categories
  # without an entry use a factor of 1.0.
  attr_writer :thresholds

  # @param name forwarded unchanged to the superclass constructor
  # @param opts [Hash] forwarded unchanged to the superclass constructor
  def initialize(name, opts={})
    super(name, opts)
    @thresholds = {}
  end

  # Product of word_weighted_average(word, category) over every word that
  # each_word yields for +text+.
  #
  # @return [Numeric] the product; 1 when the text yields no words
  def doc_prob(text, category)
    weights = each_word(text).map { |word| word_weighted_average(word, category) }
    weights.inject(1) { |product, weight| product * weight }
  end

  # Score of +text+ for +category+: the category's share of total_count
  # multiplied by #doc_prob.
  #
  # @return [Float, Numeric] 0.0 when total_count is zero
  def text_prob(text, category)
    total = total_count
    # With nothing counted yet the ratio below is undefined (NaN for Floats,
    # ZeroDivisionError for Integers); score the category as impossible.
    return 0.0 if total.zero?

    # to_f guards against Integer division truncating the ratio to 0 in case
    # the counters are Integers.
    prior = cat_count(category).to_f / total
    prior * doc_prob(text, category)
  end

  # Scores +text+ against every entry of categories.
  #
  # @return [Array<Array>] [category, score] pairs, highest score first
  def cat_scores(text)
    probs = {}
    categories.each { |cat| probs[cat] = text_prob(text, cat) }
    # Hash#sort already yields [key, value] pairs.
    probs.sort { |a, b| b[1] <=> a[1] }
  end

  # Picks the category with the highest score for +text+.
  #
  # @param default returned when no category scores above 0.0, or when the
  #   winner does not beat every other category by its threshold factor
  # @return the winning category, or +default+
  def classify(text, default=nil)
    # Find the category with the highest probability
    max_prob = 0.0
    best = nil

    scores = cat_scores(text)
    scores.each do |cat, prob|
      # Strict comparison: among equal scores the first one listed wins.
      if prob > max_prob
        max_prob = prob
        best = cat
      end
    end

    return default unless best

    threshold = @thresholds[best] || 1.0

    # Reject the winner if any rival's score, scaled by the winner's
    # threshold, exceeds the winning score.
    contested = scores.any? do |cat, prob|
      cat != best && prob * threshold > max_prob
    end

    contested ? default : best
  end
end
Something went wrong with that request. Please try again.