/
Word.pde
76 lines (60 loc) · 1.59 KB
/
Word.pde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// A single token tracked by a naive Bayesian spam filter.
// It stores how often the token was seen in "bad" (spam) and "good" (ham)
// messages, the per-corpus frequency ratios derived from those counts, and
// the resulting probability that a message containing the token is spam.
//
// Typical use: call countBad()/countGood() while training, then
// calcBadProb(), calcGoodProb() and finalizeProb() once the totals are known.
class Word {
  // Good occurrences are weighted double to bias against false positives
  // (technique from Paul Graham's "A Plan for Spam").
  static final float GOOD_WEIGHT = 2.0f;
  // Bounds applied to the frequency ratios and to the final probability so a
  // single token can never be treated as absolute proof either way.
  static final float MAX_RATIO = 1.0f;
  static final float MIN_P_SPAM = 0.01f;
  static final float MAX_P_SPAM = 0.99f;

  String word;    // The String itself
  int countBad;   // The total times it appears in "bad" messages
  int countGood;  // The total times it appears in "good" messages
  float rBad;     // bad count / total bad words, capped at 1
  float rGood;    // weighted good count / total good words, capped at 1
  float pSpam;    // probability this word is spam

  // Create a word with all counters, ratios and the probability set to 0.
  Word(String s) {
    word = s;
    countBad = 0;
    countGood = 0;
    rBad = 0.0f;
    rGood = 0.0f;
    pSpam = 0.0f;
  }

  // Record one more occurrence in a "bad" message.
  void countBad() {
    countBad++;
  }

  // Record one more occurrence in a "good" message.
  void countGood() {
    countGood++;
  }

  // Compute how often this word is bad.
  // total: size of the bad corpus. When it is not positive the previous
  // ratio is left untouched (avoids dividing by zero).
  void calcBadProb(int total) {
    if (total > 0) {
      rBad = Math.min(MAX_RATIO, countBad / (float) total);
    }
  }

  // Compute how often this word is good, applying GOOD_WEIGHT.
  // total: size of the good corpus. When it is not positive the previous
  // ratio is left untouched (avoids dividing by zero).
  void calcGoodProb(int total) {
    if (total > 0) {
      // Multiply in float so a very large count cannot overflow int, and cap
      // at 1 because the doubled count may exceed the corpus size.
      rGood = Math.min(MAX_RATIO, GOOD_WEIGHT * countGood / total);
    }
  }

  // Apply Bayes' rule to compute how likely this word is "spam", then clamp
  // the result into [MIN_P_SPAM, MAX_P_SPAM].
  // If both ratios are 0 there is no evidence, so the current pSpam (for
  // example one preset through setPSpam) is kept and only clamped.
  void finalizeProb() {
    float evidence = rBad + rGood;
    if (evidence > 0) {
      pSpam = rBad / evidence;
    }
    if (pSpam < MIN_P_SPAM) {
      pSpam = MIN_P_SPAM;
    } else if (pSpam > MAX_P_SPAM) {
      pSpam = MAX_P_SPAM;
    }
  }

  // The "interesting" rating for a word is how different from 0.5
  // (completely neutral) its spam probability is.
  float interesting() {
    return Math.abs(0.5f - pSpam);
  }

  // Some getters and setters

  // Returns the good-frequency ratio (rGood).
  float getPGood() {
    return rGood;
  }

  // Returns the bad-frequency ratio (rBad).
  float getPBad() {
    return rBad;
  }

  // Returns the current spam probability.
  float getPSpam() {
    return pSpam;
  }

  // Overrides the spam probability; the value is stored as given.
  void setPSpam(float f) {
    pSpam = f;
  }

  // Returns the token text.
  String getWord() {
    return word;
  }
}