-
Notifications
You must be signed in to change notification settings - Fork 71
/
compute-current-tcr
executable file
·120 lines (100 loc) · 2.27 KB
/
compute-current-tcr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl
#
# Scores every message listed in spam.log and ham.log using the per-rule
# scores from ../rules/50_scores.cf, classifies each message against a fixed
# threshold, and prints the resulting TCR, spam recall and spam precision.
#
# With
#   werr      = (lambda * nlegitspam + nspamlegit) / (lambda * nlegit + nspam)
#   werr_base = nspam                              / (lambda * nlegit + nspam)
# the reported figures are
#   TCR           = werr_base / werr
#   SpamRecall    = nspamspam / nspam                      (as a percentage)
#   SpamPrecision = nspamspam / (nspamspam + nlegitspam)   (as a percentage)
use strict;
use warnings;

# Weight applied to legitimate mail in the weighted error rate.
my $lambda = 1;

# A message whose summed rule score reaches this value is filed as spam.
my $threshold = 5;

# Rule name => score, loaded from the score file.
my %scores;

# Classification tallies, updated by add_point().  They are declared here,
# ahead of every sub in the file, so that the subs and the reporting code
# below all refer to the same variables.
my ($nspam,  $nspamspam,  $nspamlegit)  = (0, 0, 0);
my ($nlegit, $nlegitspam, $nlegitlegit) = (0, 0, 0);

my $scores_path = '../rules/50_scores.cf';
open(my $scores_fh, '<', $scores_path)
    or die "cannot open $scores_path: $!";
while (my $line = <$scores_fh>) {
    next unless $line =~ /^score\s+([^\s]*)\s+([-0-9.]*)/;
    my ($rule, $value) = ($1, $2);

    # The pattern also accepts tokens such as '' or '-'.  Convert once, here,
    # so they count as 0 (as they always did) without emitting a numeric
    # warning for every message that hits the rule.
    no warnings 'numeric';
    $scores{$rule} = $value + 0;
}
close($scores_fh);

_tally_log('spam.log', 1);
_tally_log('ham.log',  0);

# Every ratio below is guarded: an empty log, or a run in which nothing
# reaches the threshold, would otherwise die with "Illegal division by zero".
my $weighted_total = $lambda * $nlegit + $nspam;
my $werr = $weighted_total
    ? ($lambda * $nlegitspam + $nspamlegit) / $weighted_total
    : 0;
my $werr_base = $weighted_total ? $nspam / $weighted_total : 0;

$werr ||= 0.000001;    # avoid / by 0 when there were no errors at all
my $tcr = $werr_base / $werr;

my $sr = $nspam ? ($nspamspam / $nspam) * 100.0 : 0;

my $filed_as_spam = $nspamspam + $nlegitspam;
my $sp = $filed_as_spam ? ($nspamspam / $filed_as_spam) * 100.0 : 0;

printf("TCR: %3.6f SpamRecall: %3.6f%% SpamPrecision: %3.6f%%\n",
    $tcr, $sr, $sp);
exit;

# _tally_log($path, $is_spam)
#
# Reads the log at $path, scores every non-comment line and records the
# outcome through add_point().  $is_spam is true when every message in the
# log is known to be spam and false when every message is known to be ham.
# Dies if the log cannot be opened.
sub _tally_log {
    my ($path, $is_spam) = @_;

    open(my $fh, '<', $path) or die "cannot open $path: $!";
    while (my $line = <$fh>) {
        next if $line =~ /^\#/;

        # The rule list is the last whitespace-delimited field.  A line that
        # does not match is still counted, as a message that hit no rules.
        my $tests = '';
        if ($line =~ /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/) {
            $tests = $1 || '';
        }

        my $score = _score_for_tests($tests);
        add_point($is_spam, ($score >= $threshold ? 1 : 0), $score);
    }
    close($fh);
    return;
}

# _score_for_tests($tests)
#
# Returns the summed score of a comma-separated rule list.  An entry may use
# the compacted RULE(hitcount) form, in which case the rule's score is added
# hitcount times.  Rules without an entry in %scores contribute nothing.
sub _score_for_tests {
    my ($tests) = @_;

    my $score = 0.0;
    for my $rule (split(/,/, $tests)) {
        my $hits = 1;

        # Support compacted RULE(hitcount) format
        if ($rule =~ s/\((\d+)\)$//) {
            $hits = $1;
        }

        next unless defined $scores{$rule};
        $score += $scores{$rule} for (1 .. $hits);
    }
    return $score;
}
# add_point($isspam, $filedasspam, $score)
#
# Records one classified message in the file-level tallies.
#   $isspam      - true when the message really is spam, false when it is ham
#   $filedasspam - true when the message was classified as spam
#   $score       - the message's score; accepted but not used here
# Two counters are bumped per call: the total for the message's true class
# ($nspam or $nlegit) and the counter for the true-class/filed-as pair
# ($nspamspam, $nspamlegit, $nlegitspam or $nlegitlegit).
sub add_point {
    my ($isspam, $filedasspam, $score) = @_;

    # Select the class total and the outcome counter by reference, then
    # increment both.
    my ($class_total, $outcome) =
          $isspam
        ? (\$nspam,  ($filedasspam ? \$nspamspam  : \$nspamlegit))
        : (\$nlegit, ($filedasspam ? \$nlegitspam : \$nlegitlegit));

    $$class_total++;
    $$outcome++;
}