Permalink
Browse files

Add some more bad words and spam that slipped through the cracks.

  • Loading branch information...
1 parent bbe48ee commit 412d8e178c1daa2d09247e2c5c38b49ec0e0280b @courtenay courtenay committed Feb 6, 2009
@@ -9,12 +9,12 @@ class << self
def run
bad_words = %w( sex sexy porn gay erotica viagra erotismo porno porn lesbian amateur tit)
bad_words |= %w( gratis erotismo porno torrent bittorrent adulto )
- bad_words |= %w( cialis viagra payday loan )
- bad_words |= %w( webcam free-web-host)
+ bad_words |= %w( cialis viagra payday loan jihad )
+ bad_words |= %w( webcam free-web-host rapidshare muslim)
bad_words << /pel?cula/ << /pornogr?fica/ << "portal porno" # srsly, spamming in spanish?
- suspicious_words = %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) << "credit card"
- suspicious_words |= %w( adult pharmacy overnight shipping free hot movie nylon arab ?????? xxx)
+ suspicious_words = %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) << "credit card" << "my friend" << "friend sent me"
+ suspicious_words |= %w( adult pharmacy overnight shipping free hot movie nylon arab ?????? xxx) << "sent me a link"
suspicious_words << "forums/member.php?u=" << "chat room" << "free chat" << "yahoo chat" << "page.php"
bad_words.each do |word|
results = @body.downcase.scan(word)
@@ -24,6 +24,9 @@ def run
@body.scan(/<a[^>]+>(.*?)<\/a>/).each do |match|
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a link: #{word}"
end
+ @body.scan(/\nhttp:\/\/(.*?#{word})/).each do |match|
+ add_score self.class.bad_word_score ** 4 * match[0].scan(word).size, "nasty word inside a straight-up link: #{word}"
+ end
@body.scan(/<a(.*?)>/).each do |match|
add_score self.class.bad_word_score * 4 * match[0].scan(word).size, "nasty word inside a URL: #{word}"
end
View
@@ -21,6 +21,7 @@ def run
'cn' => 20, # Chinese? spammer.
'us' => 8, # .us ? possibly spam
'it' => 5,
+ 'tk' => 20,
'pl' => 8,
'info' => 20,
'biz' => 40 # no-one uses these for reals
@@ -32,8 +33,8 @@ def run
tokens = @body.split(" ")
if tokens[-1] =~ /^http:\/\//
- add_score 50, "Text ends in a link"
- add_score 10, "Text ends in a link and only has one link" if link_count == 1
+ add_score 10, "Text ends in a http token"
+ add_score 50, "Text ends in a http token and only has one token" if link_count == 1
end
@body.scan(/http:\/\/(.*?)[\/\]?]/) do |match|
@@ -0,0 +1,10 @@
+
+Wooohohoho check this great private porn collection. I have downloaded from one site
+where i pay monthly 74.99$ and than uploaded for u:)
+
+
+http://rapidshare.com/files/194677140/Cute_Amateur_Black_girl_deepthroats_White_boyfriend.wmv
+http://rapidshare.com/files/194677761/Ultimate_Anal.wmv
+http://rapidshare.com/files/194677995/Bubblebutt_gets_assfucked.wmv
+
+I hope u like it hehe
@@ -0,0 +1,4 @@
+My friend Christy sent me a link to a movie about radical muslims in America. These guys are absolutely nuts...
+they want to take over our country.
+
+http://www.thethirdjihad3.tk
View
@@ -81,7 +81,7 @@ def test_scores_spam_really_high
end
spam = comment.splam?
score = comment.splam_score
- #$stderr.puts "#{f} score: #{score}"
+ #$stderr.puts "#{f} score: #{score}\n#{comment.splam_reasons.inspect}"
#$stderr.puts "====================="
assert spam, "Comment #{f} was not spam, score was #{score} but threshold was #{Foo.splam_suite.threshold}\nReasons were #{comment.splam_reasons.inspect}"
end

0 comments on commit 412d8e1

Please sign in to comment.