Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ae13cad
commit 08ebdbc
Showing
2 changed files
with
215 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# -*- coding: utf-8 -*- | ||
import re | ||
from .basicanalyzer import BasicAnalyzer | ||
|
||
|
||
class ExactWordAnalyzer(BasicAnalyzer):
    """Analyzer to match the content of a paste by whole words.

    A configured word only counts as found when it appears in the paste body
    as a standalone token, i.e. it is not directly preceded or followed by
    another word character. Words are always treated literally, never as
    regular expressions.
    """
    name = "ExactWordAnalyzer"

    def __init__(self, actions, words, blacklist=None, case_sensitive=False):
        """
        Create a new analyzer

        :param actions: Forwarded unchanged to ``BasicAnalyzer.__init__``
        :param words: A single word, a list of words, or None for "no words yet"
        :param blacklist: Optional list of words; if any of them occurs in a
                          paste as a whole word, the paste is never matched
        :param case_sensitive: When False (default) both the words and the
                               paste body are lowercased before comparison
        """
        super().__init__(actions, "{0} ({1})".format(self.name, words))

        # Normalize ``words`` so that self.words is always a list.
        # A list passed by the caller is stored as-is (not copied).
        if words is None:
            self.words = []
        elif isinstance(words, list):
            self.words = words
        else:
            self.words = [words]

        self.blacklist = blacklist or []
        self.case_sensitive = case_sensitive

    def _blacklist_word_found(self, text):
        """
        Check whether any blacklisted word occurs in the text as a whole word

        :param text: Text to be searched
        :return: True if at least one blacklisted word was found, else False
        """
        blacklist = self.blacklist

        if not blacklist:
            return False

        # The blacklist honors the same case sensitivity setting as the words
        if not self.case_sensitive:
            text = text.lower()
            blacklist = [word.lower() for word in blacklist]

        return any(self._word_in_text(word, text) for word in blacklist)

    def add_word(self, word):
        """
        Add a word to the analyzer
        :param word: Word to be added
        :return:
        """
        self.words.append(word)

    def match(self, paste):
        """
        Check if the specified words are part of the paste text

        :param paste: Object exposing the text in its ``body`` attribute, or None
        :return: False if the paste is None or a blacklisted word was found;
                 otherwise the list of configured words found in the body
                 (empty, and therefore falsy, if none matched). In case
                 insensitive mode the returned words are lowercased.
        """
        if paste is None:
            return False

        paste_content = paste.body or ""

        if self._blacklist_word_found(paste_content):
            return False

        words = self.words

        if not self.case_sensitive:
            paste_content = paste_content.lower()
            words = [word.lower() for word in words]

        return [
            word for word in words
            if self._word_in_text(word, paste_content)
        ]

    def _word_in_text(self, word, text):
        """
        Check whether the word occurs in the text as a standalone token

        :param word: Literal word to look for
        :param text: Text to be searched
        :return: True if the word was found, else False
        """
        # re.escape() makes sure the word is matched literally: characters such
        # as '+', '.', '(' or '[' would otherwise be interpreted as regex syntax
        # and either raise re.error or match unintended text.
        # The lookarounds behave like \b for ordinary words, but unlike \b they
        # also work for words that start or end with a non-word character
        # (e.g. "c++").
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        return re.search(pattern, text) is not None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# -*- coding: utf-8 -*- | ||
import unittest | ||
from unittest import mock | ||
|
||
from pastepwn.actions.basicaction import BasicAction | ||
from pastepwn.analyzers.exactwordanalyzer import ExactWordAnalyzer | ||
|
||
|
||
class TestExactWordAnalyzer(unittest.TestCase):
    """Unit tests for ExactWordAnalyzer, run against a mocked paste object."""

    def setUp(self):
        # A fresh mock per test; only its ``body`` attribute is set by the tests
        self.paste = mock.Mock()

    def _check_bodies(self, analyzer, expectations):
        """Set each body on the mocked paste and assert the truthiness of the match result."""
        for body, should_match in expectations:
            self.paste.body = body
            outcome = analyzer.match(self.paste)
            if should_match:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_match(self):
        """A single word matches case-insensitively and only as a whole word."""
        word_analyzer = ExactWordAnalyzer(None, "Test")
        self._check_bodies(word_analyzer, [
            ("This is a Test", True),
            ("There is a test for everything", True),
            ("There are tests for everything", False),
            ("This is a Test for a longer sentence", True),
            ("Completely unrelated", False),
        ])

    def test_blacklist(self):
        """A blacklisted word suppresses the match, honoring case sensitivity."""
        forbidden = ["fake", "bad"]

        insensitive = ExactWordAnalyzer(None, "Test", blacklist=forbidden)
        self._check_bodies(insensitive, [
            ("This is a Test", True),
            ("This is a fake Test", False),
        ])

        sensitive = ExactWordAnalyzer(None, "Test", blacklist=forbidden, case_sensitive=True)
        self._check_bodies(sensitive, [
            ("This is a Test", True),
            ("This is a Fake Test", True),
        ])

    def test_multiple_words(self):
        """Any one of several configured words is enough for a match."""
        empty_analyzer = ExactWordAnalyzer(None, None)
        self.assertEqual(empty_analyzer.words, [])

        vocabulary = ["My", "first", "Test"]

        insensitive = ExactWordAnalyzer(None, vocabulary)
        self._check_bodies(insensitive, [
            ("This is a little test for something", True),
            ("You are my best friend so far!", True),
            ("This is the first time I try this", True),
            ("This time we try to match multiple words/tests for the first time.", True),
            ("None of the words are contained!", False),
        ])

        # Check for case sensitivity for multiple words
        sensitive = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)
        self._check_bodies(sensitive, [
            ("That's not my issue!", False),
            ("That's not My issue!", True),
        ])

    def test_add_word(self):
        """add_word appends to the analyzer's word list."""
        word_analyzer = ExactWordAnalyzer(None, "Test")
        self.assertEqual(len(word_analyzer.words), 1)
        self.assertEqual(word_analyzer.words, ["Test"])

        word_analyzer.add_word("second")
        self.assertEqual(len(word_analyzer.words), 2)
        self.assertEqual(word_analyzer.words, ["Test", "second"])

    def test_case_sensitive(self):
        """With case_sensitive=True only the exact casing matches."""
        strict = ExactWordAnalyzer(None, "Test", case_sensitive=True)
        self._check_bodies(strict, [
            ("This is a Test for case sensitivity", True),
            ("This is a test for case sensitivity", False),
            ("This is a tESt for case sensitivity", False),
        ])

        relaxed = ExactWordAnalyzer(None, "Te1st")
        self._check_bodies(relaxed, [
            ("This is a te1st for case sensitivity", True),
        ])

        relaxed = ExactWordAnalyzer(None, "Te1st")
        self._check_bodies(relaxed, [
            ("This is a tE1st for case sensitivity", True),
        ])

    def test_multiple_case_sensitive(self):
        """Test if it's possible to match any of multiple words in a wordanalyzer when case sensitivity is activated"""
        strict = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)
        self._check_bodies(strict, [
            ("This is a little test for something", False),
            ("You are my best friend so far!", False),
            ("Myself and I", False),
        ])

        # For positive results the returned list must hold exactly the matched word
        single_hits = (
            ("This is a Test for case sensitivity", "Test"),
            ("This is a test for case sensitivity. It's the first of its kind.", "first"),
        )
        for body, expected_word in single_hits:
            self.paste.body = body
            found = strict.match(self.paste)
            self.assertTrue(found)
            self.assertEqual(1, len(found))
            self.assertEqual(expected_word, found[0])

    def test_match_none(self):
        """Neither a None body nor a None paste produces a match."""
        word_analyzer = ExactWordAnalyzer(None, "Test")
        self.paste.body = None
        self.assertFalse(word_analyzer.match(self.paste))

        self.paste = None
        self.assertFalse(word_analyzer.match(self.paste))

    def test_match_empty(self):
        """An empty body produces no match."""
        word_analyzer = ExactWordAnalyzer(None, "Test")
        self.paste.body = ""
        self.assertFalse(word_analyzer.match(self.paste))

    def test_actions_present(self):
        """A single action passed to the constructor is exposed as a one-element list."""
        single_action = mock.MagicMock(spec=BasicAction)
        word_analyzer = ExactWordAnalyzer(single_action, "Test")
        self.assertEqual([single_action], word_analyzer.actions)
|
||
|
||
# Run the tests through unittest's runner when this module is executed directly.
if __name__ == '__main__':
    unittest.main()