Skip to content

Commit

Permalink
add exact word analyzer and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
colincrawford committed Sep 25, 2020
1 parent ae13cad commit 08ebdbc
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 0 deletions.
66 changes: 66 additions & 0 deletions pastepwn/analyzers/exactwordanalyzer.py
@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
import re
from .basicanalyzer import BasicAnalyzer


class ExactWordAnalyzer(BasicAnalyzer):
    """Analyzer that matches a paste when it contains given words as whole words.

    A word only counts when it is not directly surrounded by other word
    characters, so "test" is found in "a test here" but not in "tests".
    """
    name = "ExactWordAnalyzer"

    def __init__(self, actions, words, blacklist=None, case_sensitive=False):
        """
        Create the analyzer
        :param actions: Passed through unchanged to BasicAnalyzer
        :param words: A single word, a collection of words, or None for no words
        :param blacklist: Optional word or collection of words; a paste
            containing any of them as a whole word never matches
        :param case_sensitive: If False (default), words, blacklist and paste
            text are all lowercased before comparison
        """
        super().__init__(actions, "{0} ({1})".format(self.name, words))

        # Copy the inputs so add_word() never mutates a list owned by the caller.
        self.words = self._as_word_list(words)
        # `or None` keeps the historical "any falsy blacklist means no blacklist".
        self.blacklist = self._as_word_list(blacklist or None)
        self.case_sensitive = case_sensitive

    @staticmethod
    def _as_word_list(value):
        """Normalise None / a single word / a collection of words to a new list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return list(value)
        return [value]

    def _blacklist_word_found(self, text):
        """Return True if any blacklisted word occurs in text as a whole word."""
        blacklist = self.blacklist

        if not blacklist:
            return False

        if not self.case_sensitive:
            text = text.lower()
            blacklist = [word.lower() for word in blacklist]

        return any(self._word_in_text(word, text) for word in blacklist)

    def add_word(self, word):
        """
        Add a word to the analyzer
        :param word: Word to be added
        :return:
        """
        self.words.append(word)

    def match(self, paste):
        """
        Check if the specified words are part of the paste text
        :param paste: Object with a `body` attribute holding the text, or None
        :return: False if paste is None or a blacklisted word is present;
            otherwise the list of configured words found in the body (empty,
            and therefore falsy, when none are found). When the analyzer is
            case-insensitive the returned words are lowercased.
        """
        if paste is None:
            return False

        # A missing body is treated like an empty paste.
        paste_content = paste.body or ""

        if self._blacklist_word_found(paste_content):
            return False

        words = self.words

        if not self.case_sensitive:
            paste_content = paste_content.lower()
            words = [word.lower() for word in words]

        return [
            word for word in words
            if self._word_in_text(word, paste_content)
        ]

    def _word_in_text(self, word, text):
        """Return True if word occurs in text and is not adjacent to word characters."""
        if not word:
            # An empty search word carries no information; never report it.
            return False

        # re.escape makes the word match literally even if it contains regex
        # metacharacters. Lookarounds are used instead of \b because \b cannot
        # match next to a word that starts or ends with a non-word character
        # (e.g. "c++"); for ordinary words both forms are equivalent.
        pattern = r"(?<!\w)" + re.escape(word) + r"(?!\w)"
        return re.search(pattern, text) is not None
149 changes: 149 additions & 0 deletions pastepwn/analyzers/tests/exactwordanalyzer_test.py
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
import unittest
from unittest import mock

from pastepwn.actions.basicaction import BasicAction
from pastepwn.analyzers.exactwordanalyzer import ExactWordAnalyzer


class TestExactWordAnalyzer(unittest.TestCase):
    """Unit tests for the whole-word matching of ExactWordAnalyzer."""

    def setUp(self):
        """Provide a fresh mocked paste object for every test."""
        self.paste = mock.Mock()

    def _assert_hit(self, analyzer, body):
        """Put body on the mocked paste and expect a truthy match result."""
        self.paste.body = body
        self.assertTrue(analyzer.match(self.paste))

    def _assert_miss(self, analyzer, body):
        """Put body on the mocked paste and expect a falsy match result."""
        self.paste.body = body
        self.assertFalse(analyzer.match(self.paste))

    def test_match(self):
        """A single word matches only as a whole word, ignoring case by default."""
        subject = ExactWordAnalyzer(None, "Test")

        self._assert_hit(subject, "This is a Test")
        self._assert_hit(subject, "There is a test for everything")
        self._assert_miss(subject, "There are tests for everything")
        self._assert_hit(subject, "This is a Test for a longer sentence")
        self._assert_miss(subject, "Completely unrelated")

    def test_blacklist(self):
        """A blacklisted word suppresses the match, honouring case sensitivity."""
        forbidden = ["fake", "bad"]

        relaxed = ExactWordAnalyzer(None, "Test", blacklist=forbidden)
        self._assert_hit(relaxed, "This is a Test")
        self._assert_miss(relaxed, "This is a fake Test")

        strict = ExactWordAnalyzer(None, "Test", blacklist=forbidden, case_sensitive=True)
        self._assert_hit(strict, "This is a Test")
        self._assert_hit(strict, "This is a Fake Test")

    def test_multiple_words(self):
        """Any one of several configured words is enough for a match."""
        empty = ExactWordAnalyzer(None, None)
        self.assertEqual(empty.words, [])

        relaxed = ExactWordAnalyzer(None, ["My", "first", "Test"])
        self._assert_hit(relaxed, "This is a little test for something")
        self._assert_hit(relaxed, "You are my best friend so far!")
        self._assert_hit(relaxed, "This is the first time I try this")
        self._assert_hit(relaxed, "This time we try to match multiple words/tests for the first time.")
        self._assert_miss(relaxed, "None of the words are contained!")

        # The same word list, but now the case has to agree as well.
        strict = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)
        self._assert_miss(strict, "That's not my issue!")
        self._assert_hit(strict, "That's not My issue!")

    def test_add_word(self):
        """add_word appends to the analyzer's word list."""
        subject = ExactWordAnalyzer(None, "Test")
        self.assertEqual(len(subject.words), 1)
        self.assertEqual(subject.words, ["Test"])

        subject.add_word("second")
        self.assertEqual(len(subject.words), 2)
        self.assertEqual(subject.words, ["Test", "second"])

    def test_case_sensitive(self):
        """Case-sensitive analyzers require exact case; the default does not."""
        strict = ExactWordAnalyzer(None, "Test", case_sensitive=True)
        self._assert_hit(strict, "This is a Test for case sensitivity")
        self._assert_miss(strict, "This is a test for case sensitivity")
        self._assert_miss(strict, "This is a tESt for case sensitivity")

        relaxed = ExactWordAnalyzer(None, "Te1st")
        self._assert_hit(relaxed, "This is a te1st for case sensitivity")

        relaxed = ExactWordAnalyzer(None, "Te1st")
        self._assert_hit(relaxed, "This is a tE1st for case sensitivity")

    def test_multiple_case_sensitive(self):
        """Several words with case sensitivity on: only exact-case words are returned."""
        strict = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)

        self._assert_miss(strict, "This is a little test for something")
        self._assert_miss(strict, "You are my best friend so far!")
        self._assert_miss(strict, "Myself and I")

        self.paste.body = "This is a Test for case sensitivity"
        found = strict.match(self.paste)
        self.assertTrue(found)
        self.assertEqual(1, len(found))
        self.assertEqual("Test", found[0])

        self.paste.body = "This is a test for case sensitivity. It's the first of its kind."
        found = strict.match(self.paste)
        self.assertTrue(found)
        self.assertEqual(1, len(found))
        self.assertEqual("first", found[0])

    def test_match_none(self):
        """Neither a None body nor a None paste produces a match."""
        subject = ExactWordAnalyzer(None, "Test")
        self._assert_miss(subject, None)

        self.paste = None
        self.assertFalse(subject.match(self.paste))

    def test_match_empty(self):
        """An empty body does not match."""
        subject = ExactWordAnalyzer(None, "Test")
        self._assert_miss(subject, "")

    def test_actions_present(self):
        """A single action handed to the constructor is exposed as a one-item list."""
        single_action = mock.MagicMock(spec=BasicAction)
        subject = ExactWordAnalyzer(single_action, "Test")
        self.assertEqual([single_action], subject.actions)


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 08ebdbc

Please sign in to comment.