Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add exact word analyzer and tests #194

Merged
merged 4 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions pastepwn/analyzers/exactwordanalyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
import re
from .basicanalyzer import BasicAnalyzer
from pastepwn.util import listify


class ExactWordAnalyzer(BasicAnalyzer):
    """Analyzer to match the content of a paste by whole words.

    A word only counts as found when it is not directly surrounded by other
    word characters, so "test" is found in "a test." but not in "tests".
    Words are always treated as literal text, never as regular expressions.
    """
    name = "ExactWordAnalyzer"

    def __init__(self, actions, words, blacklist=None, case_sensitive=False):
        """
        Create a new analyzer
        :param actions: Passed on unchanged to BasicAnalyzer
        :param words: Word or list of words to search for (normalized with listify)
        :param blacklist: Word or iterable of words; a paste containing any of
                          them as a whole word never matches
        :param case_sensitive: If False (default), text and words are
                               lower-cased before they are compared
        """
        super().__init__(actions, "{0} ({1})".format(self.name, words))

        self.words = listify(words)
        # A bare string would otherwise be iterated character by character,
        # turning e.g. "fake" into the blacklist entries "f", "a", "k", "e".
        if isinstance(blacklist, str):
            blacklist = [blacklist]
        self.blacklist = blacklist or []
        self.case_sensitive = case_sensitive

    def _blacklist_word_found(self, text):
        """
        Check whether any blacklisted word occurs in the text as a whole word
        :param text: Text to be searched
        :return: True if at least one blacklisted word was found, else False
        """
        blacklist = self.blacklist

        if not blacklist:
            return False

        if not self.case_sensitive:
            text = text.lower()
            blacklist = [word.lower() for word in blacklist]

        return any(self._word_in_text(word, text) for word in blacklist)

    def add_word(self, word):
        """
        Add a word to the analyzer
        :param word: Word to be added
        :return:
        """
        self.words.append(word)

    def match(self, paste):
        """
        Check if the specified words are part of the paste text
        :param paste: Object exposing the text as ``body`` (may be None)
        :return: False if the paste is None or a blacklisted word was found,
                 otherwise the list of configured words found in the paste
                 (lower-cased when the analyzer is case insensitive); the
                 list is empty, and therefore falsy, if nothing was found
        """
        if paste is None:
            return False

        paste_content = paste.body or ""

        if self._blacklist_word_found(paste_content):
            return False

        words = self.words

        if not self.case_sensitive:
            paste_content = paste_content.lower()
            words = [word.lower() for word in words]

        return [
            word for word in words
            if self._word_in_text(word, paste_content)
        ]

    def _word_in_text(self, word, text):
        """
        Check whether the word occurs in the text as a whole word
        :param word: Literal word to search for
        :param text: Text to be searched
        :return: True if the word was found, else False
        """
        # An empty word carries no information and must never produce a hit.
        if not word:
            return False

        # re.escape keeps regex metacharacters in the word ("c++", "a.b")
        # from being interpreted. The lookarounds are equivalent to \b for
        # ordinary words, but also work when the word itself starts or ends
        # with a non-word character.
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        return re.search(pattern, text) is not None
149 changes: 149 additions & 0 deletions pastepwn/analyzers/tests/exactwordanalyzer_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
import unittest
from unittest import mock

from pastepwn.actions.basicaction import BasicAction
from pastepwn.analyzers.exactwordanalyzer import ExactWordAnalyzer


class TestExactWordAnalyzer(unittest.TestCase):
    """Unit tests for ExactWordAnalyzer, run against a mocked paste object."""

    def setUp(self):
        """Give every test its own mock paste."""
        self.paste = mock.Mock()

    def _result_for(self, analyzer, body):
        """Put ``body`` on the mock paste and return what the analyzer reports."""
        self.paste.body = body
        return analyzer.match(self.paste)

    def _assert_hit(self, analyzer, body):
        """Assert that the analyzer reports a truthy result for ``body``."""
        self.assertTrue(self._result_for(analyzer, body))

    def _assert_miss(self, analyzer, body):
        """Assert that the analyzer reports a falsy result for ``body``."""
        self.assertFalse(self._result_for(analyzer, body))

    def test_match(self):
        """A single word is found only as a whole word, ignoring case by default."""
        analyzer = ExactWordAnalyzer(None, "Test")

        self._assert_hit(analyzer, "This is a Test")
        self._assert_hit(analyzer, "There is a test for everything")
        self._assert_miss(analyzer, "There are tests for everything")
        self._assert_hit(analyzer, "This is a Test for a longer sentence")
        self._assert_miss(analyzer, "Completely unrelated")

    def test_blacklist(self):
        """Blacklisted words suppress a match and follow the case setting."""
        blacklist = ["fake", "bad"]

        insensitive = ExactWordAnalyzer(None, "Test", blacklist=blacklist)
        self._assert_hit(insensitive, "This is a Test")
        self._assert_miss(insensitive, "This is a fake Test")

        sensitive = ExactWordAnalyzer(None, "Test", blacklist=blacklist, case_sensitive=True)
        self._assert_hit(sensitive, "This is a Test")
        # "Fake" differs in case from the blacklisted "fake", so it is ignored
        self._assert_hit(sensitive, "This is a Fake Test")

    def test_multiple_words(self):
        """Any one of several configured words is enough for a match."""
        empty = ExactWordAnalyzer(None, None)
        self.assertEqual(empty.words, [])

        analyzer = ExactWordAnalyzer(None, ["My", "first", "Test"])
        matching_bodies = (
            "This is a little test for something",
            "You are my best friend so far!",
            "This is the first time I try this",
            "This time we try to match multiple words/tests for the first time.",
        )
        for body in matching_bodies:
            self._assert_hit(analyzer, body)

        self._assert_miss(analyzer, "None of the words are contained!")

        # Check for case sensitivity for multiple words
        strict = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)
        self._assert_miss(strict, "That's not my issue!")
        self._assert_hit(strict, "That's not My issue!")

    def test_add_word(self):
        """add_word appends to the list of configured words."""
        analyzer = ExactWordAnalyzer(None, "Test")
        self.assertEqual(len(analyzer.words), 1)
        self.assertEqual(analyzer.words, ["Test"])

        analyzer.add_word("second")
        self.assertEqual(len(analyzer.words), 2)
        self.assertEqual(analyzer.words, ["Test", "second"])

    def test_case_sensitive(self):
        """Case-sensitive mode needs exact case; the default mode does not."""
        strict = ExactWordAnalyzer(None, "Test", case_sensitive=True)
        self._assert_hit(strict, "This is a Test for case sensitivity")
        self._assert_miss(strict, "This is a test for case sensitivity")
        self._assert_miss(strict, "This is a tESt for case sensitivity")

        relaxed_bodies = (
            "This is a te1st for case sensitivity",
            "This is a tE1st for case sensitivity",
        )
        for body in relaxed_bodies:
            # A fresh default (case-insensitive) analyzer for each body
            relaxed = ExactWordAnalyzer(None, "Te1st")
            self._assert_hit(relaxed, body)

    def test_multiple_case_sensitive(self):
        """Test if it's possible to match any of multiple words when case sensitivity is activated"""
        analyzer = ExactWordAnalyzer(None, ["My", "first", "Test"], case_sensitive=True)

        non_matching_bodies = (
            "This is a little test for something",
            "You are my best friend so far!",
            "Myself and I",
        )
        for body in non_matching_bodies:
            self._assert_miss(analyzer, body)

        found = self._result_for(analyzer, "This is a Test for case sensitivity")
        self.assertTrue(found)
        self.assertEqual(1, len(found))
        self.assertEqual("Test", found[0])

        found = self._result_for(
            analyzer,
            "This is a test for case sensitivity. It's the first of its kind.",
        )
        self.assertTrue(found)
        self.assertEqual(1, len(found))
        self.assertEqual("first", found[0])

    def test_match_none(self):
        """Neither a paste without body nor a missing paste matches."""
        analyzer = ExactWordAnalyzer(None, "Test")
        self._assert_miss(analyzer, None)

        self.paste = None
        self.assertFalse(analyzer.match(self.paste))

    def test_match_empty(self):
        """An empty paste body does not match."""
        analyzer = ExactWordAnalyzer(None, "Test")
        self._assert_miss(analyzer, "")

    def test_actions_present(self):
        """A single action passed to the constructor ends up as a list in ``actions``."""
        action = mock.MagicMock(spec=BasicAction)
        analyzer = ExactWordAnalyzer(action, "Test")
        self.assertEqual([action], analyzer.actions)


# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()