Skip to content

Commit

Permalink
Merge 91915b2 into f91a0ff
Browse files Browse the repository at this point in the history
  • Loading branch information
d-Rickyy-b committed Jun 19, 2020
2 parents f91a0ff + 91915b2 commit 9593acb
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Fixed
- The PastebinScraper could not recognize error messages with IPv6 addresses.

## [1.3.0] - 2020-03-03
### Added
Expand Down
2 changes: 1 addition & 1 deletion pastepwn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
from .core.scrapinghandler import ScrapingHandler
from .core.actionhandler import ActionHandler

__author__ = "d-Rickyy-b (pastepwn@rickyy.de)"
__author__ = "d-Rickyy-b (pastepwn@rico-j.de)"

__all__ = ['PastePwn', 'Paste', 'PasteDispatcher', 'ScrapingHandler', 'ActionHandler']
64 changes: 64 additions & 0 deletions pastepwn/analyzers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Analyzers
This directory contains all the analyzers for pastepwn. An analyzer is a class that extends the [BasicAnalyzer](https://github.com/d-Rickyy-b/pastepwn/blob/master/pastepwn/analyzers/basicanalyzer.py) class (or any other subclass of that) and overrides at least the `match` function.

The match function gets passed a paste object.
The job of an analyzer is to check if a paste matches certain criteria. If it matches, pastepwn will execute the action(s) stored in the analyzer.


## Example analyzers
Analyzers can check for multiple things.
A paste contains a certain, previously defined string.
Or the paste matches a certain regex.
Or the paste's syntax is set to `java`.
And many other things.

Check a few selected examples to get an idea:
- [RegexAnalyzer](https://github.com/d-Rickyy-b/pastepwn/blob/master/pastepwn/analyzers/regexanalyzer.py) - Foundation for most other analyzers. Checks a
paste against a regex
- [SteamKeyAnalyzer](https://github.com/d-Rickyy-b/pastepwn/blob/master/pastepwn/analyzers/steamkeyanalyzer.py) - Checks if a paste contains a Steam Key
- [IBANAnalyzer](https://github.com/d-Rickyy-b/pastepwn/blob/master/pastepwn/analyzers/ibananalyzer.py) - Checks if a paste contains an IBAN


## Create own analyzer
Check out the implementations of a few analyzers and you'll get an idea on how to get started.

```python
# -*- coding: utf-8 -*-
from .basicanalyzer import BasicAnalyzer


class MyAnalyzer(BasicAnalyzer):
name = "MyAnalyzer"

def __init__(self, actions, regex, flags=0, blacklist=None):
# We need to call the init of super, to initialize some settings in the basicanalyzer
super().__init__(actions, self.name)

# We can do some custom setup stuff to initialize e.g. blacklists
self.blacklist = blacklist or []

def match(self, paste):
# Here our pastes get matched. We can access all fields of the paste object
paste_title = paste.title or ""

# We can for example check if the paste title contains a certain string
return "Secret" in paste_title
```

## Combining analyzers
To combine multiple analyzers and hence multiple conditions, you can use bitwise operators.
Those bitwise operators act as logical operators by creating a new `MergedAnalyzer` class that handles the individual analyzers internally.

`&` - bitwise AND for combining analyzers with a logical AND
`|` - bitwise OR for combining analyzers with a logical OR

```python
analyzer1 = SomeAnalyzer(...)
analyzer2 = SomeOtherAnalyzer(...)
analyzer3 = ThirdAnalyzer(...)

realAnalyzer = (analyzer1 & analyzer2) | analyzer3
```

The `realAnalyzer` only matches if either `analyzer1` and `analyzer2` both match, or if `analyzer3` matches.

5 changes: 4 additions & 1 deletion pastepwn/analyzers/basicanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ def __repr__(self):


class MergedAnalyzer(BasicAnalyzer):
"""Basic analyzer class"""
"""
Combination class to combine multiple analyzers into a single one
Doesn't need to be created manually - use the binary operators (& and |) to combine multiple analyzers.
"""
name = "MergedAnalyzer"

def __init__(self, base_analyzer, and_analyzer=None, or_analyzer=None):
Expand Down
2 changes: 1 addition & 1 deletion pastepwn/core/pastepwn.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def add_scraper(self, scraper, restart_scraping=False):
def add_analyzer(self, analyzer):
"""
Adds a new analyzer to the list of analyzers
:param analyzer: Instance of an BasicAnalyzer
:param analyzer: Instance of a BasicAnalyzer
:return: None
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


class IPNotRegisteredError(Exception):
"""Exception class indicating that your IP is not whitelisted on pastebin"""

def __init__(self, body):
ip = re.search("YOUR IP: (.*?) DOES NOT HAVE ACCESS", body).group(1)
Expand Down
4 changes: 2 additions & 2 deletions pastepwn/scraping/pastebin/pastebinscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def __init__(self, paste_queue=None, exception_event=None, api_hit_rate=None):

def _check_error(self, body, key=None):
"""Checks if an error occurred and raises an exception if it did"""
pattern = r"YOUR IP: \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} DOES NOT HAVE ACCESS\.\s+VISIT: https:\/\/pastebin\.com\/doc_scraping_api TO GET ACCESS!"
pattern = r"^YOUR IP: ((\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})|((([0-9A-Fa-f]{1,4}:){7})([0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,6}:)(([0-9A-Fa-f]{1,4}:){0,4})([0-9A-Fa-f]{1,4}))) DOES NOT HAVE ACCESS\.\s+VISIT: https:\/\/pastebin\.com\/doc_scraping_api TO GET ACCESS!"

if 107 >= len(body) >= 99 and re.match(pattern, body):
if 131 >= len(body) and re.match(pattern, body):
self._exception_event.set()
raise IPNotRegisteredError(body)

Expand Down
49 changes: 49 additions & 0 deletions pastepwn/scraping/pastebin/tests/pastebinscraper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
import unittest
from pastepwn.scraping.pastebin import PastebinScraper
from pastepwn.scraping.pastebin.exceptions import IPNotRegisteredError, PasteDeletedException, PasteNotReadyException, PasteEmptyException


class TestPastebinscraper(unittest.TestCase):
    """Tests for the error detection done by ``PastebinScraper._check_error``."""

    def setUp(self) -> None:
        # Every test gets its own scraper, created with default arguments.
        self.pastebinscraper = PastebinScraper()

    def test_empty(self):
        """An empty body must raise a PasteEmptyException."""
        with self.assertRaises(PasteEmptyException):
            self.pastebinscraper._check_error("")

    def test_not_ready(self):
        """The 'not ready' message must raise a PasteNotReadyException."""
        with self.assertRaises(PasteNotReadyException):
            self.pastebinscraper._check_error("File is not ready for scraping yet. Try again in 1 minute.")

    def test_deleted(self):
        """The 'cannot find this paste' message must raise a PasteDeletedException."""
        with self.assertRaises(PasteDeletedException):
            self.pastebinscraper._check_error("Error, we cannot find this paste.")

    def _check_ip_not_registered(self, ip_list):
        """
        Assert that _check_error raises IPNotRegisteredError for every given IP.

        :param ip_list: Iterable of IP address strings that get inserted into the error message
        :return: None
        """
        shell = "YOUR IP: {} DOES NOT HAVE ACCESS. VISIT: https://pastebin.com/doc_scraping_api TO GET ACCESS!"
        for ip in ip_list:
            # subTest keeps the loop going after a failure, so every undetected
            # address is reported instead of only the first one.
            with self.subTest(ip=ip):
                with self.assertRaises(IPNotRegisteredError, msg="The following IP was not detected: {}".format(ip)):
                    self.pastebinscraper._check_error(shell.format(ip))

    def test_ipv4_not_registered(self):
        """Test if the _check_error method detects different IPv4 addresses. It's okay to also detect invalid addresses (where an octet is > 255)."""
        ipv4_test = ["1.1.1.1", "10.1.5.6", "1.10.5.6", "1.1.50.6", "1.1.5.60", "1.1.50.60", "1.10.50.60", "10.10.50.60", "10.10.50.255", "10.10.255.255",
                     "10.255.255.255", "255.255.255.255", "333.333.333.333"]

        self._check_ip_not_registered(ipv4_test)

    def test_ipv6_not_registered(self):
        """Test if the _check_error method detects different IPv6 addresses (full and `::`-compressed notation)."""
        ipv6_test = ["fe80::21d8:f50:c295:c4be", "2001:cdba:0000:0000:0000:0000:3257:9652", "2001:cdba:0:0:0:0:3257:9652", "2001:cdba::3257:9652",
                     "2001:cdba::1222", "21DA:D3:0:2F3B:2AA:FF:FE28:9C5A", "2001:cdba::1:2:3:3257:9652", "FE80::8329", "FE80::FFFF:8329",
                     "FE80::B3FF:FFFF:8329", "FE80::0202:B3FF:FFFF:8329", "FE80:0000:0000:0000:0202:B3FF:FFFF:8329"]
        # TODO: IPv6 addresses with double colon AND full zero groups (of 16 bits) are currently not recognized by the used regex. An example address would
        #  be: `FE80::0000:0000:0202:B3FF:FFFF:8329`

        self._check_ip_not_registered(ipv6_test)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def requirements():
long_description_content_type='text/markdown',
url='https://github.com/d-Rickyy-b/pastepwn',
author='d-Rickyy-b',
author_email='pastepwn@rickyy.de',
author_email='pastepwn@rico-j.de',
license='MIT',
packages=packages,
include_package_data=True,
Expand Down

0 comments on commit 9593acb

Please sign in to comment.