Skip to content

Commit

Permalink
feat(analyzers): Implement base64asciianalyzer (#172)
Browse files Browse the repository at this point in the history
Thank you for your contribution(s) - especially for the tests, since, from my experience, these take most of the time to set up.

All tests are green, the analyzer matches only what it should, codacy is happy with the code quality, so I am happy to merge this PR.

fixes #166 
___
* feat(analyzers): implement base64asciianalyzer

* test(analyzers) add test to base64analyzers

* fix: use proper name __init__

* feat: add option to return decoded b64 from analyzer

The b64asciianalyzer can now return either the original text or the decoded result

* fix: add check for valid ascii characters

By using the decode method on the byte string and catching exceptions, we can determine whether the result is actual ASCII or other data.

* update base64asciianalyzer tests for decode flag functionality

* fix(test): correct order of equality comparisons

The first parameter is always the expected result. The second one is the actual value under test.

* fix(test): check for newline character

The original test did not actually check for newline/linefeed character but for the literal string '\n'.

Co-authored-by: Rico <d-Rickyy-b@users.noreply.github.com>
  • Loading branch information
jonahrosenblum and d-Rickyy-b committed Feb 12, 2020
1 parent 6345639 commit b535781
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 1 deletion.
2 changes: 2 additions & 0 deletions pastepwn/analyzers/__init__.py
Expand Up @@ -6,6 +6,7 @@
from .awssessiontokenanalyzer import AWSSessionTokenAnalyzer
from .azuresubscriptionkeyanalyzer import AzureSubscriptionKeyAnalyzer
from .base64analyzer import Base64Analyzer
from .base64asciianalyzer import Base64AsciiAnalyzer
from .basicanalyzer import BasicAnalyzer
from .battlenetkeyanalyzer import BattleNetKeyAnalyzer
from .bcrypthashanalyzer import BcryptHashAnalyzer
Expand Down Expand Up @@ -50,6 +51,7 @@
'AWSSessionTokenAnalyzer',
'AzureSubscriptionKeyAnalyzer',
'Base64Analyzer',
'Base64AsciiAnalyzer',
'BasicAnalyzer',
'BattleNetKeyAnalyzer',
'BcryptHashAnalyzer',
Expand Down
2 changes: 1 addition & 1 deletion pastepwn/analyzers/base64analyzer.py
Expand Up @@ -12,7 +12,7 @@ def __init__(self, actions, min_len=1):
super().__init__(actions, regex)

def verify(self, results):
"""Method to perform additional checks to test if the found strings are of sufficient lenght"""
"""Method to perform additional checks to test if the found strings are of sufficient length"""
validated_strings = []

for result in results:
Expand Down
42 changes: 42 additions & 0 deletions pastepwn/analyzers/base64asciianalyzer.py
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
from .base64analyzer import Base64Analyzer
from base64 import b64decode
import binascii


class Base64AsciiAnalyzer(Base64Analyzer):
    """Analyzer matching base64 strings whose decoded bytes are valid ASCII.

    Candidates are first filtered by ``Base64Analyzer.verify``; only those
    that additionally base64-decode to pure ASCII are kept.
    """
    name = 'Base64AsciiAnalyzer'

    def __init__(self, actions, min_len=1, decode=False):
        """Set up the analyzer.

        :param actions: forwarded unchanged to ``Base64Analyzer``
        :param min_len: forwarded unchanged to ``Base64Analyzer``
        :param decode: when truthy, ``verify`` returns the decoded ASCII text
                       instead of the original base64 strings
        """
        super().__init__(actions, min_len)
        self.decode = decode

    def verify(self, results):
        """Keep only the candidates which decode to valid ASCII.

        :param results: candidate strings, pre-filtered by the parent class
        :return: list containing, for every accepted candidate, either the
                 decoded text (``self.decode`` truthy) or the original string
        """
        ascii_matches = []

        # Let the parent class sort out the acceptable base64 candidates first
        for candidate in super().verify(results):
            try:
                # b64decode raises binascii.Error for malformed base64,
                # bytes.decode raises UnicodeDecodeError for non-ASCII bytes
                plain_text = b64decode(candidate).decode('ascii')
            except (binascii.Error, UnicodeDecodeError):
                # Either not decodable at all or not plain ASCII - skip it
                continue

            ascii_matches.append(plain_text if self.decode else candidate)

        return ascii_matches
10 changes: 10 additions & 0 deletions pastepwn/analyzers/tests/base64analyzer_test.py
Expand Up @@ -109,6 +109,16 @@ def test_match_negative(self):
"Dj448rhbNTJrKhRn7TPkYRubZLhmbCrg6bavDa9a"
self.assertFalse(self.analyzer.match(self.paste))

def test_invalid_decodes(self):
"""Test to make sure we match all base64 strings even ones that don't decode to ASCII."""
# base64 encoded string containing one non-ascii character: "This string contains a non-ascii character: ¤" (UTF-8)
self.paste.body = "VGhpcyBzdHJpbmcgY29udGFpbnMgYSBub24tYXNjaWkgY2hhcmFjdGVyOiDCpA=="
self.assertTrue(self.analyzer.match(self.paste))

# base64 encoded string containing only non-ascii characters: "ΗÈλλθ ωÖΓλÐ" (UTF-8)
self.paste.body = "zpfDiM67zrvOuCDPicOWzpPOu8OQ"
self.assertTrue(self.analyzer.match(self.paste))


if __name__ == '__main__':
unittest.main()
161 changes: 161 additions & 0 deletions pastepwn/analyzers/tests/base64asciianalyzer_test.py
@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
import unittest
from unittest import mock

from pastepwn.analyzers.base64asciianalyzer import Base64AsciiAnalyzer


class TestBase64AsciiAnalyzer(unittest.TestCase):
    """Tests for Base64AsciiAnalyzer: matching, min_len handling and the decode flag."""

    def setUp(self):
        """Create a default analyzer (no actions) and a mocked paste for every test."""
        self.analyzer = Base64AsciiAnalyzer(None)
        self.paste = mock.Mock()

    def test_match_positive(self):
        """Test if positives are recognized"""
        # base64 encoded string: "Hello World" (UTF-8, LF)
        self.paste.body = "SGVsbG8gV29ybGQ="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string: "Hello", a literal backslash, "nWorld" (no real line feed)
        self.paste.body = "SGVsbG9cbldvcmxk"
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string: "Hello\nWorld" with an actual line feed character (UTF-8, LF)
        self.paste.body = "SGVsbG8KV29ybGQ="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string (32 chars): "2fwZ_CTjDKxu48FLCLZcGdB!sEj5XRQh" (UTF-8, LF)
        self.paste.body = "MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string (64 chars): "Mv=ZH?NJrrBSdhus*KVg%4dG6*C&ub?sSeq!VrzCb_-QcY^KWfxKy8AJ3=^5?b6N"
        # (UTF-8, LF)
        self.paste.body = "TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg=="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string (256 chars): "etFk!?m@A_vvdMT39Mgcynx_AFz6HY!4R8U3n_7JA?-rF=F3ehWat%4rKfhsuCc98G
        # =t8jMY7hgJDZ2c!y!$!XQATbk6fQD2pa+EdQ_rfP^&_DKJ34dFPcuGjDBTqdxZ&=3U%@dm&?JW#+k@mB%a3TFn%GAzukL+-%TUTq?fAbAKr
        # @y%LPK+KEmxeh+rg7?s3aR2v5A%tbn&_7zNMckCPRd&s8$wW5Bec@aRMCs@4rn?cRx?a&y-Z%kn&h8aLu*R" (UTF-8, LF)
        self.paste.body = "ZXRGayE/bUBBX3Z2ZE1UMzlNZ2N5bnhfQUZ6NkhZITRSOFUzbl83SkE/LXJGPUYzZWhXYXQlNHJLZmhzdUNjO" \
                          "ThHPXQ4ak1ZN2hnSkRaMmMheSEkIVhRQVRiazZmUUQycGErRWRRX3JmUF4mX0RLSjM0ZEZQY3VHakRCVHFkeF" \
                          "omPTNVJUBkbSY/SlcjK2tAbUIlYTNURm4lR0F6dWtMKy0lVFVUcT9mQWJBS3JAeSVMUEsrS0VteGVoK3JnNz9" \
                          "zM2FSMnY1QSV0Ym4mXzd6Tk1ja0NQUmQmczgkd1c1QmVjQGFSTUNzQDRybj9jUng/YSZ5LVola24maDhhTHUqUg=="
        self.assertTrue(self.analyzer.match(self.paste))

    def test_intext(self):
        """Test if matches inside text are recognized"""
        # Only the first match is inspected here, so the longer trailing words do not matter
        self.paste.body = "I wan to tel you tha TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg== is " \
                          "very important"
        match = self.analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual("TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg==", match[0])

    def test_multiple(self):
        """Test if multiple matches are recognized"""
        # Needed to keep the words below 3 chars each. Otherwise they would match as well
        self.paste.body = "I wan to tel you tha TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg== is " \
                          "ver imp.\nBut not onl tha, it's als MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg= and muc mor!"
        match = self.analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual("TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg==", match[0])
        self.assertEqual("MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg=", match[1])

    def test_multiple_min_len(self):
        """Test if we can match multiple base64 strings in a longer text with min_len"""
        # With min_len=8 the ordinary words of the sentence may be written out in full
        analyzer = Base64AsciiAnalyzer(None, min_len=8)
        self.paste.body = "I wanted to tell you that TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg== is " \
                          "very important.\nBut not only that, it's also MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg= and much more!"
        match = analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual("TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg==", match[0])
        self.assertEqual("MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg=", match[1])

    def test_min_len(self):
        """Test if the min_len parameter works as expected"""
        # 4 character base64 string ("tes") with min_len=4 must match
        self.paste.body = "dGVz"
        analyzer = Base64AsciiAnalyzer(None, min_len=4)
        match = analyzer.match(self.paste)
        self.assertTrue(match)

        # the same 4 character string must not match with min_len=5
        self.paste.body = "dGVz"
        analyzer = Base64AsciiAnalyzer(None, min_len=5)
        match = analyzer.match(self.paste)
        self.assertFalse(match)

        # a longer string ("testThisString") must match with the min_len=5 analyzer
        self.paste.body = "dGVzdFRoaXNTdHJpbmc="
        match = analyzer.match(self.paste)
        self.assertTrue(match)

    def test_match_negative(self):
        """Test if negatives are not recognized"""
        # test that when an empty body is provided, nothing matches
        self.paste.body = ""
        self.assertFalse(self.analyzer.match(self.paste))

        # test that when the body is None, nothing matches
        self.paste.body = None
        self.assertFalse(self.analyzer.match(self.paste))

        # invalid base64 string (% symbol inserted which is not valid base64)
        self.paste.body = "SGVsbG8gV%29ybGQ="
        self.assertFalse(self.analyzer.match(self.paste))

        # not a base64 string
        self.paste.body = "====="
        self.assertFalse(self.analyzer.match(self.paste))

        # base32 encoded string
        self.paste.body = "JBSWY3DPEBLW64TMMQ======"
        self.assertFalse(self.analyzer.match(self.paste))

        # long string (129) not base64
        self.paste.body = "sFm2XgxTt6fuErnWw9JZkae76sL7XDqyNvf2Wkatt9gkzVDxXTf6dCr3Yh6fT82fFzvNWG49P3KSR7XXngHJ5D9ba" \
                          "Dj448rhbNTJrKhRn7TPkYRubZLhmbCrg6bavDa9a"
        self.assertFalse(self.analyzer.match(self.paste))

    def test_invalid_decodes(self):
        """Test to make sure we don't match base64 strings which don't decode to ASCII"""
        # base64 encoded string containing one non-ascii character: "This string contains a non-ascii character: ¤" (UTF-8)
        self.paste.body = "VGhpcyBzdHJpbmcgY29udGFpbnMgYSBub24tYXNjaWkgY2hhcmFjdGVyOiDCpA=="
        self.assertFalse(self.analyzer.match(self.paste))

        # base64 encoded string containing only non-ascii characters: "ΗÈλλθ ωÖΓλÐ" (UTF-8)
        self.paste.body = "zpfDiM67zrvOuCDPicOWzpPOu8OQ"
        self.assertFalse(self.analyzer.match(self.paste))

        # base64 encoded string containing one non-ascii character: "º" (UTF-8)
        self.paste.body = "wro="
        self.assertFalse(self.analyzer.match(self.paste))

    def test_ascii_decode(self):
        """Test if ascii decode flag works"""
        # With decode=True the analyzer returns the decoded text instead of the base64 string
        analyzer = Base64AsciiAnalyzer(None, decode=True)

        # base64 encoded string: "Hello World" (UTF-8, LF)
        self.paste.body = "SGVsbG8gV29ybGQ="
        self.assertEqual("Hello World", analyzer.match(self.paste)[0])

        # base64 encoded string: "Hello\nWorld" with an actual line feed character (UTF-8, LF)
        self.paste.body = "SGVsbG8KV29ybGQ="
        self.assertEqual("Hello\nWorld", analyzer.match(self.paste)[0])

        # base64 encoded string (32 chars): "2fwZ_CTjDKxu48FLCLZcGdB!sEj5XRQh" (UTF-8, LF)
        self.paste.body = "MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg="
        self.assertEqual("2fwZ_CTjDKxu48FLCLZcGdB!sEj5XRQh", analyzer.match(self.paste)[0])

        # base64 encoded string (64 chars): "Mv=ZH?NJrrBSdhus*KVg%4dG6*C&ub?sSeq!VrzCb_-QcY^KWfxKy8AJ3=^5?b6N"
        # (UTF-8, LF)
        self.paste.body = "TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg=="
        self.assertEqual("Mv=ZH?NJrrBSdhus*KVg%4dG6*C&ub?sSeq!VrzCb_-QcY^KWfxKy8AJ3=^5?b6N", analyzer.match(self.paste)[0])

        # base64 encoded string (256 chars): "etFk!?m@A_vvdMT39Mgcynx_AFz6HY!4R8U3n_7JA?-rF=F3ehWat%4rKfhsuCc98G
        # =t8jMY7hgJDZ2c!y!$!XQATbk6fQD2pa+EdQ_rfP^&_DKJ34dFPcuGjDBTqdxZ&=3U%@dm&?JW#+k@mB%a3TFn%GAzukL+-%TUTq?fAbAKr
        # @y%LPK+KEmxeh+rg7?s3aR2v5A%tbn&_7zNMckCPRd&s8$wW5Bec@aRMCs@4rn?cRx?a&y-Z%kn&h8aLu*R" (UTF-8, LF)
        self.paste.body = "ZXRGayE/bUBBX3Z2ZE1UMzlNZ2N5bnhfQUZ6NkhZITRSOFUzbl83SkE/LXJGPUYzZWhXYXQlNHJLZmhzdUNjO" \
                          "ThHPXQ4ak1ZN2hnSkRaMmMheSEkIVhRQVRiazZmUUQycGErRWRRX3JmUF4mX0RLSjM0ZEZQY3VHakRCVHFkeF" \
                          "omPTNVJUBkbSY/SlcjK2tAbUIlYTNURm4lR0F6dWtMKy0lVFVUcT9mQWJBS3JAeSVMUEsrS0VteGVoK3JnNz9" \
                          "zM2FSMnY1QSV0Ym4mXzd6Tk1ja0NQUmQmczgkd1c1QmVjQGFSTUNzQDRybj9jUng/YSZ5LVola24maDhhTHUqUg=="
        self.assertEqual("etFk!?m@A_vvdMT39Mgcynx_AFz6HY!4R8U3n_7JA?-rF=F3ehWat%4rKfhsuCc98G" \
                         "=t8jMY7hgJDZ2c!y!$!XQATbk6fQD2pa+EdQ_rfP^&_DKJ34dFPcuGjDBTqdxZ&=3U%" \
                         "@dm&?JW#+k@mB%a3TFn%GAzukL+-%TUTq?fAbAKr@y%LPK+KEmxeh+rg7?s3aR2v5A%tbn&" \
                         "_7zNMckCPRd&s8$wW5Bec@aRMCs@4rn?cRx?a&y-Z%kn&h8aLu*R", analyzer.match(self.paste)[0])


if __name__ == '__main__':
unittest.main()

0 comments on commit b535781

Please sign in to comment.