From 7f5f6d8a14660f22256370fa734ff13356c22113 Mon Sep 17 00:00:00 2001 From: Spencer Fraint Date: Wed, 4 Apr 2018 19:20:24 -0700 Subject: [PATCH] Python2 support (#58) * identify AS numbers that are not a subset of larger numbers and replace AS numbers all at once * renamed test function * more test cases * using lookahead and lookbehind for consistency with other regex sub * created AS number anonymizer * remove unused arg * reorder class functions * class case consistency * class case consistency * added unicode check in ip anonymization, added e2e test * flake and path fix * Merge branch 'master' into python2-support * added ensure unicode function, updated testpaths * cleaned up e2e path/string code a little, moved ensure_unicode function * flake fixes --- .travis/build.sh | 2 +- netconan/ip_anonymization.py | 13 +++- netconan/sensitive_item_removal.py | 4 +- setup.cfg | 2 +- tests/end_to_end/test_end_to_end.py | 60 +++++++++++++++++++ tests/{ => unit}/test_ip_anonymization.py | 5 +- tests/{ => unit}/test_parse_args.py | 0 .../{ => unit}/test_sensitive_item_removal.py | 4 ++ 8 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 tests/end_to_end/test_end_to_end.py rename tests/{ => unit}/test_ip_anonymization.py (99%) rename tests/{ => unit}/test_parse_args.py (100%) rename tests/{ => unit}/test_sensitive_item_removal.py (99%) diff --git a/.travis/build.sh b/.travis/build.sh index 1afceab..e1f286e 100755 --- a/.travis/build.sh +++ b/.travis/build.sh @@ -10,7 +10,7 @@ echo -e "\n ..... Running flake8 on netconan to check style and docstrings" # Configuration for flake8 is taken from setup.cfg flake8 -echo -e "\n ..... Running unit tests with pytest" +echo -e "\n ..... Running tests with pytest" python setup.py test set +x diff --git a/netconan/ip_anonymization.py b/netconan/ip_anonymization.py index 4300fb8..1a2a523 100644 --- a/netconan/ip_anonymization.py +++ b/netconan/ip_anonymization.py @@ -14,6 +14,7 @@ # limitations under the License. 
from abc import ABCMeta, abstractmethod + from bidict import bidict import ipaddress import logging @@ -21,7 +22,7 @@ import regex from hashlib import md5 -from six import add_metaclass, iteritems +from six import add_metaclass, iteritems, text_type, u # Deliberately catching more than valid IPs so we can remove 0s later. @@ -52,6 +53,12 @@ def _generate_bit_from_hash(salt, string): return int(last_hash_digit, 16) & 1 +def _ensure_unicode(str): + if not isinstance(str, text_type): + str = u(str) + return str + + @add_metaclass(ABCMeta) class _BaseIpAnonymizer: def __init__(self, salt, length, salter=_generate_bit_from_hash): @@ -174,7 +181,7 @@ def make_addr(cls, addr_str): as octal (1.2.3.32). """ addr_str = IpAnonymizer._DROP_ZEROS_PATTERN.sub(r'\1.\2.\3.\4', addr_str) - return ipaddress.IPv4Address(addr_str) + return ipaddress.IPv4Address(_ensure_unicode(addr_str)) @classmethod def make_addr_from_int(cls, ip_int): @@ -201,7 +208,7 @@ def get_addr_pattern(cls): @classmethod def make_addr(cls, addr_str): """Return an IPv6 address from the given string.""" - return ipaddress.IPv6Address(addr_str) + return ipaddress.IPv6Address(_ensure_unicode(addr_str)) @classmethod def make_addr_from_int(cls, ip_int): diff --git a/netconan/sensitive_item_removal.py b/netconan/sensitive_item_removal.py index bdcb93e..5a26c1e 100644 --- a/netconan/sensitive_item_removal.py +++ b/netconan/sensitive_item_removal.py @@ -29,8 +29,8 @@ # These are catch-all regexes to find lines that seem like they might contain # sensitive info default_catch_all_regexes = [ - [('(\S* )*"?\K(\$9\$[^ ;"]+)(?="? ?.*)', 2)], - [('(\S* )*"?\K(\$1\$[^ ;"]+)(?="? ?.*)', 2)], + [('(\S* )*"?\K(\$9\$[^\s;"]+)(?="? ?.*)', 2)], + [('(\S* )*"?\K(\$1\$[^\s;"]+)(?="? 
?.*)', 2)], [('(\S* )*encrypted-password \K(\S+)(?= ?.*)', None)], [('(\S* ?)*key "\K([^"]+)(?=".*)', 2)] ] diff --git a/setup.cfg b/setup.cfg index 3d925ee..3680055 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,4 +24,4 @@ universal=1 [flake8] filename=*.py, ignore=E501,D401 -exclude=docs,__pychache__,.eggs,*.egg,build +exclude=docs,__pychache__,.eggs,*.egg,build,virtualEnv diff --git a/tests/end_to_end/test_end_to_end.py b/tests/end_to_end/test_end_to_end.py new file mode 100644 index 0000000..90b5fac --- /dev/null +++ b/tests/end_to_end/test_end_to_end.py @@ -0,0 +1,60 @@ +"""Test Netconan from end to end.""" +# Copyright 2018 Intentionet +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import filecmp + +from netconan.netconan import main + + +def test_end_to_end(tmpdir): + """Test Netconan main with simulated input file and commandline args.""" + input_contents = """ +# Intentionet's sensitive test file +ip address 192.168.2.1 255.255.255.255 +my password is $1$salt$ABCDEFGHIJKLMNOPQRS +AS num 12345 and 65432 should be changed + +""" + ref_contents = """ +# 1cbbc2's fd8607 test file +ip address 201.235.139.13 255.255.255.255 +my password is $1$0000$CxUUGIrqPb7GaB5midrQZ. 
+AS num 8625 and 64818 should be changed + +""" + + filename = "test.txt" + input_dir = tmpdir.mkdir("input") + input_dir.join(filename).write(input_contents) + + output_dir = tmpdir.mkdir("output") + output_file = output_dir.join(filename) + + ref_file = tmpdir.join(filename) + ref_file.write(ref_contents) + + args = [ + '-i', str(input_dir), + '-o', str(output_dir), + '-s', 'TESTSALT', + '-a', + '-p', + '-w', 'intentionet,sensitive', + '-n', '65432,12345' + ] + main(args) + + # Make sure output file matches the ref + assert(filecmp.cmp(str(ref_file), str(output_file))) diff --git a/tests/test_ip_anonymization.py b/tests/unit/test_ip_anonymization.py similarity index 99% rename from tests/test_ip_anonymization.py rename to tests/unit/test_ip_anonymization.py index d7f2782..92a6598 100644 --- a/tests/test_ip_anonymization.py +++ b/tests/unit/test_ip_anonymization.py @@ -13,14 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import unicode_literals - import ipaddress import pytest import regex from netconan.ip_anonymization import ( IpAnonymizer, IpV6Anonymizer, anonymize_ip_addr) +from six import u ip_v4_list = [ ('10.11.12.13'), @@ -329,7 +328,7 @@ def test_false_positives(anonymizer_v4, anonymizer_v6, line): ]) def test_v4_anonymizer_ignores_leading_zeros(anonymizer_v4, zeros, no_zeros): """Test that v4 IP address ignore leading zeros & don't interpret octal.""" - assert(ipaddress.IPv4Address(no_zeros) == anonymizer_v4.make_addr(zeros)) + assert(ipaddress.IPv4Address(u(no_zeros)) == anonymizer_v4.make_addr(zeros)) @pytest.mark.parametrize('ip_int, expected', [ diff --git a/tests/test_parse_args.py b/tests/unit/test_parse_args.py similarity index 100% rename from tests/test_parse_args.py rename to tests/unit/test_parse_args.py diff --git a/tests/test_sensitive_item_removal.py b/tests/unit/test_sensitive_item_removal.py similarity index 99% rename from tests/test_sensitive_item_removal.py rename to tests/unit/test_sensitive_item_removal.py index 9b6df0b..1219dcf 100644 --- a/tests/test_sensitive_item_removal.py +++ b/tests/unit/test_sensitive_item_removal.py @@ -118,6 +118,10 @@ ('username noc secret sha512 {}', '$6$RMxgK5ALGIf.nWEC$tHuKCyfNtJMCY561P52dTzHUmYMmLxb/Mxik.j3vMUs8lMCPocM00/NAS.SN6GCWx7d/vQIgxnClyQLAb7n3x0') ] +misc_password_lines = [ + ('my password is ', '$1$salt$abcdefghijklmnopqrs') +] + unique_passwords = [ '12345ABCDEF', 'ABCDEF123456789',