Skip to content

Commit

Permalink
Python2 support (#58)
Browse files Browse the repository at this point in the history
* identify AS numbers that are not a subset of larger numbers and replace AS numbers all at once

* renamed test function

* more test cases

* using lookahead and lookbehind for consistency with other regex sub

* created AS number anonymizer

* remove unused arg

* reorder class functions

* class case consistency

* class case consistency

* added unicode check in ip anonymization, added e2e test

* flake and path fix

* Merge branch 'master' into python2-support

* added ensure unicode function, updated testpaths

* cleaned up e2e path/string code a little, moved ensure_unicode function

* flake fixes
  • Loading branch information
sfraint authored and dhalperi committed Apr 5, 2018
1 parent 63e107d commit 7f5f6d8
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .travis/build.sh
Expand Up @@ -10,7 +10,7 @@ echo -e "\n ..... Running flake8 on netconan to check style and docstrings"
# Configuration for flake8 is taken from setup.cfg
flake8

echo -e "\n ..... Running unit tests with pytest"
echo -e "\n ..... Running tests with pytest"
python setup.py test

set +x
Expand Down
13 changes: 10 additions & 3 deletions netconan/ip_anonymization.py
Expand Up @@ -14,14 +14,15 @@
# limitations under the License.

from abc import ABCMeta, abstractmethod

from bidict import bidict
import ipaddress
import logging
# Need regex instead of re for variable look behind
import regex

from hashlib import md5
from six import add_metaclass, iteritems
from six import add_metaclass, iteritems, text_type, u


# Deliberately catching more than valid IPs so we can remove 0s later.
Expand Down Expand Up @@ -52,6 +53,12 @@ def _generate_bit_from_hash(salt, string):
return int(last_hash_digit, 16) & 1


def _ensure_unicode(str):
    """Return the given value as a ``six.text_type`` string.

    A value that is already an instance of ``text_type`` is returned
    unchanged; anything else is passed through ``six.u`` and the result
    is returned.
    """
    if isinstance(str, text_type):
        # Already text: nothing to convert.
        return str
    return u(str)


@add_metaclass(ABCMeta)
class _BaseIpAnonymizer:
def __init__(self, salt, length, salter=_generate_bit_from_hash):
Expand Down Expand Up @@ -174,7 +181,7 @@ def make_addr(cls, addr_str):
as octal (1.2.3.32).
"""
addr_str = IpAnonymizer._DROP_ZEROS_PATTERN.sub(r'\1.\2.\3.\4', addr_str)
return ipaddress.IPv4Address(addr_str)
return ipaddress.IPv4Address(_ensure_unicode(addr_str))

@classmethod
def make_addr_from_int(cls, ip_int):
Expand All @@ -201,7 +208,7 @@ def get_addr_pattern(cls):
@classmethod
def make_addr(cls, addr_str):
"""Return an IPv6 address from the given string."""
return ipaddress.IPv6Address(addr_str)
return ipaddress.IPv6Address(_ensure_unicode(addr_str))

@classmethod
def make_addr_from_int(cls, ip_int):
Expand Down
4 changes: 2 additions & 2 deletions netconan/sensitive_item_removal.py
Expand Up @@ -29,8 +29,8 @@
# These are catch-all regexes to find lines that seem like they might contain
# sensitive info
default_catch_all_regexes = [
[('(\S* )*"?\K(\$9\$[^ ;"]+)(?="? ?.*)', 2)],
[('(\S* )*"?\K(\$1\$[^ ;"]+)(?="? ?.*)', 2)],
[('(\S* )*"?\K(\$9\$[^\s;"]+)(?="? ?.*)', 2)],
[('(\S* )*"?\K(\$1\$[^\s;"]+)(?="? ?.*)', 2)],
[('(\S* )*encrypted-password \K(\S+)(?= ?.*)', None)],
[('(\S* ?)*key "\K([^"]+)(?=".*)', 2)]
]
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -24,4 +24,4 @@ universal=1
[flake8]
filename=*.py,
ignore=E501,D401
exclude=docs,__pychache__,.eggs,*.egg,build
exclude=docs,__pycache__,.eggs,*.egg,build,virtualEnv
60 changes: 60 additions & 0 deletions tests/end_to_end/test_end_to_end.py
@@ -0,0 +1,60 @@
"""Test Netconan from end to end."""
# Copyright 2018 Intentionet
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import filecmp

from netconan.netconan import main


def test_end_to_end(tmpdir):
    """Test Netconan main with simulated input file and commandline args.

    An input file is written into a temporary ``input`` directory, ``main``
    is invoked with that directory and an empty ``output`` directory, and
    the file produced in ``output`` is compared against a reference file
    holding the expected text.
    """
    input_contents = """
# Intentionet's sensitive test file
ip address 192.168.2.1 255.255.255.255
my password is $1$salt$ABCDEFGHIJKLMNOPQRS
AS num 12345 and 65432 should be changed
"""
    # Expected output: relative to the input, the two listed words, the IP
    # address, the password hash and both AS numbers are replaced.
    ref_contents = """
# 1cbbc2's fd8607 test file
ip address 201.235.139.13 255.255.255.255
my password is $1$0000$CxUUGIrqPb7GaB5midrQZ.
AS num 8625 and 64818 should be changed
"""

    filename = "test.txt"
    input_dir = tmpdir.mkdir("input")
    input_dir.join(filename).write(input_contents)

    output_dir = tmpdir.mkdir("output")
    output_file = output_dir.join(filename)

    # The reference lives outside both directories so it is not processed.
    ref_file = tmpdir.join(filename)
    ref_file.write(ref_contents)

    args = [
        '-i', str(input_dir),
        '-o', str(output_dir),
        '-s', 'TESTSALT',
        '-a',
        '-p',
        '-w', 'intentionet,sensitive',
        '-n', '65432,12345'
    ]
    main(args)

    # Make sure output file matches the ref.  shallow=False forces a real
    # content comparison; the default shallow mode treats files as equal
    # whenever their os.stat() signatures (type, size, mtime) match, which
    # could hide a wrong-but-same-length output written in the same instant.
    assert filecmp.cmp(str(ref_file), str(output_file), shallow=False)
Expand Up @@ -13,14 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import unicode_literals

import ipaddress
import pytest
import regex

from netconan.ip_anonymization import (
IpAnonymizer, IpV6Anonymizer, anonymize_ip_addr)
from six import u

ip_v4_list = [
('10.11.12.13'),
Expand Down Expand Up @@ -329,7 +328,7 @@ def test_false_positives(anonymizer_v4, anonymizer_v6, line):
])
def test_v4_anonymizer_ignores_leading_zeros(anonymizer_v4, zeros, no_zeros):
"""Test that v4 IP address ignore leading zeros & don't interpret octal."""
assert(ipaddress.IPv4Address(no_zeros) == anonymizer_v4.make_addr(zeros))
assert(ipaddress.IPv4Address(u(no_zeros)) == anonymizer_v4.make_addr(zeros))


@pytest.mark.parametrize('ip_int, expected', [
Expand Down
File renamed without changes.
Expand Up @@ -118,6 +118,10 @@
('username noc secret sha512 {}', '$6$RMxgK5ALGIf.nWEC$tHuKCyfNtJMCY561P52dTzHUmYMmLxb/Mxik.j3vMUs8lMCPocM00/NAS.SN6GCWx7d/vQIgxnClyQLAb7n3x0')
]

# Pairs of (leading line text, password string) for password lines that do
# not follow a specific config syntax.
# NOTE(review): presumably consumed by a parametrized test elsewhere in this
# file; unlike the entries above, the first element has no '{}' placeholder,
# so confirm how the two parts are joined.
misc_password_lines = [
('my password is ', '$1$salt$abcdefghijklmnopqrs')
]

unique_passwords = [
'12345ABCDEF',
'ABCDEF123456789',
Expand Down

0 comments on commit 7f5f6d8

Please sign in to comment.