In [None]:
# default_exp util

In [None]:
#hide
from nbdev.showdoc import *

# ProofZero Utility SDK

This module contains parsers and hash functions intended to be used by the `core` module in its workflow.

## Exported types

In [None]:
#export
from typing import NewType, Any
# Nominal type for hash digests: static type checkers treat `Hash` as distinct
# from a plain `str`, while at runtime a `Hash` value is an ordinary `str`.
Hash = NewType('Hash', str)

## Hash functions

In [None]:
#export
def noop(value: Any) -> Hash:
    """
    Identity "hash": hand the input back exactly as it was received.

    No digest is computed and the value is neither converted nor wrapped, so
    the caller gets back the very same object it passed in. Intended as a
    stand-in hash function for tests.
    """
    return value

In [None]:
# Smoke test: noop must return its argument unchanged.
test_noop = noop('no transform')
assert test_noop == 'no transform'

In [None]:
#export
import struct
import hashlib
import pandas as pd
def sha2(value: Any, encoding: str = "utf-8") -> Hash:
    """
    Return the SHA-256 hex digest of a platform-independent encoding of `value`.

    The value is first turned into bytes according to its exact type:

    * `int`   -> 4-byte big-endian signed integer (struct format `!i`) when it
      fits in 32 bits; otherwise a big-endian two's-complement encoding whose
      length is derived from the value's bit length.
    * `float` -> 8-byte big-endian IEEE 754 double (struct format `!d`).
    * `bool`  -> a single byte (struct format `!?`).
    * `str`   -> the string encoded with `encoding`.
    * anything else -> `repr(value)` encoded with `encoding` (best effort).

    `value` is the object to hash and `encoding` is the text encoding used for
    the string and repr cases. The result is a 64-character lowercase hex
    string wrapped as `Hash`.

    Encoding the text raises `UnicodeEncodeError` if it cannot be represented
    in `encoding`.
    """
    # Exact type checks (not isinstance) are deliberate: bool is a subclass of
    # int, and subclasses of the handled types must keep taking the repr path.
    kind = type(value)

    # The '!' prefix selects network byte order with standard sizes, so the
    # packed bytes do not depend on the host platform.
    # See https://docs.python.org/3/library/struct.html#struct-format-strings
    if kind is int:
        if -0x80000000 <= value <= 0x7FFFFFFF:
            payload = struct.pack("!i", value)
        else:
            # '!i' cannot hold this value and struct.pack would raise
            # struct.error; encode wider integers as big-endian two's
            # complement with room for the sign bit instead.
            width = (value.bit_length() + 8) // 8
            payload = value.to_bytes(width, "big", signed=True)
    elif kind is float:
        payload = struct.pack("!d", value)
    elif kind is bool:
        payload = struct.pack("!?", value)
    else:
        # Strings are hashed as their encoded bytes; every other object is
        # hashed through its repr() text.
        text = value if kind is str else repr(value)
        payload = text.encode(encoding)

    # Hash the buffer and return the hex string.
    return Hash(hashlib.sha256(payload).hexdigest())

In [None]:
# The digest of 'test' must match the expected SHA-256 hex string.
cryptotext = sha2('test')
assert cryptotext == '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'

## Parsing functions

In [None]:
#export
from typing import List
def parseString(value: str, delim: str = " ") -> List[str]:
    """
    Split `value` into tokens at every occurrence of `delim`.

    There is no limit on the number of splits. The default delimiter is a
    single space. Adjacent delimiters produce empty-string tokens, and a
    string that does not contain the delimiter comes back as a one-element
    list.
    """
    tokens = value.split(delim)
    return tokens

In [None]:
# A custom delimiter splits on every occurrence of that delimiter.
test_string = parseString('This-is-a-test', delim='-')
assert test_string == ['This', 'is', 'a', 'test']

In [None]:
# The default delimiter is a single space.
test_string = parseString('This is a test')
assert test_string == ['This', 'is', 'a', 'test']

In [None]:
# Input without the delimiter comes back as a single token.
test_string = parseString('abc123')
assert test_string == ['abc123']

In [None]:
#export
from nameparser import HumanName
def parseName(value: str) -> List[str]:
    """
    Break a human name into labelled parts using `nameparser.HumanName`.

    The name is parsed by `HumanName`, and the entries of its `as_dict()`
    result are returned as a list of `(label, text)` pairs, in the order the
    dictionary yields them.

    NOTE(review): the `List[str]` return annotation does not match what is
    actually returned; each element is a 2-tuple, not a string.
    """
    return list(HumanName(value).as_dict().items())

In [None]:
# A full name with title, nickname, middle initial and suffix.
test_name = parseName('Mr. Alex "Al" R. Flanagan I')
assert test_name == [
    ('title', 'Mr.'),
    ('first', 'Alex'),
    ('middle', 'R.'),
    ('last', 'Flanagan'),
    ('suffix', 'I'),
    ('nickname', 'Al'),
]

In [None]:
#export
def parseAddress(value: str, delim: str = " ") -> List[str]:
    """
    Split an address string into tokens.

    This simply delegates to `parseString` with the same `value` and `delim`,
    so no address-specific handling is applied; punctuation stays attached to
    the token it appears in.
    """
    address_tokens = parseString(value, delim)
    return address_tokens

In [None]:
# Commas and periods stay attached to their tokens; only spaces split.
test_address = parseAddress('92 Yonge St., Toronto Ontario, M5J 0B1')
assert test_address == ['92', 'Yonge', 'St.,', 'Toronto', 'Ontario,', 'M5J', '0B1']

In [None]:
#export
import phonenumbers
from typing import Tuple, Union
def parsePhone(value: str) -> List[Tuple[Union[str, int, None], int]]:
    """
    Break a phone number into a fixed, positionally tagged list of fields.

    The number is parsed with `phonenumbers` using "US" as the region and with
    the raw input retained. Each returned element is `(text, position)`, where
    every field has been passed through `str()` (so an unset attribute shows
    up as the text 'None'). Positions:

    0. the number formatted as RFC3966 (chosen because it handles extensions)
    1. country code
    2. first three digits of the national number
    3. next three digits of the national number
    4. remaining digits of the national number
    5. the whole national number
    6. extension
    7. italian_leading_zero
    8. number_of_leading_zeros
    9. raw input
    10. country_code_source
    11. preferred_domestic_carrier_code
    """
    parsed = phonenumbers.parse(value, "US", keep_raw_input=True)
    rfc3966 = phonenumbers.format_number(
        parsed, phonenumbers.PhoneNumberFormat.RFC3966
    )

    # Fixed 3/3/rest slicing of the national number's digits.
    digits = str(parsed.national_number)

    # Order matters: each field's index in this tuple is its position tag.
    fields = (
        rfc3966,
        parsed.country_code,
        digits[:3],
        digits[3:6],
        digits[6:],
        parsed.national_number,
        parsed.extension,
        parsed.italian_leading_zero,
        parsed.number_of_leading_zeros,
        parsed.raw_input,
        parsed.country_code_source,
        parsed.preferred_domestic_carrier_code,
    )
    return [(str(field), position) for position, field in enumerate(fields)]

In [None]:
# A ten-digit number without a country code, parsed with the default region.
test_phone = parsePhone('647-927-7392')
assert test_phone == [
    ('tel:+1-647-927-7392', 0),
    ('1', 1),
    ('647', 2),
    ('927', 3),
    ('7392', 4),
    ('6479277392', 5),
    ('None', 6),
    ('None', 7),
    ('None', 8),
    ('647-927-7392', 9),
    ('20', 10),
    ('None', 11),
]

In [None]:
#export
from stdnum.ca import sin
def parseSIN(value: str) -> List[Tuple[Union[str, int, None], int]]:
    """
    Break a Canadian SIN into positionally tagged fields.

    The input is passed through `stdnum.ca.sin.validate` and the result is
    formatted with `sin.format`. The formatted text is then split on '-'.
    Returned elements are `(text, position)`:

    0. the formatted SIN
    1. first dash-separated group
    2. second dash-separated group
    3. third dash-separated group

    NOTE(review): `sin.validate` is not guarded here, so any exception it
    raises for a bad SIN propagates to the caller. Confirm the exact exception
    types against the python-stdnum documentation.
    """
    formatted = sin.format(sin.validate(value))
    groups = formatted.split("-")
    return [
        (formatted, 0),
        (groups[0], 1),
        (groups[1], 2),
        (groups[2], 3),
    ]

In [None]:
# Space-separated input is normalised to the dash-separated form.
test_social = parseSIN('046 454 286')
assert test_social == [('046-454-286', 0), ('046', 1), ('454', 2), ('286', 3)]

In [None]:
#export
from datetime import datetime
import dateparser
def parseDate(value: str) -> List[Tuple[Union[str, int, None], int]]:
    """
    Break a date string into positionally tagged fields.

    The text is interpreted with `dateparser.parse`. Returned elements are
    `(text, position)`:

    0. the date in ISO format (YYYY-MM-DD)
    1. year
    2. month (no zero padding)
    3. day (no zero padding)

    When the text cannot be interpreted, the result is the single-element
    list `[(None, 0)]`.
    """
    # The result is either a datetime or None (handled just below). It is left
    # unannotated so the function does not reference `Optional`, which this
    # module does not import.
    parsed = dateparser.parse(value)
    if parsed is None:
        return [(None, 0)]
    return [
        (parsed.date().isoformat(), 0),
        (str(parsed.year), 1),
        (str(parsed.month), 2),
        (str(parsed.day), 3),
    ]

In [None]:
# Month and day come back without zero padding.
test_date = parseDate('1999-07-01')
assert test_date == [('1999-07-01', 0), ('1999', 1), ('7', 2), ('1', 3)]