From c741843e970f26f1ace3a3dedde7b2d5dfd11fee Mon Sep 17 00:00:00 2001
From: Camila Maia <cmaiacd@gmail.com>
Date: Sat, 24 Jun 2023 14:10:25 +0200
Subject: [PATCH] Refatoracao do codigo dos utilitarios de cpf (#105)

---
 brutils/cpf.py    | 115 ++++++++++++++++++----------------------------
 tests/test_cpf.py |  39 ++++++++--------
 2 files changed, 64 insertions(+), 90 deletions(-)

diff --git a/brutils/cpf.py b/brutils/cpf.py
index 74aa83d..d0dad3c 100644
--- a/brutils/cpf.py
+++ b/brutils/cpf.py
@@ -7,33 +7,31 @@
 
 def sieve(dirty):  # type: (str) -> str
     """
-    Filters out CPF formatting symbols. Symbols that are not used
-    in the CPF formatting are left unfiltered on purpose so that
-    if fails other tests, because their presence indicate that the
-    input was somehow corrupted.
+    Filters out CPF formatting symbols.
+
+    Symbols that are not used in the CPF formatting are left
+    unfiltered on purpose so that the input fails other tests,
+    because their presence indicates that the input was somehow corrupted.
     """
 
     return "".join(filter(lambda char: char not in ".-", dirty))
 
 
 def parse(dirty):  # type: (str) -> str
-    """
-    Filters out CPF formatting symbols. Symbols that are not used
-    in the CPF formatting are left unfiltered on purpose so that
-    if fails other tests, because their presence indicate that the
-    input was somehow corrupted.
-    """
-
+    """Alias of `sieve` with a more descriptive name."""
     return sieve(dirty)
 
 
 def display(cpf):  # type: (str) -> str
     """
-    Will format an adequately formatted numbers-only CPF string,
+    Format an adequately formatted numbers-only CPF string,
     adding in standard formatting visual aid symbols for display.
+    Kept for backward compatibility with version 1.0.1.
     """
+
     if not cpf.isdigit() or len(cpf) != 11 or len(set(cpf)) == 1:
         return None
+
     return "{}.{}.{}-{}".format(cpf[:3], cpf[3:6], cpf[6:9], cpf[9:])
 
 
@@ -46,37 +44,8 @@ def format_cpf(cpf):  # type: (str) -> str
 
     if not is_valid(cpf):
         return None
-    return "{}.{}.{}-{}".format(cpf[:3], cpf[3:6], cpf[6:9], cpf[9:])
 
-
-# CALCULATORS
-#############
-
-
-def hashdigit(cpf, position):  # type: (str, int) -> int
-    """
-    Will compute the given `position` checksum digit for the `cpf`
-    input. The input needs to contain all elements previous to
-    `position` else computation will yield the wrong result.
-    """
-    val = (
-        sum(
-            int(digit) * weight
-            for digit, weight in zip(cpf, range(position, 1, -1))
-        )
-        % 11
-    )
-    return 0 if val < 2 else 11 - val
-
-
-def checksum(basenum):  # type: (str) -> str
-    """
-    Will compute the checksum digits for a given CPF base number.
-    `basenum` needs to be a digit-string of adequate length.
-    """
-    verifying_digits = str(hashdigit(basenum, 10))
-    verifying_digits += str(hashdigit(basenum + verifying_digits, 11))
-    return verifying_digits
+    return "{}.{}.{}-{}".format(cpf[:3], cpf[3:6], cpf[6:9], cpf[9:])
 
 
 # OPERATIONS
@@ -88,54 +57,58 @@ def validate(cpf):  # type: (str) -> bool
     Returns whether or not the verifying checksum digits of the
     given `cpf` match it's base number. Input should be a digit
     string of proper length.
+
+    Source: https://www.geradorcpf.com/algoritmo_do_cpf.htm
+    Kept for backward compatibility with version 1.0.1.
     """
+
     if not cpf.isdigit() or len(cpf) != 11 or len(set(cpf)) == 1:
         return False
-    return all(hashdigit(cpf, i + 10) == int(v) for i, v in enumerate(cpf[9:]))
 
-
-def generate():  # type: () -> str
-    """Generates a random valid CPF digit string."""
-    base = str(randint(1, 999999998)).zfill(9)
-    while len(set(base)) == 1:
-        base = str(randint(1, 999999998)).zfill(9)
-    return base + checksum(base)
+    return all(_hashdigit(cpf, i + 10) == int(v) for i, v in enumerate(cpf[9:]))
 
 
 def is_valid(cpf):  # type: (str) -> bool
     """
-    Returns whether or not a cpf is_valid.
-    Source: https://www.geradorcpf.com/algoritmo_do_cpf.htm
+    Checks that `cpf` is a string and then delegates to `validate`.
     """
-    is_syntax_valid = isinstance(cpf, str) and len(cpf) == 11 and cpf.isdigit()
 
-    return is_syntax_valid and _is_semantic_valid(cpf)
+    return isinstance(cpf, str) and validate(cpf)
 
 
-def _is_semantic_valid(cpf):
-    cpf = [int(digit) for digit in cpf]
+def generate():  # type: () -> str
+    """Return a random valid CPF string; all-equal-digit bases are redrawn."""
 
-    constants_tenth_digit = [10, 9, 8, 7, 6, 5, 4, 3, 2]
-    is_tenth_digit_valid = _is_digit_valid(cpf, constants_tenth_digit, 9)
+    base = str(randint(1, 999999998)).zfill(9)
 
-    constants_eleventh_digit = [11, 10, 9, 8, 7, 6, 5, 4, 3, 2]
-    is_eleventh_digit_valid = _is_digit_valid(cpf, constants_eleventh_digit, 10)
+    return base + _checksum(base) if len(set(base)) > 1 else generate()
 
-    return is_tenth_digit_valid and is_eleventh_digit_valid
 
+def _hashdigit(cpf, position):  # type: (str, int) -> int
+    """
+    Will compute the given `position` checksum digit for the `cpf`
+    input. The input needs to contain all elements previous to
+    `position` else computation will yield the wrong result.
+    """
 
-def _is_digit_valid(cpf, constants, digit_index):
-    sum = _multiply_and_sum_lists(cpf, constants, digit_index)
-    rest = sum % 11
-    digit = cpf[digit_index]
+    val = (
+        sum(
+            int(digit) * weight
+            for digit, weight in zip(cpf, range(position, 1, -1))
+        )
+        % 11
+    )
 
-    return (rest <= 2 and digit == 0) or (rest > 2 and digit == (11 - rest))
+    return 0 if val < 2 else 11 - val
 
 
-def _multiply_and_sum_lists(list_1, list_2, max_index):
-    sum = 0
+def _checksum(basenum):  # type: (str) -> str
+    """
+    Will compute the checksum digits for a given CPF base number.
+    `basenum` needs to be a digit-string of adequate length.
+    """
 
-    for index in range(0, max_index):
-        sum += list_1[index] * list_2[index]
+    verifying_digits = str(_hashdigit(basenum, 10))
+    verifying_digits += str(_hashdigit(basenum + verifying_digits, 11))
 
-    return sum
+    return verifying_digits
diff --git a/tests/test_cpf.py b/tests/test_cpf.py
index f1e1550..434ddef 100644
--- a/tests/test_cpf.py
+++ b/tests/test_cpf.py
@@ -12,13 +12,13 @@
 from brutils.cpf import (
     sieve,
     display,
-    hashdigit,
-    checksum,
     validate,
     generate,
     is_valid,
     format_cpf,
     parse,
+    _hashdigit,
+    _checksum,
 )
 from unittest import TestCase, main
 
@@ -33,12 +33,10 @@ def test_sieve(self):
         assert sieve("...---...") == ""
 
     def test_parse(self):
-        assert parse("00000000000") == "00000000000"
-        assert parse("123.456.789-10") == "12345678910"
-        assert parse("134..2435.-1892.-") == "13424351892"
-        assert parse("abc1230916*!*&#") == "abc1230916*!*&#"
-        assert parse("ab.c1.--.2-309.-1-.6-.*.-!*&#") == "abc1230916*!*&#"
-        assert parse("...---...") == ""
+        with patch("brutils.cpf.sieve") as mock_sieve:
+            # parse is an alias: it must forward its input and return sieve's result
+            assert parse("123.456.789-10") is mock_sieve.return_value
+            mock_sieve.assert_called_once_with("123.456.789-10")
 
     def test_display(self):
         assert display("00000000011") == "000.000.000-11"
@@ -58,16 +56,6 @@ def test_format_cpf(self):
             # When cpf isn't valid, returns None
             assert format_cpf("11144477735") is None
 
-    def test_hashdigit(self):
-        assert hashdigit("000000000", 10) == 0
-        assert hashdigit("0000000000", 11) == 0
-        assert hashdigit("52513127765", 10) == 6
-        assert hashdigit("52513127765", 11) == 5
-
-    def test_checksum(self):
-        assert checksum("000000000") == "00"
-        assert checksum("525131277") == "65"
-
     def test_validate(self):
         assert validate("52513127765")
         assert validate("52599927765")
@@ -83,6 +71,9 @@ def test_is_valid(self):
         # When cpf does not contain only digits, returns False
         assert not is_valid("1112223334-")
 
+        # When CPF has only the same digit, returns false
+        assert not is_valid("11111111111")
+
         # When rest_1 is lt 2 and the 10th digit is not 0, returns False
         assert not is_valid("11111111215")
 
@@ -100,10 +91,20 @@ def test_is_valid(self):
         assert is_valid("11111111200")
 
     def test_generate(self):
-        for i in range(1000):
+        for _ in range(10_000):
             assert validate(generate())
             assert display(generate()) is not None
 
+    def test__hashdigit(self):
+        assert _hashdigit("000000000", 10) == 0
+        assert _hashdigit("0000000000", 11) == 0
+        assert _hashdigit("52513127765", 10) == 6
+        assert _hashdigit("52513127765", 11) == 5
+
+    def test__checksum(self):
+        assert _checksum("000000000") == "00"
+        assert _checksum("525131277") == "65"
+
 
 if __name__ == "__main__":
     main()