Skip to content

Commit

Permalink
Added an "Invalid SSN" generator to the en_US SSN Provider
Browse files Browse the repository at this point in the history
  • Loading branch information
darrylwhiting authored and fcurella committed May 24, 2019
1 parent 9071f74 commit c412d56
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ nosetests.xml

# IDE
*.sw[po]
*.iml
*.ipr
68 changes: 67 additions & 1 deletion faker/providers/ssn/en_US/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@


class Provider(BaseProvider):
INVALID_SSN_TYPE = 'INVALID_SSN'
SSN_TYPE = 'SSN'
ITIN_TYPE = 'ITIN'
EIN_TYPE = 'EIN'
Expand Down Expand Up @@ -139,6 +140,68 @@ def ein(self):
ein = "{0:s}-{1:07d}".format(ein_prefix, sequence)
return ein

def invalid_ssn(self):
    """ Generate a random *invalid* United States Social Security Number (SSN).

    A valid SSN ("AAA-GG-SSSS") obeys all of the following rules:
        The area (positions 1 - 3) does not begin with the number 9
        The area (positions 1 - 3) is not 666
        The area (positions 1 - 3) is not 000
        The group (positions 4 - 5) is not 00
        The serial (positions 6 - 9) is not 0000
    https://www.ssa.gov/kc/SSAFactSheet--IssuingSSNs.pdf

    The value returned here deliberately breaks at least one of those rules.
    Additionally, when the area is in 900 - 999 the group is never one of the
    ITIN related "group" values, so the result is NOT a valid ITIN either.

    Every random draw goes through ``self.random_int`` so that the result is
    reproducible when the generator is seeded.

    :return: the invalid SSN as a string formatted "AAA-GG-SSSS"
    """
    # ITIN related group values: 70 - 99 except 89 and 93.
    itin_group_numbers = set(range(70, 100)) - {89, 93}

    area = self.random_int(min=0, max=999)
    if area in {666, 0}:
        # The area alone already makes the number invalid.
        group = self.random_int(0, 99)
        serial = self.random_int(0, 9999)
    elif area < 900:
        # Allocatable area: zero out either the group or the serial
        # (50/50) to make the number invalid.
        if self.random_int(min=1, max=1000) <= 500:
            group = 0
            serial = self.random_int(0, 9999)
        else:
            group = self.random_int(0, 99)
            serial = 0
    else:
        # 9xx area: invalid as an SSN; pick a group that is not an ITIN group.
        # The index is drawn with self.random_int (not the global ``random``
        # module) so that seeding the generator controls this value too.
        non_itin_groups = [g for g in range(0, 100) if g not in itin_group_numbers]
        group = non_itin_groups[self.random_int(0, len(non_itin_groups) - 1)]
        serial = self.random_int(0, 9999)

    return "{0:03d}-{1:02d}-{2:04d}".format(area, group, serial)

def ssn(self, taxpayer_identification_number_type=SSN_TYPE):
""" Generate a random United States Taxpayer Identification Number of the specified type.
Expand All @@ -149,6 +212,8 @@ def ssn(self, taxpayer_identification_number_type=SSN_TYPE):
return self.itin()
elif taxpayer_identification_number_type == self.EIN_TYPE:
return self.ein()
elif taxpayer_identification_number_type == self.INVALID_SSN_TYPE:
return self.invalid_ssn()
elif taxpayer_identification_number_type == self.SSN_TYPE:

# Certain numbers are invalid for United States Social Security
Expand All @@ -166,4 +231,5 @@ def ssn(self, taxpayer_identification_number_type=SSN_TYPE):
return ssn

else:
raise ValueError("taxpayer_identification_number_type must be one of 'SSN', 'EIN', or 'ITIN'.")
raise ValueError("taxpayer_identification_number_type must be one of 'SSN', 'EIN', 'ITIN',"
" or 'INVALID_SSN'.")
92 changes: 92 additions & 0 deletions tests/providers/test_ssn.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,98 @@ def test_ssn(self):
assert 1 <= int(serial) <= 9999
assert area != '666'

def test_invalid_ssn(self):
    self.factory.random = random2.Random()
    # Each seed below was picked (per the original author) to drive the
    # generator down one specific path: '666-92-7944', '000-54-2963',
    # '956-GG-9478', '436-00-1386' and '134-76-0000' respectively. For the
    # 9xx case the group (GG) is not pinned to an exact value; it is only
    # required to fall outside the ITIN group numbers, so that a valid ITIN
    # is never handed back as an "invalid SSN".
    #
    # The first result is also checked for shape: 11 characters including
    # dashes, and digits only once the dashes are removed.
    #
    # Background: a United States Social Security Number (SSN) has the
    # format "AAA-GG-SSSS" - a three digit area number (formerly assigned
    # by geographical region), a two digit group number and a four digit
    # serial number. SSNs with the following characteristics are not
    # allocated:
    #
    # 1) Numbers with all zeros in any digit group
    #    (000-##-####, ###-00-####, ###-##-0000).
    #
    # 2) Numbers with 666 or 900-999 in the first digit group.
    #
    # https://en.wikipedia.org/wiki/Social_Security_number
    #
    # ITIN explained:
    # https://www.irs.gov/individuals/international-taxpayers/general-itin-information

    # ITIN group numbers are 70-99 with 89 and 93 left out.
    itin_group_numbers = set(range(70, 100)) - {89, 93}
    id_type = 'INVALID_SSN'

    # Area 666.
    self.factory.seed_instance(1143)
    generated = self.factory.ssn(taxpayer_identification_number_type=id_type)
    assert len(generated) == 11
    assert generated.replace('-', '').isdigit()
    assert generated.startswith('666')

    # Area 000.
    self.factory.seed_instance(1514)
    generated = self.factory.ssn(taxpayer_identification_number_type=id_type)
    assert generated.startswith('000')

    # Area 900-999 with a non-ITIN group.
    self.factory.seed_instance(2)
    generated = self.factory.ssn(taxpayer_identification_number_type=id_type)
    area, group, serial = generated.split('-')
    assert 900 <= int(area) <= 999
    assert int(group) not in itin_group_numbers

    # Allocatable area with group 00.
    self.factory.seed_instance(9)
    generated = self.factory.ssn(taxpayer_identification_number_type=id_type)
    area, group, serial = generated.split('-')
    assert int(area) < 900
    assert int(group) == 0

    # Allocatable area with serial 0000.
    self.factory.seed_instance(1)
    generated = self.factory.ssn(taxpayer_identification_number_type=id_type)
    area, group, serial = generated.split('-')
    assert int(area) < 900
    assert int(serial) == 0

def test_prohibited_ssn_value(self):
# 666 is a prohibited value. The magic number selected as a seed
# is one that would (if not specifically checked for) return an
Expand Down

3 comments on commit c412d56

@swehba
Copy link

@swehba swehba commented on c412d56 Aug 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When initializing the itin_group_numbers wouldn't it be clearer to do something like this:

itin_group_numbers =[n for n in range(70, 100) if n not in {89, 93}]

or even keep the list as you have it, but add commented-out lines for 89 and 93, like this:

itin_group_numbers = [
    70,
    71,
    72,
    73,
    74,
    75,
    76,
    77,
    78,
    79,
    80,
    81,
    82,
    83,
    84,
    85,
    86,
    87,
    88,
    # 89,
    90,
    91,
    92,
    # 93,
    94,
    95,
    96,
    97,
    98,
    99]

@fcurella
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@swehba since it's a predefined list, it's fine to have it statically hardcoded. Another added benefit is micro-optimization: although the benefit is arguably negligible, a hardcoded list is less computationally expensive than one we have to compute.

Either way we're really splitting hairs; there isn't much practical difference between the two approaches. The real reason the list is hardcoded is that someone submitted the code this way, and it was good enough :)

@swehba
Copy link

@swehba swehba commented on c412d56 Aug 23, 2019 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.