From 7ec64dd3de255e5e7c3f6b5c3e8aa7353ccd2626 Mon Sep 17 00:00:00 2001 From: David Cain Date: Tue, 21 Mar 2023 15:15:15 -0600 Subject: [PATCH] Separate freely-associated states & US territories (#1828) * Separate freely-associated states & US territories There are three sovereign states that are members of the Compact of Free Association. These states are not US territories, and having `state_abbr()` include them by default can cause problems with other tooling. For the purposes of a valid deliverable address, it's useful to include these freely-associated states by default. But it's also helpful to be able to dynamically exclude them. (Furthermore, there's good reason to simply be correct about how these sovereign states are referred to). * Fix Black formatting (line length is 120) --- faker/providers/address/en_US/__init__.py | 42 ++++++++++++++++------- tests/providers/test_address.py | 23 ++++++++++--- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/faker/providers/address/en_US/__init__.py b/faker/providers/address/en_US/__init__.py index 4bf14695ad..728ecab3fa 100644 --- a/faker/providers/address/en_US/__init__.py +++ b/faker/providers/address/en_US/__init__.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Optional +from typing import Optional, Set from ..en import Provider as AddressProvider @@ -419,7 +419,8 @@ class Provider(AddressProvider): "WV": (24701, 26886), "WI": (53001, 54990), "WY": (82001, 83128), - # Territories - incomplete ranges with accurate subsets - https://www.geonames.org/postalcode-search.html + # Territories & freely-associated states + # incomplete ranges with accurate subsets - https://www.geonames.org/postalcode-search.html "AS": (96799, 96799), "FM": (96941, 96944), "GU": (96910, 96932), @@ -432,16 +433,21 @@ class Provider(AddressProvider): territories_abbr = ( "AS", - "FM", "GU", - "MH", "MP", - "PW", "PR", "VI", ) - states_and_territories_abbr = states_abbr + territories_abbr + # 
Freely-associated states (sovereign states; members of COFA) + # https://en.wikipedia.org/wiki/Compact_of_Free_Association + freely_associated_states_abbr = ( + "FM", + "MH", + "PW", + ) + + known_usps_abbr = states_abbr + territories_abbr + freely_associated_states_abbr military_state_abbr = ("AE", "AA", "AP") @@ -494,16 +500,28 @@ def administrative_unit(self) -> str: state = administrative_unit - def state_abbr(self, include_territories: bool = True) -> str: + def state_abbr( + self, + include_territories: bool = True, + include_freely_associated_states: bool = True, + ) -> str: """ - :returns: A random state or territory abbreviation. + :returns: A random two-letter USPS postal code + + By default, the resulting code may abbreviate any of the fifty states, + five US territories, or three freely-associated sovereign states. :param include_territories: If True, territories will be included. - If False, only states will be returned. + If False, US territories will be excluded. + :param include_freely_associated_states: If True, freely-associated states will be included. + If False, sovereign states in free association with the US will be excluded. 
""" + abbreviations: Set[str] = set(self.states_abbr) if include_territories: - return self.random_element(self.states_and_territories_abbr) - return self.random_element(self.states_abbr) + abbreviations.update(self.territories_abbr) + if include_freely_associated_states: + abbreviations.update(self.freely_associated_states_abbr) + return self.random_element(abbreviations) def postcode(self) -> str: return "%05d" % self.generator.random.randint(501, 99950) @@ -520,7 +538,7 @@ def postcode_in_state(self, state_abbr: Optional[str] = None) -> str: if state_abbr is None: state_abbr = self.random_element(self.states_abbr) - if state_abbr in self.states_and_territories_abbr: + if state_abbr in self.known_usps_abbr: postcode = "%d" % ( self.generator.random.randint( self.states_postcode[state_abbr][0], diff --git a/tests/providers/test_address.py b/tests/providers/test_address.py index 0576484e89..baa34fdc45 100644 --- a/tests/providers/test_address.py +++ b/tests/providers/test_address.py @@ -519,14 +519,29 @@ def test_state_abbr(self, faker, num_samples): for _ in range(num_samples): state_abbr = faker.state_abbr() assert isinstance(state_abbr, str) - states_and_territories = EnUsAddressProvider.states_and_territories_abbr + states_and_territories = EnUsAddressProvider.known_usps_abbr assert state_abbr in states_and_territories + def test_state_abbr_states_only(self, faker, num_samples): + for _ in range(num_samples): + state_abbr = faker.state_abbr(include_territories=False, include_freely_associated_states=False) + assert isinstance(state_abbr, str) + assert state_abbr in EnUsAddressProvider.states_abbr + def test_state_abbr_no_territories(self, faker, num_samples): for _ in range(num_samples): state_abbr = faker.state_abbr(include_territories=False) assert isinstance(state_abbr, str) - assert state_abbr in EnUsAddressProvider.states_abbr + assert ( + state_abbr in EnUsAddressProvider.states_abbr + or state_abbr in EnUsAddressProvider.freely_associated_states_abbr + ) 
+ + def test_state_abbr_no_freely_associated_states(self, faker, num_samples): + for _ in range(num_samples): + state_abbr = faker.state_abbr(include_freely_associated_states=False) + assert isinstance(state_abbr, str) + assert state_abbr in EnUsAddressProvider.states_abbr or state_abbr in EnUsAddressProvider.territories_abbr def test_postcode(self, faker, num_samples): for _ in range(num_samples): @@ -536,7 +551,7 @@ def test_postcode(self, faker, num_samples): def test_postcode_in_state(self, faker, num_samples): for _ in range(num_samples): - for state_abbr in EnUsAddressProvider.states_and_territories_abbr: + for state_abbr in EnUsAddressProvider.known_usps_abbr: code = faker.postcode_in_state(state_abbr) assert re.fullmatch(r"\d{5}", code) assert int(code) >= EnUsAddressProvider.states_postcode[state_abbr][0] @@ -553,7 +568,7 @@ def test_zipcode(self, faker, num_samples): def test_zipcode_in_state(self, faker, num_samples): for _ in range(num_samples): - for state_abbr in EnUsAddressProvider.states_and_territories_abbr: + for state_abbr in EnUsAddressProvider.known_usps_abbr: code = faker.zipcode_in_state(state_abbr) assert re.fullmatch(r"\d{5}", code) assert int(code) >= EnUsAddressProvider.states_postcode[state_abbr][0]