# Simple zip code geocoder

Submit a CSV containing US zip codes or world FIPS codes and get the centroid of each code's polygon. The script writes out the input CSV plus the longitude and latitude of the centroid. If no match is found, the row is placed at Null Island (0, 0).


## Centroids derived from polygon layers
US list from: https://www.census.gov/geo/maps-data/data/cbf/cbf_zcta.html
World states and provinces from: https://www.naturalearthdata.com/downloads/10m-cultural-vectors/

## Requirements
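Input CSV file must have a zip code or world FIPS code field encoded as a string. Look at the [zip code CSV](zipcodes_latlng.csv) or the [world FIPS code CSV](world_states_latlng.csv) to make sure your address input has a correct value. The script treats the first row of the input as a header, and reads the zip code from the third column and the FIPS code from the sixth column.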

In [94]:
import csv
import os
import random
import subprocess
# Path of the input CSV to geocode; its first row is copied through as the header.
yourAddresses = "addresses.csv"
# Path of the final output CSV (the input rows plus the appended x and y columns).
yourResults = "geocoded_addresses.csv"

In [95]:
# Build a lookup table of centroids for all US zip codes.
# newline="" is how the csv module expects files to be opened, so that quoted
# fields containing line breaks are parsed correctly.
with open("zipcodes_latlng.csv", newline="") as zipcodes:
    # Create iterable csv.reader object
    zips = csv.reader(zipcodes)
    # Key: the first column (the zip code). Value: the row's last two columns,
    # kept as strings. Rows with fewer than two fields (e.g. a blank line, which
    # csv.reader returns as an empty list) cannot be indexed and are skipped.
    usdict = {row[0]: [row[-2], row[-1]] for row in zips if len(row) >= 2}

In [96]:
# Build a lookup table of centroids for world states/provinces.
# newline="" is how the csv module expects files to be opened, so that quoted
# fields containing line breaks are parsed correctly.
with open("world_states_latlng.csv", newline="") as world:
    # Create iterable csv.reader object
    worldProvs = csv.reader(world)
    # Key: the seventh column (index 6), which is later matched against the
    # FIPS code of each address. Value: the row's last two columns, kept as
    # strings. Rows too short to have a seventh column (including blank lines)
    # are skipped instead of raising IndexError.
    worlddict = {row[6]: [row[-2], row[-1]] for row in worldProvs if len(row) > 6}

In [97]:
# Geocode every row of the input CSV into an intermediate file, temp.csv.
#
# The first input row is copied through as the header with 'x' and 'y'
# appended. Every later row gets two extra fields:
#   1. the centroid stored in usdict for the row's zip code, if there is one;
#   2. otherwise the centroid stored in worlddict for the row's FIPS code;
#   3. otherwise 0, 0 ("Null Island").
# Each lookup is a single dict access, so the cost per row does not depend on
# the size of the tables (original author's note: about 670 addresses per
# second against 32k+ zip codes).

# 0-based positions of the lookup keys within an input row.
ZIP_COLUMN = 2
FIPS_COLUMN = 5

# newline="" on both files is the csv module's documented requirement; without
# it the writer produces an extra blank line after every row on Windows.
with open(yourAddresses, newline="") as addresses, \
        open("temp.csv", "w", newline="") as results:
    reader = csv.reader(addresses)
    writer = csv.writer(results, quotechar='"', quoting=csv.QUOTE_ALL)

    # Add field labels
    writer.writerow(next(reader, []) + ['x', 'y'])

    for row in reader:
        # csv.reader yields an empty list for a blank line: there is nothing to
        # geocode, and indexing it would raise IndexError.
        if not row:
            continue

        # A row too short to contain a key is treated as having no key (None
        # never appears in either table), so it falls through to Null Island
        # instead of aborting the whole run.
        zip_code = row[ZIP_COLUMN] if len(row) > ZIP_COLUMN else None
        fips_code = row[FIPS_COLUMN] if len(row) > FIPS_COLUMN else None

        # Look the zip code up among the US zip codes first.
        centroid = usdict.get(zip_code)

        if centroid is None:
            # Fall back to the world states/provinces FIPS code.
            centroid = worlddict.get(fips_code)
            if centroid is not None:
                print(f"####\n{zip_code}: no match. Attempting world provinences @ {fips_code}\n####")
            else:
                # Null Island, ahoy!
                print(f"*********************\n{zip_code} or {fips_code}: Null Island\n********************")
                centroid = [0, 0]

        writer.writerow(row + [centroid[0], centroid[1]])
        
                

####
4103: no match. Attempting world provinences @ AS04
####
####
123456: no match. Attempting world provinences @ PK05
####
####
na: no match. Attempting world provinences @ AE01
####
####
L2N4C3: no match. Attempting world provinences @ CA08
####
*********************
5999 or ??: Null Island
********************
####
V4T1B9: no match. Attempting world provinences @ CA02
####
####
HT7120: no match. Attempting world provinences @ HA14
####
####
100102: no match. Attempting world provinences @ CH22
####


In [98]:
# Check for duplicates and add some fuzz to the location to prevent coincident points.
#
# Reads temp.csv, whose last two fields on each row are the x and y values,
# and writes yourResults. The first row seen at a given (x, y) pair is written
# unchanged; every later row at that same pair has both coordinates shifted by
# independent random offsets of at most MAX_JITTER in either direction.
# The header row needs no special handling: it is the first occurrence of its
# own pair of labels, so it is never converted to float.

# Largest random offset applied to a repeated coordinate, in the units of the
# x/y columns.
MAX_JITTER = 0.001

# Number of rows seen so far at each (x, y) pair. The pair is the key because
# two points coincide only when both values match; keying on x alone would
# also jitter points that merely share an x value.
count = {}

# newline="" on both files is the csv module's documented requirement; without
# it the writer produces an extra blank line after every row on Windows.
with open("temp.csv", newline="") as addresses, \
        open(yourResults, "w", newline="") as results:
    reader = csv.reader(addresses)
    writer = csv.writer(results, quotechar='"', quoting=csv.QUOTE_ALL)

    for row in reader:
        if len(row) < 2:
            # No coordinate pair to inspect (e.g. a blank line, which
            # csv.reader returns as an empty list). Copy any content through
            # unchanged rather than fail on row[-2].
            if row:
                writer.writerow(row)
            continue

        location = (row[-2], row[-1])
        count[location] = count.get(location, 0) + 1

        if count[location] > 1:
            row[-2] = str(float(row[-2]) + random.uniform(-MAX_JITTER, MAX_JITTER))
            row[-1] = str(float(row[-1]) + random.uniform(-MAX_JITTER, MAX_JITTER))

        writer.writerow(row)


In [99]:
# Delete the intermediate temp.csv file.
# os.remove is used instead of running `rm` through a shell so that the cleanup
# also works where that command does not exist (e.g. Windows) and no shell is
# spawned for a single file deletion.
try:
    os.remove("temp.csv")
except FileNotFoundError:
    # Already gone, so there is nothing to clean up. The previous `rm` call did
    # not stop execution on failure either: its return code was never checked.
    pass