# Postal Code for the US

**[Work in progress]**

This notebook adds US zip code, place name, latitude, longitude data to .csv files for ingestion into the Knowledge Graph.

Data source: [GeoNames.org](http://download.geonames.org/export/zip/)

Author: Peter Rose (pwrose@ucsd.edu)

In [1]:
import os
from pathlib import Path
from io import BytesIO
import requests
from zipfile import ZipFile
import pandas as pd

In [2]:
pd.options.display.max_rows = None  # display all rows
pd.options.display.max_columns = None  # display all columsns

In [3]:
NEO4J_HOME = Path(os.getenv('NEO4J_HOME'))
print(NEO4J_HOME)

/Users/peter/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-4af96121-2328-4e2f-ba60-6d8b728a26d5/installation-4.0.3


### Download data from GeoNames

In [4]:
zip_url = 'http://download.geonames.org/export/zip/US.zip'

In [5]:
content = requests.get(zip_url)
zf = ZipFile(BytesIO(content.content))

for item in zf.namelist():
    print("File in zip: "+  item)

File in zip: readme.txt
File in zip: US.txt


In [6]:
columns = ['country code', 'postal code', 'place name', 'admin name1', 'admin code1', 'admin name2', 'admin code2', 
           'admin name3', 'admin code3', 'latitude', 'longitude', 'accuracy']

In [7]:
df = pd.read_csv(zf.open('US.txt'), names=columns, sep='\t', dtype='str', header=0)
df.fillna('', inplace=True)

In [8]:
df.head()

Unnamed: 0,country code,postal code,place name,admin name1,admin code1,admin name2,admin code2,admin name3,admin code3,latitude,longitude,accuracy
0,US,99571,Cold Bay,Alaska,AK,Aleutians East,13,,,55.1858,-162.7211,1
1,US,99583,False Pass,Alaska,AK,Aleutians East,13,,,54.8542,-163.4113,1
2,US,99612,King Cove,Alaska,AK,Aleutians East,13,,,55.0628,-162.3056,1
3,US,99661,Sand Point,Alaska,AK,Aleutians East,13,,,55.3192,-160.4914,1
4,US,99546,Adak,Alaska,AK,Aleutians West (CA),16,,,51.874,-176.634,1


In [9]:
df.query("`postal code` == '92130'")

Unnamed: 0,country code,postal code,place name,admin name1,admin code1,admin name2,admin code2,admin name3,admin code3,latitude,longitude,accuracy
4190,US,92130,San Diego,California,CA,San Diego,73,,,32.9555,-117.2252,4


In [10]:
df.rename(columns={'postal code': 'zip'}, inplace=True)
df.rename(columns={'place name': 'placeName'}, inplace=True)

Create a GeoNames code for admin2 division

In [11]:
df['admin2_id'] = df['country code'] + '.' + df['admin code1'] + '.' + df['admin code2']

In [12]:
df = df[['zip', 'admin2_id', 'placeName', 'latitude', 'longitude']]

In [13]:
df.query("zip == '92130'")

Unnamed: 0,zip,admin2_id,placeName,latitude,longitude
4190,92130,US.CA.073,San Diego,32.9555,-117.2252


In [14]:
df.to_csv(NEO4J_HOME / "import/00o-GeoNamesPostalCode.csv", index=False)

In [15]:
df.head()

Unnamed: 0,zip,admin2_id,placeName,latitude,longitude
0,99571,US.AK.013,Cold Bay,55.1858,-162.7211
1,99583,US.AK.013,False Pass,54.8542,-163.4113
2,99612,US.AK.013,King Cove,55.0628,-162.3056
3,99661,US.AK.013,Sand Point,55.3192,-160.4914
4,99546,US.AK.016,Adak,51.874,-176.634
