Using American Community Survey data via python package CensusData (install: pip install CensusData; documentation: https://jtleider.github.io/censusdata/)

try using 1 year estimates: though the least accurate, it is the most recent.  And although it is only available for populations >=65,000, this is a good limit to impose to keep the number of areas examined in check

In [1]:
import pandas as pd
import censusdata
import re
import urllib, json, requests
import time

In [2]:
# https://jtleider.github.io/censusdata/example2.html
pd.set_option('display.expand_frame_repr', False) # the frame will be huge, don't expand
pd.set_option('display.precision', 4)

In [3]:
# searched 2018 1-year ACS data profiles for specific fields to download using search terms like 'income', 'poverty', 'insurance', 'transportation', etc.
# DP = data profile table; explanation of table types at https://www.census.gov/programs-surveys/acs/guidance/which-data-tool/table-ids-explained.html

censusdata.search('acs1', 2018, 'label', 'transportation', 'profile')
censusdata.search('acs1', 2018, 'label', 'area', 'profile')
# use fields('vars') 'DP02_0015E', 'DP03_0119PE', 'DP03_0051E', 'DP03_0021PE', 'DP05_0001E', 'DP05_0029E', 'DP03_0095PE', 'DP04_0143PE'

[('DP02PR_0091E',
  'SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO',
  'Estimate!!PLACE OF BIRTH!!Total population!!Native!!Born in U.S. Island Areas, or born abroad of American parents'),
 ('DP02PR_0091PE',
  'SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO',
  'Percent Estimate!!PLACE OF BIRTH!!Total population!!Native!!Born in U.S. Island Areas, or born abroad of American parents'),
 ('DP02_0091E',
  'SELECTED SOCIAL CHARACTERISTICS IN THE UNITED STATES',
  'Estimate!!PLACE OF BIRTH!!Total population!!Native!!Born in Puerto Rico, U.S. Island areas, or born abroad to American parent(s)'),
 ('DP02_0091PE',
  'SELECTED SOCIAL CHARACTERISTICS IN THE UNITED STATES',
  'Percent Estimate!!PLACE OF BIRTH!!Total population!!Native!!Born in Puerto Rico, U.S. Island areas, or born abroad to American parent(s)')]

In [4]:
fields = ['DP02_0015E', 'DP03_0119PE', 'DP03_0051E', 'DP03_0021PE', 'DP05_0001E', 'DP05_0029E', 'DP03_0095PE', 'DP04_0143PE']
counties = censusdata.download('acs1', 2018, censusdata.censusgeo([('county', '*')]), fields, tabletype = 'profile')

In [5]:
counties.describe()

Unnamed: 0,DP02_0015E,DP03_0119PE,DP03_0051E,DP03_0021PE,DP05_0001E,DP05_0029E,DP03_0095PE,DP04_0143PE
count,827.0,838.0,838.0,838.0,838.0,838.0,838.0,838.0
mean,2.5905,9.3678,123120.0,-15513000.0,333710.0,51702.0,329340.0,-889150000.0
std,0.2503,5.2102,204640.0,123660000.0,587270.0,82760.0,582460.0,5424900.0
min,1.9,1.3,20411.0,-1000000000.0,62607.0,6359.0,57066.0,-1000000000.0
25%,2.41,5.9,36032.0,0.3,95300.0,16251.0,93264.0,-888890000.0
50%,2.55,8.5,59585.0,0.7,159040.0,26132.0,156720.0,-888890000.0
75%,2.71,11.5,126200.0,1.8,330890.0,52966.0,327890.0,-888890000.0
max,4.11,49.2,3313900.0,60.7,10106000.0,1376000.0,10035000.0,-888890000.0


In [6]:
# apparently censusdata doesn't retrieve labels with the download, so creating a dictionary with that info now
field_info = dict()
for field in fields:
    field_info[field] = censusdata.variable_info.censustable('acs1', 2018, table=re.sub("_.+$", "", field)).get(field)
field_info

{'DP02_0015E': {'label': 'Estimate!!HOUSEHOLDS BY TYPE!!Total households!!Average household size',
  'concept': 'SELECTED SOCIAL CHARACTERISTICS IN THE UNITED STATES',
  'predicateType': 'float'},
 'DP03_0119PE': {'label': 'Percent Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PAST 12 MONTHS IS BELOW THE POVERTY LEVEL!!All families',
  'concept': 'SELECTED ECONOMIC CHARACTERISTICS',
  'predicateType': 'float'},
 'DP03_0051E': {'label': 'Estimate!!INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS)!!Total households',
  'concept': 'SELECTED ECONOMIC CHARACTERISTICS',
  'predicateType': 'int'},
 'DP03_0021PE': {'label': 'Percent Estimate!!COMMUTING TO WORK!!Workers 16 years and over!!Public transportation (excluding taxicab)',
  'concept': 'SELECTED ECONOMIC CHARACTERISTICS',
  'predicateType': 'float'},
 'DP05_0001E': {'label': 'Estimate!!SEX AND AGE!!Total population',
  'concept': 'ACS DEMOGRAPHIC AND HOUSING ESTIMATES',
  'predicateType': 'int'},
 'DP05_0029E':

In [7]:
# creating shorter labels...
fields = ['DP02_0015E', 'DP03_0119PE', 'DP03_0051E', 'DP03_0021PE', 'DP05_0001E', 'DP05_0029E', 'DP03_0095PE', 'DP04_0143PE']
labels = ['household_size', 'prc_fam_poverty', 'avg_income', 'prc_public_transp', 'population', 'pop_65_plus', 'prc_health_ins', 'avg_rent_prc_income']

In [11]:
counties.columns = labels

In [12]:
counties.head()

Unnamed: 0,household_size,prc_fam_poverty,avg_income,prc_public_transp,population,pop_65_plus,prc_health_ins,avg_rent_prc_income
"Morgan County, Alabama: Summary level: 050, state:01> county:103",2.56,9.9,45851,0.4,119089,20464,117677,-888888888
"Kings County, California: Summary level: 050, state:06> county:031",3.15,15.6,43727,0.5,151366,15413,136372,-888888888
"Monterey County, California: Summary level: 050, state:06> county:053",3.31,10.5,126299,1.3,435594,59491,419413,-888888888
"Nevada County, California: Summary level: 050, state:06> county:057",2.37,5.1,41447,0.1,99696,27746,98472,-888888888
"Shasta County, California: Summary level: 050, state:06> county:089",2.59,9.5,68198,0.8,180040,37027,178552,-888888888


In [15]:
county = counties.index
county

Index([         Morgan County, Alabama: Summary level: 050, state:01> county:103,
              Kings County, California: Summary level: 050, state:06> county:031,
           Monterey County, California: Summary level: 050, state:06> county:053,
             Nevada County, California: Summary level: 050, state:06> county:057,
             Shasta County, California: Summary level: 050, state:06> county:089,
               Yuba County, California: Summary level: 050, state:06> county:115,
              Douglas County, Colorado: Summary level: 050, state:08> county:035,
         Fairfield County, Connecticut: Summary level: 050, state:09> county:001,
                 Kent County, Delaware: Summary level: 050, state:10> county:001,
           New Castle County, Delaware: Summary level: 050, state:10> county:003,
       ...
             Kitsap County, Washington: Summary level: 050, state:53> county:035,
          Snohomish County, Washington: Summary level: 050, state:53> county:061,
     

In [21]:
state = [re.sub(": Summary.+$", "", x.name) for x in county] # move "County, State" to state list
county = [re.sub(", .+$", "", x) for x in state] # move just "County" to county list
state = [re.sub("^.*, ", "", x) for x in state] # remove "County, " to have just "State" in state list

In [25]:
state[0:5]

['Alabama', 'California', 'California', 'California', 'California']

In [26]:
county[0:5]

['Morgan County',
 'Kings County',
 'Monterey County',
 'Nevada County',
 'Shasta County']

In [39]:
county # remove "County", "Parish", "Borough", "city"?, "Municipio", "Municipality"?
#[number for number in numbers if number < 3]
[x for x in county if "County" not in x]

['Lafayette Parish',
 'Jefferson Parish',
 'Lafourche Parish',
 'Baltimore city',
 'St. Louis city',
 'Alexandria city',
 'Suffolk city',
 'Caguas Municipio',
 'Matanuska-Susitna Borough',
 'Ouachita Parish',
 'Lynchburg city',
 'Anchorage Municipality',
 'Fairbanks North Star Borough',
 'District of Columbia',
 'Calcasieu Parish',
 'East Baton Rouge Parish',
 'St. Landry Parish',
 'Tangipahoa Parish',
 'Newport News city',
 'Norfolk city',
 'Portsmouth city',
 'Virginia Beach city',
 'Caddo Parish',
 'Livingston Parish',
 'Rapides Parish',
 'Carolina Municipio',
 'San Juan Municipio',
 'Trujillo Alto Municipio',
 'Arecibo Municipio',
 'Mayagüez Municipio',
 'Ascension Parish',
 'Orleans Parish',
 'Terrebonne Parish',
 'Chesapeake city',
 'Hampton city',
 'Richmond city',
 'Roanoke city',
 'Guaynabo Municipio',
 'Bossier Parish',
 'Iberia Parish',
 'St. Tammany Parish',
 'Bayamón Municipio',
 'Ponce Municipio',
 'Toa Alta Municipio',
 'Toa Baja Municipio']

In [8]:
# airport info downloaded from: https://github.com/jpatokal/openflights and described at https://openflights.org/data.html
airports = pd.read_csv("airports.dat", header=None)
airports.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.0817,145.392,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.2071,145.789,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.8268,144.296,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.5698,146.726,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.4434,147.22,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [120]:
airports.columns = ['openflights_id', 'name', 'city', 'country', 'IATA', 'ICAO', 'lat', 'lng', 'alt', 'timezone', 'DST', 'Tz', 'type', 'source']

In [121]:
# limit airports to those in US
airports = airports[airports['country'] == 'United States']
airports.head()

Unnamed: 0,openflights_id,name,city,country,IATA,ICAO,lat,lng,alt,timezone,DST,Tz,type,source
3212,3411,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134,-143.582,2,-9,A,America/Anchorage,airport,OurAirports
3213,3412,Wainwright Air Station,Fort Wainwright,United States,\N,PAWT,70.6134,-159.86,35,-9,A,America/Anchorage,airport,OurAirports
3214,3413,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.8751,-166.11,16,-9,A,America/Anchorage,airport,OurAirports
3215,3414,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.7329,-163.005,22,-9,A,America/Anchorage,airport,OurAirports
3216,3415,Hilo International Airport,Hilo,United States,ITO,PHTO,19.7214,-155.048,38,-10,N,Pacific/Honolulu,airport,OurAirports


In [122]:
airports[airports['timezone'] == '-8']

Unnamed: 0,openflights_id,name,city,country,IATA,ICAO,lat,lng,alt,timezone,DST,Tz,type,source
3234,3433,Marina Municipal Airport,Fort Ord,United States,OAR,KOAR,36.6819,-121.762,137,-8,A,America/Los_Angeles,airport,OurAirports
3235,3434,Sacramento Mather Airport,Sacramento,United States,MHR,KMHR,38.5539,-121.298,98,-8,A,America/Los_Angeles,airport,OurAirports
3236,3435,Bicycle Lake Army Air Field,Fort Irwin,United States,BYS,KBYS,35.2805,-116.630,2350,-8,A,America/Los_Angeles,airport,OurAirports
3237,3436,Twentynine Palms (Self) Airport,Twenty Nine Palms,United States,\N,KNXP,34.2962,-116.162,2051,-8,A,America/Los_Angeles,airport,OurAirports
3250,3449,Travis Air Force Base,Fairfield,United States,SUU,KSUU,38.2627,-121.927,62,-8,A,America/Los_Angeles,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7311,12185,Tracy Municipal Airport,Tracy,United States,\N,KTCY,37.6890,-121.442,193,-8,A,\N,airport,OurAirports
7326,12528,Skagit Regional Airport,Skagit,United States,MVW,KBVS,48.4709,-122.421,144,-8,A,\N,airport,OurAirports
7360,13018,Las Vegas Helicopters Heliport,Las vegas,United States,\N,NV03,36.1081,-115.174,2133,-8,A,\N,airport,OurAirports
7403,13189,Santa Ynez Airport,Santa Ynez,United States,SQA,KIZA,34.6068,-120.076,674,-8,A,\N,airport,OurAirports


In [123]:
# limit the airports to those that are 'international' to remove air force bases, municipal airports, etc.
print("before: ", airports.shape)
airports = airports[airports['name'].str.contains('International')]
print("after: ", airports.shape)

before:  (1512, 14)
after:  (170, 14)


In [138]:
# convert lat/long to county and state?
# create column 'airport' (has international airport), add to census data

In [139]:
one = airports[5:6]
one

Unnamed: 0,openflights_id,name,city,country,IATA,ICAO,lat,lng,alt,timezone,DST,Tz,type,source
3259,3458,Kansas City International Airport,Kansas City,United States,MCI,KMCI,39.2976,-94.7139,1026,-6,A,America/Chicago,airport,OurAirports


In [141]:
# using FCC website https://geo.fcc.gov/api/census/#!/area/get_area to get county name for each airport
# adapted from https://gis.stackexchange.com/questions/77048/local-geolocation-to-fips-county-using-latitude-and-longitude

with urllib.request.urlopen("https://geo.fcc.gov/api/census/area?lat=39.2976&lon=-94.7139&format=json") as url:
    data = json.loads(url.read().decode())
    #print(data)

#print(json.dumps(data, indent=4, sort_keys=True))

#print(data['results'][0]['state_fips'])
#print(data['results'][0]['county_fips'])
print(data['results'][0]['county_name'])

{'input': {'lat': 39.2976, 'lon': -94.7139}, 'results': [{'block_fips': '291650303071120', 'bbox': [-94.714018, 39.296744, -94.713555, 39.298155], 'county_fips': '29165', 'county_name': 'Platte', 'state_fips': '29', 'state_code': 'MO', 'state_name': 'Missouri', 'block_pop_2015': 0, 'amt': 'AMT004', 'bea': 'BEA099', 'bta': 'BTA226', 'cma': 'CMA024', 'eag': 'EAG005', 'ivm': 'IVM024', 'mea': 'MEA029', 'mta': 'MTA034', 'pea': 'PEA030', 'rea': 'REA004', 'rpc': 'RPC004', 'vpc': 'VPC004'}, {'block_fips': '291650303071142', 'bbox': [-94.714825, 39.296889, -94.713936, 39.298238], 'county_fips': '29165', 'county_name': 'Platte', 'state_fips': '29', 'state_code': 'MO', 'state_name': 'Missouri', 'block_pop_2015': 0, 'amt': 'AMT004', 'bea': 'BEA099', 'bta': 'BTA226', 'cma': 'CMA024', 'eag': 'EAG005', 'ivm': 'IVM024', 'mea': 'MEA029', 'mta': 'MTA034', 'pea': 'PEA030', 'rea': 'REA004', 'rpc': 'RPC004', 'vpc': 'VPC004'}]}
Platte


In [163]:
with urllib.request.urlopen("https://geo.fcc.gov/api/census/area?lat=19.721399307250977&lon=-155.04800415039062&format=json") as url:
    data = json.loads(url.read().decode())
print(data['results'][0]['county_name'])

Hawaii


In [None]:
counties = list()
for x in range(0,airports.shape[0]):
    string = "https://geo.fcc.gov/api/census/area?lat=" + str(airports.iloc[x]['lat']) + "&lon=" + str(airports.iloc[x]['lng']) + "&format=json"
    with urllib.request.urlopen(string) as url:
        data = json.loads(url.read().decode())
    counties.append(data['results'][0]['county_name'])
    time.sleep(2)

In [None]:
counties