### Pull County-Level ACS 1-year and 5-year data using Census library and API

In [None]:
# Note: 1-year data is only available for counties with a population of 65K or greater.

#Used this notebook as inspiration: 
#https://github.com/BuzzFeedNews/2020-02-gentrification/blob/master/notebooks/01-download-census-data.ipynb. 

# County shape file https://data.ca.gov/dataset/ca-geographic-boundaries/resource/b0007416-a325-4777-9295-368ea6b710e6
# County code to name mapping website
# https://www.census.gov/geographies/reference-files/2017/demo/popest/2017-fips.html

In [None]:
#installations
!pip install census
!pip install us

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m


In [None]:
# Dependencies
import os

import pandas as pd
import requests
from census import Census
from us import states

### Pull and Clean County-Level Data

In [None]:
# Load the FIPS geocode reference file, skipping (and warning about) malformed rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0 in favour
# of `on_bad_lines`; try the current keyword first and fall back for older pandas.
try:
    county_list = pd.read_csv('/work/assets/all-geocodes.csv', on_bad_lines='warn')
except TypeError:
    # pandas < 1.3 does not know `on_bad_lines`
    county_list = pd.read_csv('/work/assets/all-geocodes.csv', error_bad_lines=False)
county_list = county_list.iloc[3:,:] #drop header info

# The first remaining row holds the real column names: promote it to the header.
new_header = county_list.iloc[0] #grab the first row for the header
county_list = county_list[1:] #take the data less the header row
county_list.columns = new_header #set the header row as the df header
county_list = county_list[county_list['State Code (FIPS)']=='06'] #only include counties in CA

# Collapse to one row per county code, keeping each column's most frequent value.
county_list = county_list.groupby('County Code (FIPS)').agg(lambda x:x.value_counts().index[0]) #get one row per county

# Drop the first group (presumably the state-level '000' entry - verify against
# the source file) and turn the county code back into a regular column.
county_list = county_list.iloc[1:,:].reset_index()
county_list = county_list[['County Code (FIPS)', 'State Code (FIPS)',
       'Area Name (including legal/statistical area description)']]

county_list = county_list.rename({'County Code (FIPS)':"county_code", 'State Code (FIPS)':"state_code",
       'Area Name (including legal/statistical area description)':'county'},axis='columns')

# Sanity check: number of distinct county codes that remain.
county_list['county_code'].nunique()


58

In [None]:
# Persist the cleaned county list so other data pulls can reuse it.
ca_counties_csv = '/work/output/ca_counties.csv'
county_list.to_csv(ca_counties_csv, index=False)


3,county_code,state_code,county
0,1,6,Alameda County
1,3,6,Alpine County
2,5,6,Amador County
3,7,6,Butte County
4,9,6,Calaveras County
5,11,6,Colusa County
6,13,6,Contra Costa County
7,15,6,Del Norte County
8,17,6,El Dorado County
9,19,6,Fresno County


### Pull Census 1-year data and select variables

In [None]:
# Read the Census API key from the environment instead of committing it to the
# notebook. A key can be requested at https://api.census.gov/data/key_signup.html
# NOTE(review): the key that used to be hard-coded here is in the file history
# and should be treated as exposed - request a new one.
census_api_key = os.environ.get('CENSUS_API_KEY')
if not census_api_key:
    raise RuntimeError(
        "Set the CENSUS_API_KEY environment variable before running this notebook."
    )
c = Census(census_api_key)


In [None]:
#List of variables available at https://api.census.gov/data/2019/acs/acs1/variables.html 
# ACS variable codes requested from the API; get_acs_data maps each code to a
# readable column name.
categories = [
     'NAME', # county name
     'B01001_001E', # Total population
     'B15002_001E', # Total population 25 and over
     'B17001_002E', # Poverty status: income in the past 12 months below poverty level
     'B19013_001E', # Median income
     'B25111_001E', # Median gross rent
     'B25077_001E', # Median home value
     'B15011_001E', # Total population age 25+ years with a bachelor's degree or higher
     'B03002_003E', # Not Hispanic or Latino!!White alone
     'B03002_004E', # Not Hispanic or Latino!!Black or African American alone
     # NOTE(review): the next code comes from table B02001, while every other race
     # field here comes from B03002 ("Not Hispanic or Latino" breakdown). Confirm
     # whether B03002_005E was intended so the race columns are comparable.
     'B02001_004E', # American Indian and Alaska Native Alone
     'B03002_006E', # Not Hispanic or Latino!!Asian alone
     'B03002_007E', # Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
     'B03002_008E', # Not Hispanic or Latino!!Some other race alone
     'B03002_009E', # Not Hispanic or Latino!!Two or more races
     'B03002_012E', # Hispanic or Latino
]

#Fields we might want to add:
#"C08134_001E",#"Estimate!!Total:","MEANS OF TRANSPORTATION TO WORK BY TRAVEL TIME TO WORK"

In [None]:
# https://pypi.org/project/census/ list of geography levels available
#This function pulls county level data for acs 1-year
# def get_acs_data(state_code, county_code, timeperiod, county):
#     results = c.acs1.state_county(
#         categories,
#         '06', #This is CA
#         county_code,
#         #Census.ALL,
#         year = timeperiod
#     )


#     return [ {
#         'geoid': res['state'] + res['county'],
#         'name': res['NAME'],
#         'year':year,
#         'total_population': res['B01001_001E'],
#         'total_population_25_over': res['B15002_001E'],
#         'median_income': res['B19013_001E'],
#         'median_gross_rent': res['B25111_001E'],
#         'median_home_value': res['B25077_001E'],
#         'poverty_total': res['B17001_002E'],

#         'educational_attainment': res['B15011_001E'],
#         'white_alone_non_hisp': res['B03002_003E'],
#         'black_alone_non_hisp': res['B03002_004E'],
#         'native_alone': res['B02001_004E'],
#         'asian_alone': res['B03002_006E'],
#         'native_hawaiian_pacific_islander': res['B03002_007E'],
#         'some_other_race_alone': res['B03002_008E'],
#         'two_or_more': res['B03002_009E'],
#         'hispanic_or_latino': res['B03002_012E'],
#         'county': county,
#     } for res in results ]


# https://pypi.org/project/census/ lists the geography levels available
# This function pulls county-level data from the ACS 1-year or 5-year survey
def get_acs_data(state_code, county_code, timeperiod, county, yr_5):
    """Pull the selected ACS variables for one California county.

    Queries the 5-year ACS when ``yr_5 == 'year_5'`` and the 1-year ACS for any
    other value, then flattens every returned row into a record with readable
    column names.

    Args:
        state_code: State FIPS code supplied by the caller. Currently unused:
            the query is pinned to California ('06').
        county_code: County FIPS code passed to the API.
        timeperiod: Survey year; sent to the API as ``year=`` and stored in the
            ``'year'`` field of every record.
        county: County name copied verbatim into every record.
        yr_5: ``'year_5'`` selects ``c.acs5``; anything else selects ``c.acs1``.

    Returns:
        list[dict]: One record per row returned by the API (an empty list when
        the API returns no rows).
    """
    # Both surveys expose the same state_county() call, so pick the dataset once.
    dataset = c.acs5 if yr_5 == 'year_5' else c.acs1
    results = dataset.state_county(
        categories,
        '06', #This is CA
        county_code,
        year = timeperiod
    )

    return [ {
        'geoid': res['state'] + res['county'],
        'name': res['NAME'],
        # Use the requested year, not a notebook-level `year` variable that only
        # exists while the calling loop is running.
        'year': timeperiod,
        'total_population': res['B01001_001E'],
        'total_population_25_over': res['B15002_001E'],
        'median_income': res['B19013_001E'],
        'median_gross_rent': res['B25111_001E'],
        'median_home_value': res['B25077_001E'],
        'poverty_total': res['B17001_002E'],

        'educational_attainment': res['B15011_001E'],
        'white_alone_non_hisp': res['B03002_003E'],
        'black_alone_non_hisp': res['B03002_004E'],
        'native_alone': res['B02001_004E'],
        'asian_alone': res['B03002_006E'],
        'native_hawaiian_pacific_islander': res['B03002_007E'],
        'some_other_race_alone': res['B03002_008E'],
        'two_or_more': res['B03002_009E'],
        'hispanic_or_latino': res['B03002_012E'],
        'county': county,
    } for res in results ]

In [None]:
# Uncomment the following code to see the 1-year ACS data at the
# county level for California. This covers 41 counties.

# c.acs1.state_county(
#         categories,
#         '06',
#         #state_code,
#         Census.ALL,
#         year = 2019
#     )

In [None]:

# Tag every county with the same area label; the pull loops iterate over this frame.
county_list = county_list.assign(metro_area_name='CA')
# Append .iloc[:1, :] here to try the pull on a single county.
metro_area_counties = county_list


In [None]:
# Query the 1-year ACS once per (year, county) and collect the records.
census_data = []
years = list(range(2019, 2020))

for year in years:
    for row_label, county_row in metro_area_counties.iterrows():
        county_name = county_row["county"]
        print(county_name)

        census_data.extend(
            get_acs_data(
                county_row["state_code"],
                county_row["county_code"],
                year,
                county_name,
                'year_1',
            )
        )

# Build the frame once from the accumulated records.
census_data_1yr = pd.DataFrame(census_data)

Alameda County
Alpine County
Amador County
Butte County
Calaveras County
Colusa County
Contra Costa County
Del Norte County
El Dorado County
Fresno County
Glenn County
Humboldt County
Imperial County
Inyo County
Kern County
Kings County
Lake County
Lassen County
Los Angeles County
Madera County
Marin County
Mariposa County
Mendocino County
Merced County
Modoc County
Mono County
Monterey County
Napa County
Nevada County
Orange County
Placer County
Plumas County
Riverside County
Sacramento County
San Benito County
San Bernardino County
San Diego County
San Francisco County
San Joaquin County
San Luis Obispo County
San Mateo County
Santa Barbara County
Santa Clara County
Santa Cruz County
Shasta County
Sierra County
Siskiyou County
Solano County
Sonoma County
Stanislaus County
Sutter County
Tehama County
Trinity County
Tulare County
Tuolumne County
Ventura County
Yolo County
Yuba County


In [None]:
census_data_1yr

Unnamed: 0,geoid,name,year,total_population,total_population_25_over,median_income,median_gross_rent,median_home_value,poverty_total,educational_attainment,white_alone_non_hisp,black_alone_non_hisp,native_alone,asian_alone,native_hawaiian_pacific_islander,some_other_race_alone,two_or_more,hispanic_or_latino,county
0,6001,"Alameda County, California",2019,1671329.0,1195107.0,108322.0,1982.0,882100.0,145561.0,604814.0,508598.0,172718.0,12648.0,517004.0,12858.0,7922.0,73656.0,373055.0,Alameda County
1,6007,"Butte County, California",2019,219186.0,143815.0,62563.0,1087.0,336600.0,34050.0,43830.0,155136.0,3758.0,2333.0,9763.0,527.0,279.0,10925.0,37731.0,Butte County
2,6013,"Contra Costa County, California",2019,1153526.0,802187.0,107135.0,1948.0,687600.0,90508.0,344780.0,489677.0,99615.0,5863.0,203261.0,4020.0,4412.0,49862.0,300420.0,Contra Costa County
3,6017,"El Dorado County, California",2019,192843.0,142068.0,87059.0,1308.0,502000.0,17736.0,50191.0,148711.0,1409.0,2540.0,9991.0,125.0,380.0,4856.0,25378.0,El Dorado County
4,6019,"Fresno County, California",2019,999101.0,621440.0,57518.0,1034.0,288300.0,202698.0,136816.0,285269.0,43602.0,13588.0,101152.0,1811.0,2921.0,20915.0,537180.0,Fresno County
5,6023,"Humboldt County, California",2019,135558.0,93098.0,51662.0,1006.0,348000.0,25886.0,28292.0,100078.0,1790.0,7073.0,3525.0,539.0,49.0,7020.0,16354.0,Humboldt County
6,6025,"Imperial County, California",2019,181215.0,111755.0,48472.0,810.0,229600.0,42687.0,20569.0,18139.0,3624.0,3791.0,2269.0,0.0,22.0,915.0,154088.0,Imperial County
7,6029,"Kern County, California",2019,900202.0,551956.0,53067.0,949.0,235800.0,166768.0,94296.0,295068.0,44566.0,8906.0,41093.0,907.0,316.0,22422.0,491545.0,Kern County
8,6031,"Kings County, California",2019,152940.0,95083.0,58453.0,969.0,247600.0,21063.0,16745.0,47938.0,8130.0,2804.0,4951.0,460.0,0.0,5511.0,84514.0,Kings County
9,6033,"Lake County, California",2019,64386.0,47186.0,47138.0,992.0,255200.0,12019.0,,,,2540.0,,,,,,Lake County


### Pull ACS 5-Year Data for Remaining 17 Counties

In [None]:
# One-year ACS data is only published for counties with a population of 65K or
# more, so the counties missing from the 1-year pull must use the 5-year ACS.
# Build the list of counties that are not represented in the 1-year data.

census_county = census_data_1yr.loc[census_data_1yr['year']==2019, 'county']

# indicator=True adds a `_merge` column; 'left_only' marks counties that have
# no matching row in the 1-year results.
df_all = county_list.merge(census_county, on=['county'],
                   how='left', indicator=True)

# .copy() makes the filtered rows an independent frame, so the assignment below
# does not raise SettingWithCopyWarning.
counties_5year = df_all[df_all['_merge']=='left_only'].copy()
counties_5year['metro_area_name']='CA'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [None]:
# Query the 5-year ACS once per (year, county) for the counties that had no 1-year data.
census_data = []
years = list(range(2019, 2020))

for year in years:
    for row_label, county_row in counties_5year.iterrows():
        county_name = county_row["county"]
        print(county_name)

        census_data.extend(
            get_acs_data(
                county_row["state_code"],
                county_row["county_code"],
                year,
                county_name,
                'year_5',
            )
        )

# Build the frame once from the accumulated records.
census_data_5yr = pd.DataFrame(census_data)

Alpine County
Amador County
Calaveras County
Colusa County
Del Norte County
Glenn County
Inyo County
Lassen County
Mariposa County
Modoc County
Mono County
Plumas County
San Benito County
Sierra County
Siskiyou County
Trinity County
Tuolumne County


In [None]:
# Stack the 1-year and 5-year results and write the combined table to disk.
census_frames = [census_data_1yr, census_data_5yr]
all_census = pd.concat(census_frames)

census_output_csv = '/work/output/census_data.csv'
all_census.to_csv(census_output_csv, index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f6c76417-5fde-42f3-8920-755838dec3fa' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>