# Census API Example C
This example hard-codes some variables, reads a list of geographies from a file, splits that list into sublists of 50 items, loops through the sublists to retrieve data, and writes the output to a dataframe and a CSV file. It's easier to read long lists of variables from a file than to hard-code them. You cannot include more than 50 variables (columns or geographies) in one request, so longer lists need to be broken apart, and the sublists looped through to make multiple requests.

Example uses ZIP Code Business Patterns 2017:
https://www.census.gov/data/developers/data-sets/cbp-nonemp-zbp/zbp-api.2017.html

## Set Variables

In [None]:
import pandas as pd, requests, os, json
from IPython.display import clear_output

In [None]:
#Settings used throughout the notebook
year = '2017'  #Dataset year, inserted into the API address
dsource = 'zbp'  #Dataset name, inserted into the API address
cols = 'ESTAB,EMP,PAYANN'  #Comma-separated variables to request
geofile = 'nyc_zips.csv'  #Input file holding the ZIP Codes to request
outputcsv = 'nyc_zbpdata_2017.csv'  #File the retrieved data is written to

In [None]:
#Build the dataset's API address from the year and data source,
#then display it so the address can be checked
base_url = f'https://api.census.gov/data/{year}/{dsource}'
base_url

In [None]:
#Read in file of ZIP codes; the zipcodes column is read as text rather
#than numbers, since the codes are later joined into a request string
nyczips=pd.read_csv(geofile, sep=',', dtype={'zipcodes':str})
nyczips.head()

In [None]:
#Show the size of the ZIP code table as (rows, columns)
nyczips.shape

In [None]:
def chunks(l, n):
    """Yield consecutive slices of l, each holding up to n items.

    l -- a sequence that supports len() and slicing
    n -- the number of items per slice; the final slice is shorter
         when len(l) is not a multiple of n
    """
    starts = range(0, len(l), n)  # Index where each slice begins
    yield from (l[start:start + n] for start in starts)

In [None]:
#Split the ZIP Codes into a list of sublists of up to 50 items each;
#the last sublist may be shorter than the others
ziplist = list(chunks(nyczips['zipcodes'].tolist(), 50))
print('Number of chunks:', len(ziplist))

In [None]:
#Print the first two sublists to check how the ZIP Codes were split
print(ziplist[:2])

## Retrieve Data

In [None]:
#Request each sublist of ZIP Codes in turn and collect the returned rows in
#one nested list. Each response starts with a row of column headers, so that
#row is stored once and left out for every later chunk.
emp_data=[]
for i, v in enumerate(ziplist):
    batchzips=','.join(v) #Convert items in sublist to a string separated by commas
    data_url = f'{base_url}?get={cols}&for=zipcode:{batchzips}'
    #Without a timeout a stalled connection would hang the loop indefinitely;
    #with one, requests raises an exception after 60 seconds instead
    response=requests.get(data_url, timeout=60)
    if response.status_code!=200: #Code 200 = success
        print('***Problem with retrieval***, response code',response.status_code)
        break #Stop at the first failed chunk; rows gathered so far are kept
    clear_output(wait=True) #Replace the previous progress message
    data=response.json()
    #Keep the header row only while nothing has been stored yet
    emp_data.extend(data if not emp_data else data[1:])
    print('Retrieved data for chunk',i)
print('Done')

In [None]:
#Number of rows retrieved, counting the header row; this will differ from
#the number of ZIP Codes requested, as not all ZIPs have data
len(emp_data)

In [None]:
#Show the header row and the first four records of the retrieved list
emp_data[:5]

## Generate Output

In [None]:
#Build the dataframe from the nested list: the first row supplies the column
#names and the remaining rows are the records. The ZIP Code column becomes
#the index, every other column is cast to 64-bit integers, and the rows are
#ordered from the largest EMP value to the smallest.
empdf = (
    pd.DataFrame(emp_data[1:], columns=emp_data[0])
    .rename(columns={'zip code':'zipcode'})
    .set_index('zipcode')
    .astype('int64')
    .sort_values('EMP', ascending=False)
)
empdf.head()

In [None]:
#Write the dataframe out to the csv file named in outputcsv;
#the zipcode index is written along with the data columns
empdf.to_csv(outputcsv)