# Census data API

Using Census data from ACS 1-Year Detailed Tables <br />
 https://api.census.gov/data/2016/acs/acs1/variables.html <br />
 https://api.census.gov/data/2016/acs/acs1/examples.html <br />
BY state› congressional district api calls:	 <br />
    https://api.census.gov/data/2016/acs/acs1?get=B01001_001E,NAME&for=congressional%20district:*&key=YOUR_KEY_GOES_HERE	
    https://api.census.gov/data/2016/acs/acs1?get=B01001_001E,NAME&for=congressional%20district:01&in=state:01&key=YOUR_KEY_GOES_HERE

            

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import time
from datetime import datetime
from api_keys import api_key

### Population
B01001_001E: UNWEIGHTED SAMPLE COUNT OF THE POPULATION <br />
B01002_001E - B01002_003E:	MEDIAN AGE <br />

In [2]:
base_url="https://api.census.gov/data/2016/acs/acs1?get="
variables='B01001_001E,B01002_001E,B01002_002E,B01002_003E'
district_base="&for=congressional%20district:"
district='*'
key='&key='+api_key
url=base_url+variables+district_base+district+key
print(url)

https://api.census.gov/data/2016/acs/acs1?get=B01001_001E,B01002_001E,B01002_002E,B01002_003E&for=congressional%20district:*&key=1f9d38260ccd918d7f0c97fffa5b0ff0feb50607


In [3]:
def getAPIData(purl):
    response = requests.get(purl)
    res=response.json()
    pdf=pd.DataFrame(res)
    return pdf

In [4]:
# set column values as should be instead of being in the first row, drop first row
cdist_df=getAPIData(url)
cdist_df.head()
cdist_df.columns=['population','mAgeTotal','mAgeM','mAgeF','state','cDistrict']
population_df=cdist_df.drop(cdist_df.index[0])
population_df.head()

Unnamed: 0,population,mAgeTotal,mAgeM,mAgeF,state,cDistrict
1,704457,39.6,38.3,41.0,1,1
2,681621,38.2,36.7,40.1,1,2
3,709482,38.3,36.6,39.8,1,3
4,683273,40.7,39.7,42.0,1,4
5,712529,40.1,38.4,41.8,1,5


### Education
B15003_001E - B15003_025E: EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER <br />

In [5]:
#Education , age 25 and older by congressional district
variables=[]
for num in range(1,26):
    if num < 10:
        variables.append('B15003_00'+str(num)+'E,')
    else:
        variables.append('B15003_0'+str(num)+'E,')

url=base_url
for v in variables:
    url+=v
# remove last comma
url=url[:-1]
url+=district_base+district+key
#print(url)
edu_df=getAPIData(url)
edu_df.columns=['schoolTotal','No Schooling','Nursery','Kinder','1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th','11th','12th nod','12th d','GEDalt','colUnder1yr','colOver1yr','Associates','Bachelors','Masters','ProfSchool','Doctorate','state','cDistrict']
edu_df=edu_df.drop(edu_df.index[0])
edu_df.head()

Unnamed: 0,schoolTotal,No Schooling,Nursery,Kinder,1st,2nd,3rd,4th,5th,6th,...,GEDalt,colUnder1yr,colOver1yr,Associates,Bachelors,Masters,ProfSchool,Doctorate,state,cDistrict
1,483628,6173,0,89,0,177,147,168,659,1664,...,23782,28984,72702,45045,73720,31803,7552,3179,1,1
2,464210,5620,0,0,372,319,1258,852,1492,2796,...,27056,29275,75042,35913,64329,27290,5782,3768,1,2
3,472077,7650,0,0,30,50,524,557,1471,2651,...,28007,30704,76546,36027,64031,32061,5894,5499,1,3
4,472340,11335,137,31,180,372,1388,1369,1271,5880,...,31974,33496,69006,39050,51327,21812,4541,2741,1,4
5,490229,6742,76,153,86,752,636,436,1753,3316,...,31714,30612,70382,37053,90532,46215,5877,5774,1,5



### Poverty and Income
B17001_001E - B17001_003E, B17001_017E: POVERTY STATUS IN THE PAST 12 MONTHS BY SEX (income below poverty level) <br />
B17001_031E, B17001_032E, B17001_046E: POVERTY STATUS IN THE PAST 12 MONTHS BY SEX (income above poverty level) <br />
B19051_001E - B190051_003E: EARNINGS IN THE PAST 12 MONTHS FOR HOUSEHOLDS <br />
B19052_001E, B19052_002E, B19052_002E: WAGE OR SALARY INCOME IN THE PAST 12 MONTHS FOR HOUSEHOLDS <br />

In [6]:
# poverty and Income
variables='B17001_001E,B17001_002E,B17001_003E,B17001_017E,B17001_031E,B17001_032E,B17001_046E,B19051_001E,B19051_002E,B19051_003E,B19052_001E,B19052_002E,B19052_003E' 
url=base_url+variables+district_base+district+key
pincome_df=getAPIData(url)
#pincome_df.columns=[pincome_df.iloc[0,:]]  # if i wanted the variable names
#ps: poverty status, O:Over, U:Under , M:male, F:Female, E:earnings, W:wage
pincome_df.columns=['piTotal',
                    'psUTotal','psUM','psUF',
                    'psOTotal','psOM','psOF',
                    'eTotal','woE','withE', 
                    'wTotal','woW','withW',
                    'state','cDistrict']
pincome_df=pincome_df.drop(pincome_df.index[0])
pincome_df.head()

Unnamed: 0,piTotal,psUTotal,psUM,psUF,psOTotal,psOM,psOF,eTotal,woE,withE,wTotal,woW,withW,state,cDistrict
1,688090,121383,51861,69522,566707,274251,292456,264399,186761,77638,264399,179447,84952,1,1
2,659925,127415,56218,71197,532510,261706,270804,259766,185064,74702,259766,177866,81900,1,2
3,688556,113480,51737,61743,575076,281468,293608,270850,198058,72792,270850,191269,79581,1,3
4,674872,120194,51625,68569,554678,278648,276030,256036,170941,85095,256036,163476,92560,1,4
5,695803,103145,45777,57368,592658,294794,297864,278329,206725,71604,278329,199528,78801,1,5



### Health coverage
B27001_001E, B27001_002E, B27001_030E: HEALTH INSURANCE COVERAGE STATUS BY SEX BY AGE <br />


In [7]:
variables='B27001_001E,B27001_002E,B27001_030E'
url=base_url+variables+district_base+district+key
healthc_df=getAPIData(url)
healthc_df.columns=['healthTotal','healthM','healthF','state','cDistrict']
healthc_df=healthc_df.drop(healthc_df.index[0])
healthc_df.head()


Unnamed: 0,healthTotal,healthM,healthF,state,cDistrict
1,691182,327312,363870,1,1
2,661346,315970,345376,1,2
3,696843,335042,361801,1,3
4,677025,330956,346069,1,4
5,701156,342268,358888,1,5


In [8]:
#popfile='population.csv'
#edufile='education.csv'
#incomefile='incomefile.csv'
#healthfile='healthfile.csv'
#population_df.to_csv(popfile)
#edu_df.to_csv(edufile)
#pincome_df.to_csv(incomefile)
#healthc_df.to_csv(healthfile)


In [9]:
# merge all and single csv file
pop_edu_df= pd.merge(population_df,edu_df, how='inner', on=['state','cDistrict'])
pop_edu_i_df=pd.merge(pop_edu_df,pincome_df, how='inner', on=['state','cDistrict'])
all_df=pd.merge(pop_edu_i_df,healthc_df, how='inner', on=['state','cDistrict'])
all_df.head()



Unnamed: 0,population,mAgeTotal,mAgeM,mAgeF,state,cDistrict,schoolTotal,No Schooling,Nursery,Kinder,...,psOF,eTotal,woE,withE,wTotal,woW,withW,healthTotal,healthM,healthF
0,704457,39.6,38.3,41.0,1,1,483628,6173,0,89,...,292456,264399,186761,77638,264399,179447,84952,691182,327312,363870
1,681621,38.2,36.7,40.1,1,2,464210,5620,0,0,...,270804,259766,185064,74702,259766,177866,81900,661346,315970,345376
2,709482,38.3,36.6,39.8,1,3,472077,7650,0,0,...,293608,270850,198058,72792,270850,191269,79581,696843,335042,361801
3,683273,40.7,39.7,42.0,1,4,472340,11335,137,31,...,276030,256036,170941,85095,256036,163476,92560,677025,330956,346069
4,712529,40.1,38.4,41.8,1,5,490229,6742,76,153,...,297864,278329,206725,71604,278329,199528,78801,701156,342268,358888


In [11]:
now = datetime.now()
filen="censusData"+now.strftime("%Y%m%d") +".csv"
all_df.to_csv(filen)