In [1]:
import settings
import requests
import json
import pandas as pd
from pandas import DataFrame, Series

In [2]:
# Race and ethnicity classified population fields from US Census:
P005_vars_list = ['P0050001', 'P0050003', 'P0050004', 'P0050005', 'P0050006',
                  'P0050007', 'P0050008', 'P0050009', 'P0050010']

# Get 2010 US Census (SF1) data for all tracts in Contra Costa County.
# HTTPS keeps the API key in the query string from being sent in cleartext.
# The timeout stops the cell from hanging forever on a stalled connection.
r = requests.get('https://api.census.gov/data/2010/sf1?key={0}&get={1}&for=tract:*\
&in=state:06+county:013'.format(settings.CENSUS_KEY, ','.join(P005_vars_list)),
                 timeout=30)
# Fail here on an HTTP error (bad key, bad query, outage) instead of letting a
# later cell choke on an error page while parsing JSON.
r.raise_for_status()

In [3]:
# Definitions for manipulating the data to conform to these particular racial categories:
def sum_race_categories(x):
    """Collapse one tract's raw census fields into named race/ethnicity groups.

    `x` is a row-like mapping that holds 'tract' and the P005* fields.
    Returns a Series ordered Tract, Total, White, Black, Hispanic, Asian,
    Other, where 'Other' adds together P0050005, P0050007, P0050008 and
    P0050009.
    """
    column_order = ['Tract', 'Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']

    # Everything that is not reported as its own group is folded into 'Other'.
    other_total = x['P0050005'] + x['P0050007'] + x['P0050008'] + x['P0050009']

    groups = dict(
        Tract=x['tract'],
        Total=x['P0050001'],
        White=x['P0050003'],
        Black=x['P0050004'],
        Asian=x['P0050006'],
        Hispanic=x['P0050010'],
        Other=other_total,
    )
    return Series(groups, index=column_order)

def race_p(r):
    """Return each group's share of a tract's total population.

    `r` is a row-like mapping with 'Total' and the five group counts. The
    result is a Series indexed p_White, p_Black, p_Hispanic, p_Asian, p_Other.
    A tract whose 'Total' is zero gets zeros for every share, which avoids a
    division by zero.
    """
    groups = ['White', 'Black', 'Hispanic', 'Asian', 'Other']
    si = ['p_' + group for group in groups]

    total = r['Total']
    if total == 0:
        # Guard first: the group counts are not read for an empty tract.
        return Series(dict.fromkeys(si, 0), index=si)

    shares = {'p_' + group: r[group] / total for group in groups}
    return Series(shares, index=si)

In [4]:
# Decode the response body; the first row carries the column names and the
# remaining rows are the per-tract records.
j = json.loads(r.text)
df1 = DataFrame(data=j[1:], columns=j[0])

# Cast the population count columns to integers so they can be summed and divided.
df1[P005_vars_list] = df1[P005_vars_list].astype(int)
df1.head(5)

Unnamed: 0,P0050001,P0050003,P0050004,P0050005,P0050006,P0050007,P0050008,P0050009,P0050010,state,county,tract
0,3659,2590,171,17,153,6,7,118,597,6,13,301000
1,6592,3046,447,50,188,38,18,226,2579,6,13,302005
2,3779,1958,263,23,164,14,5,149,1203,6,13,302006
3,6347,2690,434,26,399,20,20,209,2549,6,13,302007
4,7718,3062,716,43,556,22,15,306,2998,6,13,302008


In [5]:
# One row per tract with the raw census fields collapsed into race/ethnicity groups.
df2 = df1.apply(sum_race_categories, axis='columns')

# Attach each group's share of the tract total. race_p already labels its
# output p_White ... p_Other, so an index-aligned join adds those five columns.
df2 = df2.join(df2.apply(race_p, axis='columns'))
df2.head(5)

Unnamed: 0,Tract,Total,White,Black,Hispanic,Asian,Other,p_White,p_Black,p_Hispanic,p_Asian,p_Other
0,301000,3659,2590,171,597,153,148,0.707844,0.046734,0.163159,0.041815,0.040448
1,302005,6592,3046,447,2579,188,332,0.462075,0.067809,0.391232,0.028519,0.050364
2,302006,3779,1958,263,1203,164,191,0.518126,0.069595,0.318338,0.043398,0.050542
3,302007,6347,2690,434,2549,399,275,0.423822,0.068379,0.401607,0.062864,0.043328
4,302008,7718,3062,716,2998,556,386,0.396735,0.09277,0.388443,0.072039,0.050013


In [6]:
# The tract numbers labeled on the maps don't specify the last two digits
# unless they are non-zero, so a map label is a prefix of the census tract id.
# Match on the leading digits: a substring search would also pick up tracts
# that merely contain '3770' somewhere in the middle or at the end.
df2[df2.Tract.str.startswith('3770')]

Unnamed: 0,Tract,Total,White,Black,Hispanic,Asian,Other,p_White,p_Black,p_Hispanic,p_Asian,p_Other
186,377000,6962,487,1600,4396,316,163,0.069951,0.229819,0.631428,0.045389,0.023413


In [7]:
# Match on the leading digits so '3680' cannot match in the middle or at the
# end of some other tract id.
df2[df2.Tract.str.startswith('3680')]

Unnamed: 0,Tract,Total,White,Black,Hispanic,Asian,Other,p_White,p_Black,p_Hispanic,p_Asian,p_Other
175,368001,5327,298,432,4176,333,88,0.055941,0.081096,0.783931,0.062512,0.01652
176,368002,3404,275,278,2402,333,116,0.080787,0.081669,0.70564,0.097826,0.034078


In [8]:
# Tracts we probably want in the fenceline set. Possibly also 3730, 3750?
# Note: 3780 is Point Richmond. It includes a lot of area to the west, upwind.
chevron_tracts = {
    '365002',
    '376000',
    '377000',
}

def in_fenceline(x):
    """Return True when tract id `x`, compared as a string, is in chevron_tracts."""
    tract_id = str(x)
    return tract_id in chevron_tracts

# Keep only the rows whose tract id is one of the selected fenceline tracts.
fenceline_df = df2.loc[df2['Tract'].apply(in_fenceline)]
fenceline_df

Unnamed: 0,Tract,Total,White,Black,Hispanic,Asian,Other,p_White,p_Black,p_Hispanic,p_Asian,p_Other
169,365002,5462,170,1687,2947,451,207,0.031124,0.308861,0.539546,0.08257,0.037898
185,376000,5962,198,1882,3339,366,177,0.03321,0.315666,0.560047,0.061389,0.029688
186,377000,6962,487,1600,4396,316,163,0.069951,0.229819,0.631428,0.045389,0.023413


In [9]:
# Combined head counts across the selected tracts, per group.
fenceline_df.loc[:, ['Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']].sum()

Total       18386
White         855
Black        5169
Hispanic    10682
Asian        1133
Other         547
dtype: int64

In [10]:
# Plain (unweighted) average of the per-tract shares: every tract counts
# equally here, regardless of how many people live in it.
fenceline_df.loc[:, ['p_White', 'p_Black', 'p_Hispanic', 'p_Asian', 'p_Other']].mean()

p_White       0.044762
p_Black       0.284782
p_Hispanic    0.577007
p_Asian       0.063116
p_Other       0.030333
dtype: float64

In [11]:
# Sanity check: the averaged shares should add up to 1 (within float rounding).
fenceline_df.loc[:, ['p_White', 'p_Black', 'p_Hispanic', 'p_Asian', 'p_Other']].mean().sum()

0.99999999999999989