In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Presentation settings: abbreviate printed arrays longer than 50 elements,
# wrap printed output at 50 characters, and enlarge the default plot font.
np.set_printoptions(linewidth=50, threshold=50)
import matplotlib as mpl
mpl.rcParams['font.size'] = 16

## Census 2: Age and Gender in Census Data

In [None]:
# The census file is also available online:
# http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv
# Here it is read from a copy located next to the notebook.

full_census_table = Table.read_table('nc-est2015-agesex-res.csv')

# Keep only sex, age, and the two population-estimate columns.
partial = full_census_table.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2015')

# Shorten the population-estimate labels to just their year.
us_pop = (partial
          .relabeled('POPESTIMATE2010', '2010')
          .relabeled('POPESTIMATE2015', '2015'))
us_pop

In [None]:
# Rows whose AGE equals 70 (a bare value in `where` means equality).
us_pop.where('AGE', are.equal_to(70))

In [None]:
# `where` also accepts a sequence of booleans, one per row.
# This positional mask drops row 0 (the "all" zero element) and keeps rows 1
# and 2; it assumes the AGE-70 table has exactly three rows in that order.
us_pop.where('AGE', are.equal_to(70)).where([False, True, True])

In [None]:
# Comparing a column array with a number yields one boolean per row.
seventy = us_pop.where('AGE', are.equal_to(70))
seventy['2010'] < 2000000

In [None]:
# Use the boolean array as a row filter: keep the AGE-70 rows whose 2010
# value is below two million.
under_two_million = seventy['2010'] < 2000000
seventy.where(under_two_million)

In [None]:
# Element-wise 2015/2010 ratio, then a boolean per row: True where the 2015
# value is more than 1.5 times the 2010 value.
(us_pop['2015'] / us_pop['2010']) > 1.5

In [None]:
# Keep only the rows whose 2015 value is more than 1.5 times the 2010 value.
growth_ratio = us_pop['2015'] / us_pop['2010']
us_pop.where(growth_ratio > 1.5)

## Census & ZIP Codes

In [None]:
# Load the map from the gzipped GeoJSON file and display it.
zips_path = 'ca_zips.geojson.gz'
zips = Map.read_geojson(zips_path)
zips

In [None]:
# Turn the map's features into a table so they can be filtered and joined.
# NOTE(review): presumably one record (row) per ZIP-code region - confirm.
zip_records = zips.features
zips_table = Table.from_records(zip_records)
zips_table.show(1)  # preview the first row only

In [None]:
# ZIP values are compared as strings here. `are.between(x, y)` keeps values
# that are >= x and < y, so '94799' itself is excluded.
in_947xx = are.between('94700', '94799')
berkeley = zips_table.where('ZIP', in_947xx)
berkeley.show(2)  # preview the first two rows

In [None]:
# Draw a map from the 'feature' column of the filtered table.
Map(berkeley['feature'])

### Income data

In [None]:
# Read ZIP as text rather than as a number, so it can be compared with the
# string ZIP values used elsewhere in this notebook.
zip_as_text = {'ZIP': str}
income_raw = Table.read_table('ca_income_by_zip.csv', dtype=zip_as_text)
income_raw

In [None]:
# Collapse the raw rows to one row per ZIP by summing every remaining column;
# grouped columns are labelled '<original label> sum'.
income_by_zipcode = income_raw.drop('STATEFIPS', 'STATE', 'agi_stub').group('ZIP', sum)

# Keep the three totals of interest under readable labels.
income = Table().with_columns(
    'ZIP', income_by_zipcode['ZIP'],
    'returns', income_by_zipcode['N02650 sum'],
    'total income', income_by_zipcode['A02650 sum'],
    'farmers', income_by_zipcode['SCHF sum'],
)

# Drop the '99999' entry.
# NOTE(review): presumably a catch-all bucket rather than a real ZIP code - confirm.
income = income.where('ZIP', are.not_equal_to('99999'))

# set_format changes how `income` displays 'total income' from here on
# (it modifies the table in place).
income.set_format('total income', NumberFormatter(0))
income.show(5)

In [None]:
# ZIP codes ordered from largest to smallest total income.
by_total_income = income.sort('total income', descending=True)
by_total_income

In [None]:
# Map the single ZIP code 94301.
# The geometry column is selected by its 'feature' label, as the other map
# cells in this notebook do, instead of by position; the cell therefore does
# not depend on the column order of zips_table.
Map(zips_table.where('ZIP', '94301').column('feature'))

In [None]:
# Attach the map columns to each income row (matching on ZIP), then order
# from largest to smallest total income.
income_with_shapes = income.join('ZIP', zips_table)
income_with_shapes.sort('total income', descending=True)

In [None]:
# Map all of the ZIP codes whose total income is more than $1B.
# NOTE(review): the 1e6 threshold equals $1B only if 'total income' is
# recorded in thousands of dollars - confirm against the data dictionary.
over_1b = income.where('total income', are.above(1e6))
Map(over_1b.join('ZIP', zips_table)['feature'])

In [None]:
# Map all of the ZIP codes where the 'farmers' count is at least 5% of the
# 'returns' count.
enough_farmers = income['farmers'] >= 0.05 * income['returns']
f = income.where(enough_farmers)
Map(f.join('ZIP', zips_table)['feature'])

In [None]:
# Map all of the ZIP codes where the 'farmers' count is at least 5% of the
# 'returns' count AND total income is more than $100M.
# NOTE(review): 1e5 equals $100M only if 'total income' is in thousands of
# dollars - confirm.
enough_farmers = income['farmers'] >= 0.05 * income['returns']
f = income.where(enough_farmers)
both = f.where('total income', are.above(1e5))
Map(both.join('ZIP', zips_table)['feature'])

In [None]:
# Largest total income among the ZIP codes in `f`.
f.column('total income').max()

In [None]:
# Tighten the income threshold: keep the rows of `f` whose total income is
# above 500000, then map them.
over_500k = are.above(500000)
both = f.where('total income', over_500k)
Map(both.join('ZIP', zips_table)['feature'])

In [None]:
# Bob's home zip code
home = income.where('ZIP', are.equal_to('94707'))
home

In [None]:
# Average income per return (i.e. per family) for the home ZIP code:
# total income divided by the number of returns, taken from the first row.
income_per_return = home.column('total income') / home.column('returns')
income_per_return[0]