In [7]:
import numpy as np
from datascience import *
%matplotlib inline
# Abbreviate printed arrays with '...' once they hold more than 50 elements,
# so boolean masks built from full table columns stay short in the output.
np.set_printoptions(threshold=50)

## Population & Age

In [8]:
# National population estimates broken down by age and sex (census.gov CSV).
# As of Jan 2017, this census file is online here:
data = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'

# A copy can be accessed here in case census.gov moves the file:
# data = 'http://inferentialthinking.com/notebooks/nc-est2015-agesex-res.csv'

# Read the CSV at that URL into a Table; the bare name on the last line
# makes the table the cell's displayed output.
full_census_table = Table.read_table(data)
full_census_table

SEX,AGE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015
0,0,3944153,3944160,3951330,3963087,3926540,3931141,3949775,3978038
0,1,3978070,3978090,3957888,3966551,3977939,3942872,3949776,3968564
0,2,4096929,4096939,4090862,3971565,3980095,3992720,3959664,3966583
0,3,4119040,4119051,4111920,4102470,3983157,3992734,4007079,3974061
0,4,4063170,4063186,4077551,4122294,4112849,3994449,4005716,4020035
0,5,4056858,4056872,4064653,4087709,4132242,4123626,4006900,4018158
0,6,4066381,4066412,4073013,4074993,4097605,4142916,4135930,4019207
0,7,4030579,4030594,4043046,4083225,4084913,4108349,4155326,4148360
0,8,4046486,4046497,4025604,4053203,4093177,4095711,4120903,4167887
0,9,4148353,4148369,4125415,4035710,4063152,4104072,4108349,4133564


In [9]:
# Keep the two grouping columns plus only the 2010 and 2015 population
# estimates; the census counts and the intermediate years are dropped.
partial = full_census_table.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2015')
partial

SEX,AGE,POPESTIMATE2010,POPESTIMATE2015
0,0,3951330,3978038
0,1,3957888,3968564
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564


In [10]:
# Shorten the two estimate headers to bare year strings. Columns are addressed
# by their current label, so the renaming does not depend on column position.
us_pop = (
    partial
    .relabeled('POPESTIMATE2010', '2010')
    .relabeled('POPESTIMATE2015', '2015')
)
us_pop

SEX,AGE,2010,2015
0,0,3951330,3978038
0,1,3957888,3968564
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564


In [11]:
# Absolute and relative growth of every SEX/AGE group between the two estimates.
pop_2010 = us_pop.column('2010')
pop_2015 = us_pop.column('2015')
change = pop_2015 - pop_2010

# 'Percent Change' is stored as a fraction of the 2010 value; the formatter
# below only affects how it is displayed.
census = us_pop.with_columns(
    'Change', change,
    'Percent Change', change / pop_2010
)
# set_format updates `census` and its result is shown as the cell's output.
census.set_format('Percent Change', PercentFormatter)

SEX,AGE,2010,2015,Change,Percent Change
0,0,3951330,3978038,26708,0.68%
0,1,3957888,3968564,10676,0.27%
0,2,4090862,3966583,-124279,-3.04%
0,3,4111920,3974061,-137859,-3.35%
0,4,4077551,4020035,-57516,-1.41%
0,5,4064653,4018158,-46495,-1.14%
0,6,4073013,4019207,-53806,-1.32%
0,7,4043046,4148360,105314,2.60%
0,8,4025604,4167887,142283,3.53%
0,9,4125415,4133564,8149,0.20%


## Advanced Where

In [12]:
# A comparison is an expression whose value is a boolean.
3 > 2

True

In [13]:
# The same operator yields False when the comparison does not hold.
1 > 2

False

In [14]:
# Comparing an array with a number compares every element, producing an
# array of booleans of the same length.
np.arange(5) > 2

array([False, False, False,  True,  True], dtype=bool)

In [15]:
# Passing a plain value as the second argument keeps the rows whose AGE
# equals that value.
us_pop.where('AGE', 70)

SEX,AGE,2010,2015
0,70,2062577,2492490
1,70,954073,1162672
2,70,1108504,1329818


In [16]:
# where also accepts one boolean per row: rows paired with True are kept,
# so the first of the three AGE == 70 rows is dropped here.
us_pop.where('AGE', 70).where([False, True, True])

SEX,AGE,2010,2015
1,70,954073,1162672
2,70,1108504,1329818


In [17]:
# Name the AGE == 70 rows, then build a mask with one boolean per row of
# `seventy`: True where the 2010 estimate is below two million.
seventy = us_pop.where('AGE', 70)
seventy.column('2010') < 2000000

array([False,  True,  True], dtype=bool)

In [18]:
# Filter `seventy` with a mask computed from its own column, so the mask
# has exactly one entry per row of the table being filtered.
seventy.where(seventy.column('2010') < 2000000)

SEX,AGE,2010,2015
1,70,954073,1162672
2,70,1108504,1329818


In [19]:
# One boolean per row of us_pop: True where the 2010 estimate exceeds 4 million.
us_pop.column('2010') > 4e6

array([False, False,  True, ..., False, False,  True], dtype=bool)

In [20]:
# Predicate form: keep the rows whose '2010' value is above 4 million.
us_pop.where('2010', are.above(4e6))

SEX,AGE,2010,2015
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564
0,10,4187062,4121289
0,11,4115511,4130328


In [21]:
# Boolean-mask form of the same filter; the rows displayed match those
# produced by the are.above(4e6) predicate.
us_pop.where(us_pop.column('2010') > 4e6)

SEX,AGE,2010,2015
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564
0,10,4187062,4121289
0,11,4115511,4130328


In [22]:
# Left disabled on purpose (presumably as a counter-example): the mask below is
# built from every row of us_pop, but it would be applied to the 3-row
# AGE == 70 table, so the mask and the table do not line up row for row.
# Build the mask from the filtered table instead, as done with `seventy`.
# us_pop.where('AGE', 70).where(us_pop.column('2010') > 4e6)

In [23]:
# True for rows where the 2015 estimate is more than 1.5 times the 2010 estimate.
us_pop.column('2015') / us_pop.column('2010') > 1.5

array([False, False, False, ..., False, False, False], dtype=bool)

In [24]:
# Keep only the SEX/AGE groups that grew by more than 50% from 2010 to 2015.
us_pop.where(us_pop.column('2015') / us_pop.column('2010') > 1.5)

SEX,AGE,2010,2015
1,94,43827,68135
1,95,31736,48015
1,97,14775,23092
1,98,9505,14719
1,99,6104,9577
1,100,9352,15088


In [25]:
# Same filter using the precomputed column. 'Percent Change' holds fractions
# (change / 2010 value), so .5 here corresponds to the 50% shown in the output.
census.where('Percent Change', are.above(.5))

SEX,AGE,2010,2015,Change,Percent Change
1,94,43827,68135,24308,55.46%
1,95,31736,48015,16279,51.30%
1,97,14775,23092,8317,56.29%
1,98,9505,14719,5214,54.86%
1,99,6104,9577,3473,56.90%
1,100,9352,15088,5736,61.33%


In [26]:
# Three-argument where: the predicate compares the '2010' column against the
# '2015' column row by row, keeping rows whose 2010 value is above the 2015
# value (the groups whose estimate decreased).
us_pop.where('2010', are.above, '2015')

SEX,AGE,2010,2015
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,10,4187062,4121289
0,12,4113279,4101021
0,13,4119666,4084306
0,16,4313252,4184296
0,17,4376367,4194286
