In [2]:
# usual imports
# NOTE(review): the star import is presumably what provides Table, are,
# NumberFormatter and PercentFormatter used in later cells -- confirm before
# narrowing it to explicit names.
from datascience import *
import numpy as np

# IPython magic: draw figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plots
# Select matplotlib's 'fivethirtyeight' style sheet.
plots.style.use('fivethirtyeight')

## Census 1

In [4]:
# As of Jan 2017, this census file is online here:
data = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'

# A copy can be accessed here in case census.gov moves the file:
# data = 'http://inferentialthinking.com/notebooks/nc-est2015-agesex-res.csv'
# or via a local copy
# data = 'nc-est2015-agesex-res.csv'

full_census_table = Table.read_table(data)

# Keep SEX and AGE plus the raw columns at positions 4 and 9, and give those
# two columns the short labels '2010' and '2015'.
partial = full_census_table.select(['SEX', 'AGE', 4, 9])
us_pop = (
    partial
    .relabeled(2, '2010')
    .relabeled(3, '2015')
)

pop_2010 = us_pop.column('2010')
pop_2015 = us_pop.column('2015')
ratio = pop_2015 / pop_2010

# Add the computed columns: absolute change, growth over the whole 2010-2015
# span, and that growth expressed as an equivalent per-year rate (fifth root
# of the five-year ratio).
census = us_pop.with_columns(
    'Change', pop_2015 - pop_2010,
    'Total Growth', ratio - 1,
    'Annual Growth', ratio ** (1 / 5) - 1,
)

# NumberFormatter for the count columns, PercentFormatter for the growth
# columns.  The calls are chained so the formatted table is the value the
# cell displays.
census.set_format(['2010', '2015', 'Change'], NumberFormatter).set_format(
    ['Total Growth', 'Annual Growth'], PercentFormatter)

SEX,AGE,2010,2015,Change,Total Growth,Annual Growth
0,0,3951330,3978038,26708,0.68%,0.13%
0,1,3957888,3968564,10676,0.27%,0.05%
0,2,4090862,3966583,-124279,-3.04%,-0.62%
0,3,4111920,3974061,-137859,-3.35%,-0.68%
0,4,4077551,4020035,-57516,-1.41%,-0.28%
0,5,4064653,4018158,-46495,-1.14%,-0.23%
0,6,4073013,4019207,-53806,-1.32%,-0.27%
0,7,4043046,4148360,105314,2.60%,0.52%
0,8,4025604,4167887,142283,3.53%,0.70%
0,9,4125415,4133564,8149,0.20%,0.04%


In [None]:
# SEX codes (per the Census Bureau file layout): 0 is the combined total,
# 1 is male, 2 is female.  AGE 999 is the all-ages total row.
# Keep only the combined ("both sexes") rows, drop the now-constant SEX
# column, and exclude the AGE 999 total so only real ages remain.
both_sexes = census.where('SEX', are.equal_to(0))
by_age = both_sexes.drop('SEX').where('AGE', are.below(999))
by_age

In [None]:
# Plot the 2010 and 2015 populations against age.
# Columns are picked by label; in by_age these are the columns at
# positions 0, 1 and 2.
by_age.select('AGE', '2010', '2015').plot('AGE')

In [None]:
# Plot the 2010 -> 2015 change against age; does the structure make sense?
age_and_change = by_age.select("AGE", "Change")
age_and_change.plot("AGE")

In [None]:
# Which ages have the most change?
#   'Change'        - difference in head count at that age, 2015 minus 2010
#   'Total Growth'  - that difference as a percentage of the 2010 count
#   'Annual Growth' - the same growth expressed as a per-year percentage
largest_change_first = by_age.sort("Change", descending=True)
largest_change_first

In [None]:
# When were those people born?  Birth year of someone aged 68 in 2010.
2010 - 68

In [None]:
# Birth year of someone aged 68 in 2015.
2015 - 68

In [None]:
# What do you think is the cause of that big increase?

In [None]:
# Look at the top end of the age range (the 100 bin also covers 101, 102, etc).
oldest_ages = by_age.where('AGE', are.above(89))
oldest_ages.show()

In [None]:
# Can you show whether people are living longer? How do you account for birth rate and immigration differences?

In [None]:
# As a hint, add a column holding the difference between each age and the
# age just below it, within the 2010 figures,
# i.e. 2010 age 1 - 2010 age 0 is 6558 more people.
# The first row has no previous age to compare with, so it gets NaN.
pop_2010_by_age = by_age.column('2010')
age_to_age_diff = np.append(np.nan, np.diff(pop_2010_by_age))
with_delta2010 = by_age.with_column('2010 Diff', age_to_age_diff)
with_delta2010.where('AGE', are.above(89)).show()
# Can you make this a fraction?  Do the same for 2015?

In [None]:
# now make a bar chart for easy comparison (you'll have to create a table with just specific columns)

In [None]:
# Look at the changes by age between 60 and 70. See anything striking?