## Review



In [5]:
import numpy as np
from datascience import *
%matplotlib inline
np.set_printoptions(threshold=50)

In [6]:
# This table can be found online: https://www.statcrunch.com/app/index.php?dataid=1843341
# Load the NBA salary table, then tidy its columns by position. The relabel
# has to come before the drop: removing column 2 first would shift the
# salary column out of index 3.
nba = (
    Table.read_table('http://inferentialthinking.com/notebooks/nba_salaries.csv')
    .relabeled(3, 'SALARY')  # give the fourth column a short name
    .drop(2)                 # discard the third column
)
nba

PLAYER,POSITION,SALARY
Paul Millsap,PF,18.6717
Al Horford,C,12.0
Tiago Splitter,C,9.75625
Jeff Teague,PG,8.0
Kyle Korver,SG,5.74648
Thabo Sefolosha,SF,4.0
Mike Scott,PF,3.33333
Kent Bazemore,SF,2.0
Dennis Schroder,PG,1.7634
Tim Hardaway Jr.,SG,1.30452


In [10]:
# Names of the point guards (POSITION 'PG') whose SALARY is at least 15.
# Columns are addressed by label instead of by position; the labels match
# the PLAYER, POSITION, SALARY header of the table displayed earlier.
(nba
 .where('POSITION', 'PG')
 .where('SALARY', are.above_or_equal_to(15))
 .column('PLAYER'))

array(['Derrick Rose', 'Kyrie Irving', 'Chris Paul', 'Russell Westbrook',
       'John Wall'], 
      dtype='<U24')

In [11]:
# with_row builds a table containing the extra row, but the result is not
# assigned to anything here, so `nba` itself is left unchanged.
# NOTE(review): presumably an intentional demonstration that with_row does
# not modify the table in place -- confirm before "fixing" by assigning.
nba.with_row(['Sam Lau', 'Mascot', 100.0])
# Consequently this lookup only finds players already in `nba` whose name
# contains 'Lau'; the 'Sam Lau' row is absent.
nba.where('PLAYER', are.containing('Lau'))

PLAYER,POSITION,SALARY
Joffrey Lauvergne,C,1.70972


## Population and Age

In [12]:
# Load the census population estimates, with one row per (SEX, AGE)
# combination and one column per year's count.
# As of Jan 2017, this census file is online here:
data = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'

# A copy can be accessed here in case census.gov moves the file:
# data = 'http://inferentialthinking.com/notebooks/nc-est2015-agesex-res.csv'

# Read the CSV at the chosen URL into a Table and display it.
full_census_table = Table.read_table(data)
full_census_table

SEX,AGE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015
0,0,3944153,3944160,3951330,3963087,3926540,3931141,3949775,3978038
0,1,3978070,3978090,3957888,3966551,3977939,3942872,3949776,3968564
0,2,4096929,4096939,4090862,3971565,3980095,3992720,3959664,3966583
0,3,4119040,4119051,4111920,4102470,3983157,3992734,4007079,3974061
0,4,4063170,4063186,4077551,4122294,4112849,3994449,4005716,4020035
0,5,4056858,4056872,4064653,4087709,4132242,4123626,4006900,4018158
0,6,4066381,4066412,4073013,4074993,4097605,4142916,4135930,4019207
0,7,4030579,4030594,4043046,4083225,4084913,4108349,4155326,4148360
0,8,4046486,4046497,4025604,4053203,4093177,4095711,4120903,4167887
0,9,4148353,4148369,4125415,4035710,4063152,4104072,4108349,4133564


In [13]:
# Keep only the identifying columns (SEX, AGE) and the population estimates
# for the two years being compared, 2010 and 2015.
partial = full_census_table.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2015')
partial

SEX,AGE,POPESTIMATE2010,POPESTIMATE2015
0,0,3951330,3978038
0,1,3957888,3968564
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564


In [14]:
# Shorten the two estimate column labels to just their year. Both renames
# are done in one call by passing parallel lists of old and new labels.
us_pop = partial.relabeled(
    ['POPESTIMATE2010', 'POPESTIMATE2015'],
    ['2010', '2015'],
)
us_pop

SEX,AGE,2010,2015
0,0,3951330,3978038
0,1,3957888,3968564
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564


In [15]:
# Pull the 2010 estimates out of the table as an array (one entry per row).
us_pop.column('2010')

array([  3951330,   3957888,   4090862, ...,     26074,     45058,
       157258820])

In [17]:
# Element-wise difference of the two columns: the change in population
# from 2010 to 2015 for each row of the table.
us_pop.column('2015') - us_pop.column('2010')

array([  26708,   10676, -124279, ...,    7990,   16828, 5930703])

In [20]:
# Absolute change in population from 2010 to 2015, one entry per row.
change = us_pop.column('2015') - us_pop.column('2010')

# Extend the table with the absolute change and the change relative to the
# 2010 value, and have the relative change rendered as a percentage.
census = us_pop.with_columns(
    'Change', change,
    'Percent Change', change / us_pop.column('2010'),
).set_format('Percent Change', PercentFormatter)
census

SEX,AGE,2010,2015,Change,Percent Change
0,0,3951330,3978038,26708,0.68%
0,1,3957888,3968564,10676,0.27%
0,2,4090862,3966583,-124279,-3.04%
0,3,4111920,3974061,-137859,-3.35%
0,4,4077551,4020035,-57516,-1.41%
0,5,4064653,4018158,-46495,-1.14%
0,6,4073013,4019207,-53806,-1.32%
0,7,4043046,4148360,105314,2.60%
0,8,4025604,4167887,142283,3.53%
0,9,4125415,4133564,8149,0.20%


In [23]:
# Rank rows by absolute change, largest first. The AGE 999 rows are the
# all-ages totals, so they land at the top ahead of any single age.
census.sort('Change', descending=True)

SEX,AGE,2010,2015,Change,Percent Change
0,999,309346863,321418820,12071957,3.90%
1,999,152088043,158229297,6141254,4.04%
2,999,157258820,163189523,5930703,3.77%
0,68,2359816,3436357,1076541,45.62%
0,64,2706055,3536156,830101,30.68%
0,65,2678525,3450043,771518,28.80%
0,66,2621335,3344134,722799,27.57%
0,67,2693707,3304187,610480,22.66%
0,72,1883820,2469605,585785,31.10%
2,68,1254117,1812428,558311,44.52%


In [29]:
# What is the growth rate each year if we assume exponential growth?
# The two literals are the 2015 and 2010 totals from the SEX 0, AGE 999 row;
# the 1/5 power converts five years of growth into a per-year rate.
(321418820 / 309346863) ** (1/5) - 1

0.007685750230353783

In [35]:
# Annual growth rate of the whole population, read from the table rather
# than typed in by hand. The total row is selected by its keys (SEX 0 with
# AGE 999) instead of assuming it sorts first by 'Change', so the lookup
# stays correct even if row order or the data changes.
everyone = census.where('SEX', 0).where('AGE', 999).row(0)
# Items 2 and 3 of the row are the '2010' and '2015' columns; the 1/5 power
# converts five years of growth into a per-year rate.
(everyone.item(3) / everyone.item(2)) ** (1/5) - 1

0.0076857502303537828

In [36]:
# Same ranking by absolute change as before, shown again to look past the
# AGE 999 total rows at which individual ages grew the most.
census.sort('Change', descending=True)

SEX,AGE,2010,2015,Change,Percent Change
0,999,309346863,321418820,12071957,3.90%
1,999,152088043,158229297,6141254,4.04%
2,999,157258820,163189523,5930703,3.77%
0,68,2359816,3436357,1076541,45.62%
0,64,2706055,3536156,830101,30.68%
0,65,2678525,3450043,771518,28.80%
0,66,2621335,3344134,722799,27.57%
0,67,2693707,3304187,610480,22.66%
0,72,1883820,2469605,585785,31.10%
2,68,1254117,1812428,558311,44.52%


In [39]:
# Approximate birth year of people who were 68 years old in 2010.
# NOTE(review): the trailing date is presumably the Pearl Harbor attack (Dec 7, 1941) -- confirm intent.
2010 - 68 # 12/7/1941

1942

In [40]:
# Approximate birth year of people who were 68 years old in 2015.
# NOTE(review): the trailing date is presumably a marker for the end of World War II (Aug 9, 1945) -- confirm intent.
2015 - 68 # 8/9/1945

1947

In [42]:
# Inspect the rows for the highest AGE values. AGE 999 is not a real age:
# those rows hold the all-ages totals, so they also pass the "above 97" filter.
# NOTE(review): AGE 100 has larger counts than AGE 99, so it presumably
# means "100 and over" -- confirm against the census file layout.
us_pop.where('AGE', are.above(97)).show()

SEX,AGE,2010,2015
0,98,47037,61991
0,99,32178,43641
0,100,54410,76974
0,999,309346863,321418820
1,98,9505,14719
1,99,6104,9577
1,100,9352,15088
1,999,152088043,158229297
2,98,37532,47272
2,99,26074,34064


In [44]:
# Work with the 2015 estimates only.
us_pop_2015 = us_pop.drop('2010')
# The AGE 999 rows are the all-ages totals, one row per SEX code.
all_ages = us_pop_2015.where('AGE', 999)
all_ages

SEX,AGE,2015
0,999,321418820
1,999,158229297
2,999,163189523


In [47]:
# The first row of all_ages is the SEX 0 row, whose count is the sum of the
# SEX 1 and SEX 2 rows, i.e. the overall total.
total = all_ages.column('2015').item(0)
# Express each row's 2015 count as a share of that total, shown as a percentage.
all_ages.with_column(
    'Proportion', all_ages.column('2015') / total,
).set_format('Proportion', PercentFormatter)

SEX,AGE,2015,Proportion
0,999,321418820,100.00%
1,999,158229297,49.23%
2,999,163189523,50.77%


In [48]:
# The AGE 0 rows of the 2015 estimates, one row per SEX code.
infants = us_pop_2015.where('AGE', 0)
infants

SEX,AGE,2015
0,0,3978038
1,0,2035134
2,0,1942904


In [50]:
# Share of each row relative to the first row (SEX 0, the combined count of
# all infants), shown as a percentage.
infants.with_column(
    'Proportion', infants.column('2015') / infants.column('2015').item(0),
).set_format('Proportion', PercentFormatter)

SEX,AGE,2015,Proportion
0,0,3978038,100.00%
1,0,2035134,51.16%
2,0,1942904,48.84%
