In [None]:
import numpy as np
from datascience import *
%matplotlib inline
# Printed NumPy arrays with more than 50 elements are summarized instead of shown in full.
np.set_printoptions(threshold=50)

## Review

In [None]:
# The same data is published online at: https://www.statcrunch.com/app/index.php?dataid=1843341
# Read the salary CSV, rename the column at index 3 to 'SALARY', then discard
# the column at index 2 (which leaves 'SALARY' at index 2).
nba = (
    Table.read_table('http://inferentialthinking.com/notebooks/nba_salaries.csv')
    .relabeled(3, 'SALARY')
    .drop(2)
)
nba

In [None]:
# Column-0 values of the rows whose column 1 equals 'PG' and whose column 2
# ('SALARY') is above 15.
(
    nba
    .where(1, 'PG')
    .where(2, are.above(15))
    .column(0)
)

In [None]:
# with_row returns a new table rather than modifying nba, and the result is
# not assigned to anything here, so nba itself is unchanged by this line.
nba.with_row(['DeNero', 'C', 0.0])
# Consequently this lookup runs against the original nba rows only.
nba.where('PLAYER', are.containing('DeNero'))

## Population & Age

In [None]:
# As of Jan 2017, this census file is online here:
data = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'

# A copy can be accessed here in case census.gov moves the file
# (uncomment the next line to use the mirror instead):
# data = 'http://inferentialthinking.com/notebooks/nc-est2015-agesex-res.csv'

# Read the CSV located at `data` into a table and display it.
full_census_table = Table.read_table(data)
full_census_table

In [None]:
# Keep only the sex, age, and 2010 / 2015 population-estimate columns.
partial = full_census_table.select(
    'SEX',
    'AGE',
    'POPESTIMATE2010',
    'POPESTIMATE2015',
)
partial

In [None]:
# Shorten the two population-estimate labels (columns 2 and 3) to just their years.
us_pop = (
    partial
    .relabeled(2, '2010')
    .relabeled(3, '2015')
)
us_pop

In [None]:
# Element-wise difference of the two columns: the 2015 value minus the 2010 value for each row.
us_pop.column('2015') - us_pop.column('2010')

In [None]:
# Per-row change from 2010 to 2015, and that change relative to the 2010 value.
change = us_pop.column('2015') - us_pop.column('2010')
census = (
    us_pop
    .with_column('Change', change)
    .with_column('Percent Change', change / us_pop.column('2010'))
)
# Render the relative change as a percentage when the table is displayed.
census.set_format('Percent Change', PercentFormatter)

In [None]:
# Rows ordered from the largest 'Change' to the smallest.
census.sort('Change', descending=True)

In [None]:
# Fifth root of the ratio of the two totals, minus 1: the constant yearly
# growth rate that compounds to that ratio over 5 years.
# NOTE(review): the literals are presumably the overall 2015 and 2010
# populations copied from the table — confirm against the sorted output.
(321418820/309346863) ** (1/5) - 1

In [None]:
# First row of census after sorting by 'Change' in descending order,
# i.e. the row with the largest 'Change'.
everyone = census.sort('Change', descending=True).row(0)
everyone

In [None]:
# Inspect what kind of object a single table row is.
type(everyone)

In [None]:
# Annualized growth rate computed from the row itself: positions 3 and 2 are
# the '2015' and '2010' columns, so this is (2015 / 2010) ** (1/5) - 1.
(everyone.item(3)/everyone.item(2)) ** (1/5) - 1

In [None]:
# Position 5 of the row is the 'Percent Change' column: growth over the whole 2010-2015 span.
five_year_rate = everyone.item(5)
# Convert the five-year growth into the equivalent constant yearly rate.
(five_year_rate + 1) ** (1/5) - 1

In [None]:
# NOTE(review): presumably the birth year of people listed as age 68 in 2010 — confirm the age against the table.
2010-68 # Bombing of Pearl Harbor was 12/7/1941

In [None]:
# NOTE(review): presumably the birth year of people listed as age 68 in 2015 — confirm the age against the table.
2015-68 # Bombing of Nagasaki was 8/9/1945

## Population & Gender

In [None]:
# Display the rows whose AGE value is above 97.
us_pop.where('AGE', are.above(97)).show()

In [None]:
# Work with the 2015 figures only, then isolate the rows whose AGE is 999.
# A plain value passed to where is matched by equality.
us_pop_2015 = us_pop.drop('2010')
all_ages = us_pop_2015.where('AGE', 999)
all_ages

In [None]:
# The '2015' value of the first row of all_ages, as a single number.
all_ages.column('2015').item(0)

In [None]:
# Express every row's 2015 value as a fraction of the first row's 2015 value,
# shown as a percentage.
(
    all_ages
    .with_columns('Proportion', all_ages.column('2015') / all_ages.column('2015').item(0))
    .set_format('Proportion', PercentFormatter)
)

In [None]:
# Rows of the 2015 table whose AGE is exactly 0.
infants = us_pop_2015.where('AGE', are.equal_to(0))
infants

In [None]:
# Express every infant row's 2015 value as a fraction of the first infant
# row's 2015 value, shown as a percentage.
(
    infants
    .with_columns('Proportion', infants.column('2015') / infants.column('2015').item(0))
    .set_format('Proportion', PercentFormatter)
)

In [None]:
# SEX code 2 selects the female rows; the AGE == 999 row is then excluded so
# that only rows for individual ages remain.
females_all_rows = us_pop_2015.where('SEX', 2)
females = females_all_rows.where('AGE', are.not_equal_to(999))
females

In [None]:
# SEX code 1 selects the male rows; the AGE == 999 row is then excluded so
# that only rows for individual ages remain.
males_all_rows = us_pop_2015.where('SEX', 1)
males = males_all_rows.where('AGE', are.not_equal_to(999))
males

In [None]:
# Display the AGE column of males (handy for checking its order against females).
males.column('AGE')

In [None]:
# Display the AGE column of females (handy for checking its order against males).
females.column('AGE')

In [None]:
# The division below pairs rows purely by position, so it is only meaningful
# when both tables list the same ages in the same order. Fail loudly instead
# of silently producing ratios for mismatched ages.
assert np.array_equal(females.column('AGE'), males.column('AGE')), \
    'females and males must have identical AGE columns'

# For each age: the 2015 female count divided by the 2015 male count.
ratios = Table().with_columns(
    'AGE', females.column('AGE'),
    '2015 F:M RATIO', females.column('2015')/males.column('2015')
)
ratios

In [None]:
# Display the ratio rows whose AGE is above 75.
ratios.where('AGE', are.above(75)).show()

In [None]:
# Plot the remaining column ('2015 F:M RATIO') against AGE on the horizontal axis.
ratios.plot('AGE')

## Advanced Where

In [None]:
# A comparison is an expression whose value is a bool; this one is True.
3 > 2

In [None]:
# A comparison is an expression whose value is a bool; this one is False.
1 > 2

In [None]:
# Comparing an array with a number compares each element, giving an array of
# bools (True only for the elements 3 and 4 here).
np.arange(5) > 2

In [None]:
# Passing a plain value to where keeps the rows whose AGE equals that value.
us_pop.where('AGE', 70)

In [None]:
# where also accepts a sequence of booleans with one entry per row:
# rows paired with True are kept, rows paired with False are dropped.
us_pop.where('AGE', 70).where([False, True, True])

In [None]:
# Rows for AGE 70, then one bool per row: is that row's 2010 value under 2,000,000?
seventy = us_pop.where('AGE', 70)
seventy.column('2010') < 2000000

In [None]:
# Use the boolean array built from seventy's own column to filter seventy's rows.
seventy.where(seventy.column('2010') < 2000000)

In [None]:
# One bool per row of us_pop: is the 2010 value greater than 4 million?
us_pop.column('2010') > 4e6

In [None]:
# Predicate form: keep the rows whose 2010 value is above 4 million.
us_pop.where('2010', are.above(4e6))

In [None]:
# Boolean-array form of the same filter: keep the rows whose 2010 value is greater than 4 million.
us_pop.where(us_pop.column('2010') > 4e6)

In [None]:
# Left commented out on purpose: the boolean array has one entry per row of
# the full us_pop table, but it is applied to the much smaller AGE == 70
# table, so the lengths do not match and the call is expected to fail.
# us_pop.where('AGE', 70).where(us_pop.column('2010') > 4e6)

In [None]:
# One bool per row: is the 2015 value more than 1.5 times the 2010 value?
us_pop.column('2015') / us_pop.column('2010') > 1.5

In [None]:
# Keep the rows whose 2015 value is more than 1.5 times the 2010 value.
us_pop.where(us_pop.column('2015') / us_pop.column('2010') > 1.5)

In [None]:
# Keep the rows whose 'Percent Change' (change divided by the 2010 value) is above 0.5, i.e. 50%.
census.where('Percent Change', are.above(.5))

In [None]:
# Three-argument where: compare two columns row by row, keeping the rows
# whose '2010' value is above that same row's '2015' value.
us_pop.where('2010', are.above, '2015')