In [62]:
import numpy as np
from datascience import *
%matplotlib inline

## Sort

In [None]:
# Load the NBA salary data into a Table named `nba` and display it.
# This table can be found online: https://www.statcrunch.com/app/index.php?dataid=1843341
# The CSV itself is read from the URL below, so this cell needs network access.
nba = Table.read_table('http://inferentialthinking.com/notebooks/nba_salaries.csv')
nba

In [None]:
# Display only the first 3 rows of the table.
nba.show(3)

In [None]:
# Add up every value in column index 3 (the salary column; see `salary = 3` in the Where section).
sum(nba.column(3))

In [None]:
# Largest value in column index 3 (the salary column; see `salary = 3` in the Where section).
max(nba.column(3))

In [None]:
# Sort rows by the 'PLAYER' column (ascending by default) and show the first 5.
nba.sort('PLAYER').show(5)

In [None]:
# Sort by column index 3 (salary), smallest first, and show the first 5 rows.
nba.sort(3).show(5)

In [None]:
# Same sort with descending=True, so the largest values in column 3 come first.
nba.sort(3, descending=True)

In [None]:
# Sort by salary descending, then re-sort the result by column index 1
# (presumably the position column, judging by the row layout used in the Lists section).
nba.sort(3, descending=True).sort(1)

In [None]:
# Print the documentation for the table's sort method to see its optional arguments.
help(nba.sort)

In [None]:
# Order by column 3 (salary) from highest to lowest, then sort on column 1
# while omitting rows that repeat a value already seen in that column.
nba.sort(3, descending=True).sort(1, distinct=True)

## Lists

In [None]:
# make_array builds an array from its arguments; mixing an int with a float
# is expected to produce an array of floats.
make_array(2, 3.0)

In [None]:
# item(0) returns the first element of the array, showing what type the 2 ended up as.
make_array(2, 3.0).item(0)

In [None]:
# Mixing a number with a string: arrays hold a single element type,
# so expect the 2 to be stored as a string.
make_array(2, 'three')

In [None]:
# Inspect the first element to see what the 2 became inside the array.
make_array(2, 'three').item(0)

In [None]:
# A Python list, by contrast, can hold values of different types side by side.
[2, 'three']

In [None]:
# Confirm that square brackets create a built-in list.
type([2, 'three'])

In [None]:
# A list can serve as a table row: one value per column, in column order.
row = ['John DeNero', 'C', 'Berkeley Data Scientists', 0.0]
# with_row returns a new table that includes the extra row; the result is not
# assigned back to `nba`. Sorting by column 3 should put the 0.0 entry at the top.
nba.with_row(row).sort(3)

## Take

In [None]:
# take with a single index returns a table containing just that row (index 0 is the first row).
nba.take(0)

In [None]:
# Passing a list of indices selects several rows at once.
nba.take([0, 1, 2])

In [None]:
# The three rows with the largest values in column 3: sort descending, then take the first three.
nba.sort(3, descending=True).take([0, 1, 2])

In [None]:
# Keep the three highest-salary rows under a name so they can be reused.
rich = nba.sort(3, descending=True).take([0, 1, 2])
# Re-sort that small table by column index 2
# (presumably the team column, per the row layout used in the Lists section).
rich.sort(2)

In [None]:
# Display nba again: none of the calls above were assigned back to `nba`,
# so it should look the same as when it was loaded.
nba

In [None]:
# np.arange(8, 15) produces the integers 8 through 14 (the end value is excluded).
np.arange(8, 15)

In [None]:
# Use that range as row indices to pull the rows at indices 8 through 14.
nba.take(np.arange(8, 15))

## Where

In [None]:
# Name the salary column's index so later cells read more clearly than a bare 3.
salary = 3
nba.sort(salary)

In [None]:
# Keep only rows whose salary is strictly greater than 10.
nba.where(salary, are.above(10))

In [None]:
# Same filter, then order the matching rows by salary (ascending).
nba.where(salary, are.above(10)).sort(salary)

In [None]:
# Rows where PLAYER is exactly 'Stephen Curry', using an explicit predicate.
nba.where('PLAYER', are.equal_to('Stephen Curry'))

In [None]:
# Shorthand: passing a plain value instead of a predicate tests for equality.
nba.where('PLAYER', 'Stephen Curry')

In [None]:
# Equality shorthand again, this time matching on the TEAM column.
nba.where('TEAM', 'Golden State Warriors')

In [None]:
# are.containing matches substrings, so this keeps every row whose TEAM includes
# 'Los Angeles', ordered by salary.
nba.where('TEAM', are.containing('Los Angeles')).sort(salary)

In [None]:
# are.between(3, 4) keeps salaries from 3 (inclusive) up to but not including 4.
nba.where(salary, are.between(3, 4))

In [None]:
# Chaining where calls combines conditions: POSITION equal to 'PG' and salary above 15.
nba.where('POSITION', 'PG').where(salary, are.above(15))

In [None]:
# Substring search on PLAYER. The 'John DeNero' row from the Lists section was never
# stored back into `nba`, so no match is expected unless the CSV itself has that name.
nba.where('PLAYER', are.containing('DeNero'))

## Population & Age

In [None]:
# Load the census population estimates (national, by age and sex, per the file path)
# into a Table and display it. The file is read from the URL, so network access is needed.
# As of Jan 2017, this census file is online here:
data = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'

# A copy can be accessed here in case census.gov moves the file:
# data = 'http://inferentialthinking.com/notebooks/nc-est2015-agesex-res.csv'

full_census_table = Table.read_table(data)
full_census_table

In [None]:
# Keep just the four columns used below: SEX, AGE, and the 2010 and 2014 population estimates.
partial_census_table = full_census_table.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2014')
partial_census_table

In [None]:
# Give the two estimate columns short year labels in a single relabeled call:
# the first list holds the current labels, the second their replacements.
us_pop = partial_census_table.relabeled(
    ['POPESTIMATE2010', 'POPESTIMATE2014'],
    ['2010', '2014'],
)
us_pop

In [None]:
# Element-wise difference of the two columns: the 2014 estimate minus the 2010 estimate for each row.
us_pop.column('2014') - us_pop.column('2010')

In [None]:
# Absolute change per row between the 2010 and 2014 estimates.
change = us_pop.column('2014') - us_pop.column('2010')
# Append the change and the change relative to the 2010 value, one column at a time.
census = (
    us_pop
    .with_column('Change', change)
    .with_column('Percent Change', change / us_pop.column('2010'))
)
# Render the relative change as a percentage; the formatted table is this cell's output.
census.set_format('Percent Change', PercentFormatter)

In [None]:
# Order rows so the largest values of 'Change' come first.
census.sort('Change', descending=True)

In [None]:
# Subtract an age of 67 from 2014 — presumably the birth year of that age group
# (NOTE(review): confirm that 67 is the age picked out of the sorted table above).
2014-67

In [None]:
# Same subtraction for 2010 — presumably the birth year of people aged 67 in 2010 (confirm).
2010-67

## Population & Gender

In [None]:
# show() with no argument displays every matching row. Besides the oldest ages,
# AGE > 97 also matches the special AGE value 999 that the cells below filter on.
us_pop.where('AGE', are.above(97)).show()

In [None]:
# Work with the 2014 estimates only by dropping the 2010 column.
us_pop_2014 = us_pop.drop('2010')
# Isolate the rows whose AGE is 999; a bare value passed to where is an equality test.
all_ages = us_pop_2014.where('AGE', 999)
all_ages

In [None]:
# Pull the 2014 value out of the first row of all_ages as a single number.
all_ages.column('2014').item(0)

In [None]:
# Divide each 2014 value by the first row's value to get every row's share of it,
# displayed as a percentage.
# NOTE(review): presumably the first row is the combined total for all sexes — confirm.
all_ages.with_column(
    'Proportion', all_ages.column('2014')/all_ages.column('2014').item(0)
).set_format('Proportion', PercentFormatter)

In [None]:
# Rows of the 2014 estimates where AGE equals 0.
infants = us_pop_2014.where('AGE', are.equal_to(0))
infants

In [None]:
# Same share calculation as for all_ages: each infant row's 2014 value divided by
# the first infant row's value, displayed as a percentage.
# NOTE(review): presumably the first row is the combined total for all sexes — confirm.
infants.with_column(
    'Proportion', infants.column('2014')/infants.column('2014').item(0)
).set_format('Proportion', PercentFormatter)

In [None]:
# SEX code 2 selects the rows treated as female in this notebook
# (a bare value passed to where is an equality test).
females_all_rows = us_pop_2014.where('SEX', 2)
# Remove the rows whose AGE is the special value 999.
females = females_all_rows.where('AGE', are.not_equal_to(999))
females

In [None]:
# SEX code 1 selects the rows treated as male in this notebook
# (a bare value passed to where is an equality test).
males_all_rows = us_pop_2014.where('SEX', 1)
# Remove the rows whose AGE is the special value 999.
males = males_all_rows.where('AGE', are.not_equal_to(999))
males

In [None]:
# Display the male AGE values so they can be compared with the female ones in the next cell.
males.column('AGE')

In [None]:
# Display the female AGE values; the ratio computed below divides row by row,
# so these need to line up with the male ages.
females.column('AGE')

In [None]:
# Build the ratio table one column at a time: ages first, then the 2014 female
# population divided element-wise by the 2014 male population.
# The division pairs rows by position, so it relies on `females` and `males`
# listing ages in the same order.
ratios = (
    Table()
    .with_column('AGE', females.column('AGE'))
    .with_column('2014 F:M RATIO', females.column('2014') / males.column('2014'))
)
ratios

In [None]:
# Show every ratio row for ages above 75 (show() with no argument displays all matching rows).
ratios.where('AGE', are.above(75)).show()

In [None]:
# Line plot with AGE on the horizontal axis and the remaining column (the F:M ratio) against it.
ratios.plot('AGE')