In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## One Attribute Group

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
# Load the full cones table, then derive a reduced table from it: the 'Color'
# column is removed and the row at index 5 is excluded. all_cones itself is
# reused by later cells that need 'Color'.
all_cones = Table.read_table('cones.csv')
cones = all_cones.drop('Color').exclude(5)
cones

In [None]:
# With no second argument, group reports how many rows share each 'Flavor' value.
cones.group('Flavor')

In [None]:
# A function passed as the second argument is applied to each group's values
# in the remaining columns; list simply shows the values that were collected.
cones.group('Flavor', list)

In [None]:
# len of each group's collected values, i.e. the group sizes computed explicitly.
cones.group('Flavor', len)

In [None]:
# Smallest value per flavor in each remaining column.
cones.group('Flavor', min)

In [None]:
# Cross-check of the grouped minimum for a single flavor: filter to the
# 'chocolate' rows and take the min of their 'Price' values directly.
min(cones.where('Flavor', 'chocolate').column('Price'))

In [None]:
# Average per flavor of each remaining column.
cones.group('Flavor', np.average)

In [None]:
def data_range(x):
    """Return the spread of the values in x: the largest minus the smallest."""
    largest = max(x)
    smallest = min(x)
    return largest - smallest

In [None]:
# Any function of a collection of values can be used as the aggregator,
# including the user-defined data_range from the previous cell.
cones.group('Flavor', data_range)

In [None]:
# Load the NBA salary data and relabel the column at index 3 as 'SALARY' so
# later cells can refer to it by a short name.
nba = Table.read_table('nba_salaries.csv').relabeled(3, 'SALARY')
nba

In [None]:
# Keep only TEAM and SALARY, total the salaries within each team, and sort by
# that total (column index 1 of the grouped table), largest first.
teams_and_money = nba.select('TEAM', 'SALARY')
teams_and_money.group('TEAM', sum).sort(1, descending=True)

In [None]:
# Same grouping on the full table: sum is now applied to every non-grouping
# column, not just SALARY. NOTE(review): columns such as POSITION are
# presumably text, so their "sums" are not meaningful -- compare with the
# previous cell, which selected only TEAM and SALARY first.
nba.group('TEAM', sum)

In [None]:
# Keep only POSITION and SALARY, then count the rows for each position.
position_and_money = nba.select('POSITION', 'SALARY')
position_and_money.group('POSITION')

In [None]:
# Average SALARY within each position.
position_and_money.group('POSITION', np.average)

## Cross Classification

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# Redisplay the full cones table (the one loaded before 'Color' was dropped).
all_cones

In [None]:
# Row counts per flavor, using a single grouping column.
all_cones.group('Flavor')

In [None]:
# Passing a list of labels groups by each distinct (Flavor, Color)
# combination and counts the rows in each.
all_cones.group(['Flavor', 'Color'])

In [None]:
# Largest value of each remaining column within every (Flavor, Color) combination.
all_cones.group(['Flavor', 'Color'], max)

In [None]:
# Redisplay the NBA table for reference before cross-classifying it.
nba

In [None]:
# Remove the column at index 0 (presumably the player name -- confirm against
# the table above), then average the remaining columns for every
# (TEAM, POSITION) pair.
nba.drop(0).group(['TEAM', 'POSITION'], np.average)

In [None]:
# Remove the columns at indices 0 and 2 before grouping, so the average is
# computed only over what remains for each POSITION.
nba.drop(0, 2).group('POSITION', np.average)

In [None]:
# Read the education/income data, then narrow it down step by step.
full_table = Table.read_table('educ_inc.csv')
ca_2014 = (
    full_table
    .where('Year', are.equal_to('1/1/14 0:00'))    # keep only rows with this Year value
    .where('Age', are.not_equal_to('00 to 17'))    # leave out the '00 to 17' age bracket
    .drop(0)                                       # remove the column at index 0
    .sort('Population Count')                      # order rows by 'Population Count'
)
ca_2014

In [None]:
# Remove the column at index 0 of ca_2014 (presumably 'Age', given the name
# no_ages -- confirm against the table displayed above).
no_ages = ca_2014.drop(0)
no_ages

In [None]:
# Group by the first three columns (given by index) and sum whatever columns
# remain within each combination.
no_ages.group([0, 1, 2], sum)

## Example 1: NBA Salaries with group

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# For every (TEAM, POSITION) pair, keep the maximum of each remaining column
# after removing the column at index 0. NOTE(review): the name suggests the
# highest-paid player at each position is being treated as the "starter".
starter_salaries = nba.drop(0).group(['TEAM', 'POSITION'], max)
starter_salaries

In [None]:
# Remove the column at index 1 (the second grouping label, POSITION), add up
# the per-position maxima within each team, and sort by that total (column
# index 1 of the grouped result), largest first.
starter_salaries.drop(1).group('TEAM', sum).sort(1, descending=True)

## Pivot Tables

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# Redisplay the full cones table as the starting point for the pivot examples.
all_cones

In [None]:
# Counts per (Flavor, Color) combination in "long" form: one row per combination.
all_cones.group(['Flavor', 'Color'])

In [None]:
# The same counts arranged as a grid: the first argument's values ('Flavor')
# become columns and the second argument's values ('Color') become rows.
all_cones.pivot('Flavor', 'Color')   # pivot table, contingency table

In [None]:
# Roles swapped: 'Color' values across the columns, 'Flavor' values down the rows.
all_cones.pivot('Color', 'Flavor')

In [None]:
# Instead of counts, fill each cell with the max of the 'Price' values from
# the rows matching that (Color, Flavor) combination.
all_cones.pivot('Color', 'Flavor', values = 'Price', collect = max)

In [None]:
# Redisplay the NBA table before pivoting it.
nba

In [None]:
# Long-form averages per (TEAM, POSITION) pair, for comparison with the
# pivot version in the next cell.
nba.drop(0).group(['TEAM', 'POSITION'], np.average)

In [None]:
# Grid form: POSITION values across the columns, TEAM values down the rows,
# each cell holding the average SALARY for that combination.
nba.pivot('POSITION', 'TEAM', 'SALARY', np.average)

## Example 2: NBA Salaries with pivot

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# One row per TEAM and one column per POSITION; each cell holds the maximum
# SALARY among the rows for that team/position combination.
step_1 = nba.pivot('POSITION', 'TEAM', 'SALARY', max)
step_1

In [None]:
# Add up each team's row of per-position maxima. The column at index 0 is
# dropped first so that only the salary columns remain in each row passed to sum.
totals = step_1.drop(0).apply(sum)
# Sort by the new column's label rather than a hard-coded position (6), so the
# cell keeps working if the number of position columns in step_1 changes.
step_1.with_columns('TOTAL', totals).sort('TOTAL', descending=True)

## Comparing Distributions

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
ca_2014

In [None]:
# pivot(columns, rows, values, collect) with columns of ca_2014 given by
# index: the values of column 2 become the new columns, the values of column 3
# label the rows, and each cell is the sum of column 4 over the matching rows.
# NOTE(review): presumably education level, income bracket, and population
# count respectively -- confirm against the ca_2014 display above.
educ_income = ca_2014.pivot(2, 3, 4, sum)
educ_income

In [None]:
def percent(x):
    """Turn an array of counts into percentages of the total, rounded to 2 places."""
    total = sum(x)
    shares = x / total
    return np.round(shares * 100, 2)

In [None]:
# Build the two income distributions to compare, as percents, keyed by the
# first column of educ_income.
# pivot lays out its value columns in sorted order of the education labels, so
# after the key column the expected order is: bachelor's (1), some college (2),
# high school (3), no diploma (4). The 'High School' distribution must
# therefore come from index 3; index 2 would silently chart the "college,
# less than 4-yr degree" group under the wrong label.
# NOTE(review): confirm the order with educ_income.labels if the data changes.
distributions = educ_income.select(0).with_columns(
    'Bachelors or Higher', percent(educ_income.column(1)),
    'High School', percent(educ_income.column(3))
)
distributions

In [None]:
# Sanity check: a percent distribution should add up to about 100 (the
# rounding to 2 places in percent can leave it very slightly off).
sum(distributions.column(1))

In [None]:
# Horizontal bar chart using the column at index 0 for the category labels;
# the remaining columns are drawn as bars so the distributions can be compared.
distributions.barh(0)