In [None]:
import numpy as np

## Birthday Review

In [None]:
# Chance that k people all have different birthdays: the first person is
# free, and each later person must avoid the days already taken, giving
# the fractions 364/365, 363/365, ..., (365-k+1)/365.
k = 40
numerators = np.arange(364, 365 - k, -1)
all_different = (numerators / 365).prod()

In [None]:
# Complement of all_different: the chance that the birthdays are NOT all different.
1-all_different

In [None]:
# The integers 1 through 8 (the stop value 9 is excluded); the running
# product of these is 1!, 2!, ..., 8!.
eight = np.arange(1, 9)
eight.cumprod()

In [None]:
# birth[i] is the chance of a shared birthday among i people, for i = 0..k.
k = 60
numerators = np.arange(364, 365 - k, -1)
# Running product: entry j is the chance that j+2 people are all different.
running_all_different = (numerators / 365).cumprod()
# Groups of 0 or 1 people cannot share a birthday, hence the two leading zeros.
birth = np.concatenate(([0, 0], 1 - running_all_different))
birth

In [None]:
# Entry at index 4 of birth, returned as a plain Python number.
birth.item(4)

In [None]:
# Entry at index 40 of birth, returned as a plain Python number.
birth.item(40)

In [None]:
# Note: `plt` here is bokeh.plotting, not matplotlib.pyplot.
import bokeh.plotting as plt
plt.output_notebook(hide_banner=True)
# Line plot of the shared-birthday chance against the group size.
p = plt.figure(title="Same birthday chances", x_axis_label='k',
               y_axis_label='chance of at least one shared birthday')
# Take the x-values from birth itself rather than from the global k, which
# is reassigned in other cells; x and y then always have the same length.
p.line(np.arange(len(birth)), birth, line_width=4)
plt.show(p)

## Comparison

In [None]:
# Element-wise comparison: a boolean array that is True where birth is at least 0.5.
birth >= 0.5

In [None]:
# Round every entry to one decimal place, then test element-wise for equality with 0.5.
np.round(birth, 1) == 0.5

In [None]:
# You don't need to know the flatnonzero function for this class
# It returns the indices at which the comparison below is True.
np.flatnonzero(np.round(birth, 1) == 0.5)

In [None]:
# Element-wise comparison: a boolean array that is True where birth is not 0.
birth != 0

## Combinations

In [84]:
# Draw 60 integers from 0 through 364 (the upper bound 365 is excluded).
# NOTE(review): no seed is set in this cell; unless one is set elsewhere,
# the sample (and every output derived from it) changes on each run.
birthdays = np.random.randint(low=0, high=365, size=60)
birthdays

array([199, 203, 348, 155,  49,  98, 329, 321,  12, 112, 326,  58, 112,
        19,  83, 182,  98, 134, 282, 320, 207, 238,   4, 297,  49,  41,
        99, 337, 336, 285, 251, 360,  25, 275,  80, 171, 271,  29,  58,
       183, 143, 349, 118, 119, 251, 200, 217,  14, 353, 135, 301,  66,
       346, 351, 325,  43, 285,  71,  62,  85])

In [85]:
# Sorted copy of birthdays in ascending order; birthdays itself is not modified.
np.sort(birthdays)

array([  4,  12,  14,  19,  25,  29,  41,  43,  49,  49,  58,  58,  62,
        66,  71,  80,  83,  85,  98,  98,  99, 112, 112, 118, 119, 134,
       135, 143, 155, 171, 182, 183, 199, 200, 203, 207, 217, 238, 251,
       251, 271, 275, 282, 285, 285, 297, 301, 320, 321, 325, 326, 329,
       336, 337, 346, 348, 349, 351, 353, 360])

In [None]:
# Compare each value with the value at the same position in sorted order.
in_order = np.sort(birthdays)
same_sorted = birthdays == in_order
same_sorted

In [None]:
# True if at least one entry of same_sorted is True.
np.any(same_sorted)

In [70]:
# True only if every entry of same_sorted is True.
np.all(same_sorted)

False

In [86]:
# After sorting, equal values sit next to each other, so a zero difference
# between neighbours means at least two entries of birthdays are the same.
np.any(np.diff(np.sort(birthdays)) == 0)

True

In [87]:
# True only if every neighbouring difference in sorted order is positive,
# i.e. no value in birthdays is repeated.
np.all(np.diff(np.sort(birthdays)) > 0)

False

## Tables

In [None]:
from datascience import *

In [None]:
# Load the CSV at `url` into a Table.
# NOTE(review): this is a plain-http address on census.gov; confirm that it
# still resolves before relying on this cell in a fresh run.
url = 'http://www.census.gov/popest/data/national/asrh/2014/files/NC-EST2014-AGESEX-RES.csv'
full = Table.read_table(url)
full

In [None]:
# Keep four columns of full: 'SEX' and 'AGE' chosen by label, and the
# columns at positions 4 and 8 chosen by index.
partial = full.select(['SEX', 'AGE', 4, 8])
partial

In [None]:
# Shorten two labels: 'POPESTIMATE2010' becomes '2010', and the column at
# index 3 becomes '2014'.
simple = partial.relabeled('POPESTIMATE2010', '2010').relabeled(3, '2014')
simple

In [None]:
# Look up a column's values by its label.
simple.column('2014')

In [None]:
# Look up a column's values by its position; index 1 is the second column.
simple.column(1)

#### Discussion question

In [None]:
# Columns 2 and 3 of simple hold the two population estimates.
_earlier = simple.column(2)
_later = simple.column(3)
# Absolute change between the two estimates.
change = _later - _earlier
# Per-year growth rate: the fourth root of the overall ratio, minus 1.
annual_growth_rate = (_later / _earlier) ** (1/4) - 1
# Extend simple with the two derived columns.
census = simple.with_columns(['Change', change, 'Growth', annual_growth_rate])
census

In [None]:
# One cell of the table: take the column at index 4, then its entry at index 1.
census.column(4).item(1)

In [None]:
# The same cell reached the other way: take the row at index 1, then its item at index 4.
census.row(1).item(4)

In [None]:
# The column labels of census.
census.labels

In [None]:
# The number of rows in census.
census.num_rows

In [None]:
# The row at index 5 of census.
census.row(5)

In [None]:
# Attach display formatters: 'Growth' as a percentage, and the columns at
# indices 2, 3 and 4 as numbers. The results are not reassigned, so this
# relies on set_format updating census in place.
census.set_format('Growth', PercentFormatter)
census.set_format([2, 3, 4], NumberFormatter)

## Transformations

In [None]:
# Rows whose AGE value is 999.
# NOTE(review): 999 is presumably the code for the all-ages total; confirm
# against the layout of the source file.
census.where('AGE', 999)

In [None]:
# Keep the rows with SEX equal to 1, then display them ordered from the
# highest Growth to the lowest.
males = census.where('SEX', 1)
males.sort('Growth', descending=True)

In [None]:
# Drop the rows where SEX is 0.
both = census.where(census.column('SEX') != 0)
# Of those, show every row whose Change exceeds 300000, ordered by AGE.
_large_change = both.column('Change') > 300000
both.where(_large_change).sort('AGE').show()

In [None]:
t = males
# Show every row of t whose AGE is strictly between 55 and 70.
_ages = t.column('AGE')
t.where(np.logical_and(_ages > 55, _ages < 70)).show()