In [1]:
import numpy as np
# Keep printed arrays compact: arrays with more than 50 elements are
# summarized, and printed lines are wrapped at 50 characters.
np.set_printoptions(threshold=50, linewidth=50)

In [2]:
# From Lecture 6
from datascience import *
# Load the census estimates file.
full = Table.read_table('NC-EST2014-AGESEX-RES.csv')
# Keep SEX and AGE (by label) plus the columns at indices 4 and 8.
# Presumably those two are the 2010 and 2014 population estimates, since
# they are relabeled that way on the next line -- confirm against the CSV.
partial = full.select(['SEX', 'AGE', 4, 8])
simple = partial.relabeled(2, '2010').relabeled(3, '2014')
census = simple.with_columns([
        # Absolute difference between the 2014 and 2010 counts.
        'Change', simple.column('2014')-simple.column('2010'),
        # Per-year growth rate: fourth root of the 2014/2010 ratio, minus 1
        # (four years of compounding between the two estimates).
        'Growth', (simple.column('2014')/simple.column('2010')) ** (1/4) - 1
    ])
# Formatting only affects how the columns are displayed; the stored Growth
# values remain raw proportions (see the later .column('Growth') output).
census.set_format('Growth', PercentFormatter)
# Last expression of the cell, so its value is what the notebook displays.
census.set_format([2, 3, 4], NumberFormatter)

SEX,AGE,2010,2014,Change,Growth
0,0,3951330,3948350,-2980,-0.02%
0,1,3957888,3962123,4235,0.03%
0,2,4090862,3957772,-133090,-0.82%
0,3,4111920,4005190,-106730,-0.66%
0,4,4077552,4003448,-74104,-0.46%
0,5,4064653,4004858,-59795,-0.37%
0,6,4073013,4134352,61339,0.37%
0,7,4043047,4154000,110953,0.68%
0,8,4025604,4119524,93920,0.58%
0,9,4125415,4106832,-18583,-0.11%


In [3]:
# Split the table by SEX code: this notebook treats 1 as male and 2 as
# female. Rows with SEX 0 hold both sexes combined (their counts equal the
# sum of the SEX 1 and SEX 2 counts in the pivot output further down).
males = census.where('SEX', 1)
females = census.where('SEX', 2)

In [4]:
# Display the female-only rows.
females

SEX,AGE,2010,2014,Change,Growth
2,0,1932910,1930493,-2417,-0.03%
2,1,1937556,1938870,1314,0.02%
2,2,2002177,1935270,-66907,-0.85%
2,3,2010648,1956572,-54076,-0.68%
2,4,1993240,1959950,-33290,-0.42%
2,5,1988080,1961391,-26689,-0.34%
2,6,1993603,2024024,30421,0.38%
2,7,1979908,2031760,51852,0.65%
2,8,1971142,2014402,43260,0.54%
2,9,2018378,2009560,-8818,-0.11%


#### Discussion question

In [5]:
# AGE of the row with the second-largest 2014 female count. Index 1 is used
# rather than 0 presumably because the AGE 999 row sorts first, as it does
# in the sorted males table shown in the next cell -- confirm for females.
females.sort('2014', descending=True).column('AGE').item(1)

54

In [6]:
# Largest 2014 male counts first. The AGE 999 row lands on top: its counts
# are far larger than any single age, so it appears to be an all-ages total.
males.sort('2014', descending=True)

SEX,AGE,2010,2014,Change,Growth
1,999,152089484,156936487,4847003,0.79%
1,23,2151095,2399883,248788,2.77%
1,24,2161380,2391398,230018,2.56%
1,22,2188228,2367842,179614,1.99%
1,21,2241095,2310734,69639,0.77%
1,25,2177171,2295836,118665,1.34%
1,20,2331846,2269570,-62276,-0.67%
1,54,2091677,2242828,151151,1.76%
1,26,2102375,2240026,137651,1.60%
1,19,2334906,2220790,-114116,-1.24%


In [7]:
# Male rows whose count rose by more than 300,000, ordered by AGE.
# Note that the AGE 999 row also passes this filter.
males.where(males.column('Change') > 300000).sort('AGE').show()

SEX,AGE,2010,2014,Change,Growth
1,64,1291843,1661474,369631,6.49%
1,65,1272693,1607688,334995,6.02%
1,66,1239805,1589127,349322,6.40%
1,67,1270148,1653257,383109,6.81%
1,999,152089484,156936487,4847003,0.79%


In [8]:
# Male rows with 55 < AGE < 70 (both bounds exclusive). np.logical_and
# combines the two boolean arrays element-wise into a single row mask.
males.where(np.logical_and(males.column('AGE') > 55, 
                           males.column('AGE') < 70)).show()

SEX,AGE,2010,2014,Change,Growth
1,56,1984480,2140722,156242,1.91%
1,57,1910028,2110149,200121,2.52%
1,58,1838703,2027959,189256,2.48%
1,59,1779504,2006900,227396,3.05%
1,60,1742232,1914009,171777,2.38%
1,61,1691413,1837080,145667,2.09%
1,62,1679074,1763504,84430,1.23%
1,63,1753914,1701827,-52087,-0.75%
1,64,1291843,1661474,369631,6.49%
1,65,1272693,1607688,334995,6.02%


In [9]:
# Rows for ages 18 and 19 across every SEX code; np.logical_or keeps a row
# when either comparison is true.
census.where(np.logical_or(census.column('AGE') == 18,
                           census.column('AGE') == 19))

SEX,AGE,2010,2014,Change,Growth
0,18,4491005,4225590,-265415,-1.51%
0,19,4571385,4326394,-244991,-1.37%
1,18,2305733,2165062,-140671,-1.56%
1,19,2334906,2220790,-114116,-1.24%
2,18,2185272,2060528,-124744,-1.46%
2,19,2236479,2105604,-130875,-1.50%


## Functions

In [10]:
def percent(x):
    """Return the proportion ``x`` as a percentage rounded to two decimals.

    Proportions small enough that the scaled value rounds away come back
    as 0.0 (for example 1/60000).
    """
    scaled = 100 * x
    return round(scaled, 2)

In [11]:
# One sixth expressed as a percentage.
percent(1/6)

16.67

In [12]:
# The argument can be a name bound to a value; the result is the same as
# passing the expression directly.
sixth = 1/6
percent(sixth)

16.67

In [13]:
# A small proportion: only two decimals of the percentage are kept.
percent(1/6000)

0.02

In [14]:
# Too small to survive rounding to two decimals: the result is 0.0.
percent(1/60000)

0.0

In [15]:
def percent(x):
    """Return the proportion ``x`` as a percentage.

    Inputs below 0.00005 are scaled by 100 but left unrounded, so very
    small proportions are not collapsed to 0.0. Every other input is
    rounded to two decimal places. Because the test is on ``x`` itself,
    any negative input also takes the unrounded path.
    """
    scaled = 100 * x
    return scaled if x < 0.00005 else round(scaled, 2)

In [16]:
# Re-check the earlier examples with the new definition: 1/60000 is now
# reported unrounded instead of as 0.0.
print('1/6 =', percent(1/6))
print('1/6000 =', percent(1/6000))
print('1/60000 =', percent(1/60000))

1/6 = 16.67
1/6000 = 0.02
1/60000 = 0.0016666666666666668


In [17]:
# An extremely small proportion is also returned unrounded, which prints in
# scientific notation.
print('1/60000000000 =', percent(1/60000000000))

1/60000000000 = 1.6666666666666667e-09


In [18]:
def percent(x):
    """Return the proportion ``x`` as a percentage.

    * ``x`` below 1e-8 gives exactly 0.0.
    * ``x`` below 0.00005 gives ``100 * x`` without rounding.
    * anything else gives ``100 * x`` rounded to two decimal places.

    The cutoffs are compared against ``x`` itself rather than its
    magnitude, so every negative input falls into the first case.
    """
    if x < 1e-8:
        return 0.0
    scaled = 100 * x
    return scaled if x < 0.00005 else round(scaled, 2)

In [19]:
# Same checks again, plus the tiny value that is now reported as 0.0.
print('1/6 =', percent(1/6))
print('1/6000 =', percent(1/6000))
print('1/60000 =', percent(1/60000))
print('1/60000000000 =', percent(1/60000000000))

1/6 = 16.67
1/6000 = 0.02
1/60000 = 0.0016666666666666668
1/60000000000 = 0.0


In [20]:
# Negative input: the current definition returns 0.0 here because its first
# branch compares x itself, not its magnitude, against 1e-8.
percent(-1/6)

0.0

In [21]:
# Any negative number is below the 1e-8 cutoff, which explains the 0.0 above.
-1/6 < 1e-8

True

In [22]:
def percent(x):
    """Return the proportion ``x`` as a percentage, keeping its sign.

    The size of ``x`` (its absolute value) selects the treatment:

    * below 1e-8: exactly 0.0
    * below 0.00005: ``100 * x`` without rounding
    * otherwise: ``100 * x`` rounded to two decimal places
    """
    magnitude = abs(x)
    if magnitude < 1e-8:
        return 0.0
    scaled = 100 * x
    return scaled if magnitude < 0.00005 else round(scaled, 2)

In [23]:
# Same checks plus a negative input, which now keeps its sign and rounding.
print('1/6 =', percent(1/6))
print('1/6000 =', percent(1/6000))
print('1/60000 =', percent(1/60000))
print('1/60000000000 =', percent(1/60000000000))
print('-1/6 =', percent(-1/6))

1/6 = 16.67
1/6000 = 0.02
1/60000 = 0.0016666666666666668
1/60000000000 = 0.0
-1/6 = -16.67


In [24]:
# The age 18-or-19 filter written out inline on the full table; the
# following cell wraps the same expression in a reusable function.
census.where(np.logical_or(census.column('AGE') == 18,
                           census.column('AGE') == 19))

SEX,AGE,2010,2014,Change,Growth
0,18,4491005,4225590,-265415,-1.51%
0,19,4571385,4326394,-244991,-1.37%
1,18,2305733,2165062,-140671,-1.56%
1,19,2334906,2220790,-114116,-1.24%
2,18,2185272,2060528,-124744,-1.46%
2,19,2236479,2105604,-130875,-1.50%


In [25]:
def voting_teens(t):
    """Return the rows of table ``t`` whose AGE is 18 or 19.

    ``t`` must provide ``column('AGE')`` returning an array of ages and
    ``where(mask)`` accepting a boolean array; the result is whatever
    ``t.where`` returns for that mask.
    """
    ages = t.column('AGE')
    is_18_or_19 = np.logical_or(ages == 18, ages == 19)
    return t.where(is_18_or_19)

In [26]:
# The filter applied to the female-only table.
voting_teens(females)

SEX,AGE,2010,2014,Change,Growth
2,18,2185272,2060528,-124744,-1.46%
2,19,2236479,2105604,-130875,-1.50%


In [27]:
# The filter applied to the male-only table.
voting_teens(males)

SEX,AGE,2010,2014,Change,Growth
1,18,2305733,2165062,-140671,-1.56%
1,19,2334906,2220790,-114116,-1.24%


In [28]:
# The filter applied to the full table, so all three SEX codes appear.
voting_teens(census)

SEX,AGE,2010,2014,Change,Growth
0,18,4491005,4225590,-265415,-1.51%
0,19,4571385,4326394,-244991,-1.37%
1,18,2305733,2165062,-140671,-1.56%
1,19,2334906,2220790,-114116,-1.24%
2,18,2185272,2060528,-124744,-1.46%
2,19,2236479,2105604,-130875,-1.50%


In [29]:
# The stored Growth values are raw proportions; the percent signs in the
# table display come only from the column's formatter.
voting_teens(census).column('Growth')

array([-0.01511402, -0.01367608, -0.01561422,
       -0.01244902, -0.01458707, -0.01496204])

In [30]:
# Call percent on each value of the Growth column and collect the results
# into an array.
voting_teens(census).apply(percent, 'Growth')

array([-1.51, -1.37, -1.56, -1.24, -1.46, -1.5 ])

## Pivot

In [35]:
# One row per AGE and one column per SEX code, each cell holding the summed
# 2014 count. '2014' is relabeled first so the pivoted columns read
# "0 Population", "1 Population" and "2 Population".
pivoted = census.relabeled('2014', 'Population').pivot('SEX', 'AGE', 'Population', sum)
# Column 2 is "1 Population" (males) and column 1 is "0 Population" (both
# sexes), so Fraction is the male share of each age group.
fraction = pivoted.with_column('Fraction', pivoted.column(2)/pivoted.column(1))
fraction.set_format('Fraction', PercentFormatter).show()

AGE,0 Population,1 Population,2 Population,Fraction
0,3948350,2017857,1930493,51.11%
1,3962123,2023253,1938870,51.06%
2,3957772,2022502,1935270,51.10%
3,4005190,2048618,1956572,51.15%
4,4003448,2043498,1959950,51.04%
5,4004858,2043467,1961391,51.02%
6,4134352,2110328,2024024,51.04%
7,4154000,2122240,2031760,51.09%
8,4119524,2105122,2014402,51.10%
9,4106832,2097272,2009560,51.07%


## Group

In [32]:
# Load the cardio participation form responses.
# NOTE(review): this path points into one user's Downloads folder, so the
# cell only runs where that exact file exists; consider keeping the CSV
# next to the notebook and using a relative path.
cardio = Table.read_table("~/Downloads/Data 8 Cardio Participation Log (Responses) - Form Responses 1.csv")
cardio

Timestamp,On what day did you attend?,How many laps did you complete?,In how many minutes?,How tired were you at the end?,What's your identifier?
1/20/2016 22:05:05,"Thursday, 1/21",5.0,14,4,blue running shoes with black black pants.
1/21/2016 8:07:05,"Thursday, 1/21",22.0,30,3,Cal lights>WIRAs
1/21/2016 8:23:16,"Thursday, 1/21",6.0,18,6,Ontario
1/21/2016 8:36:59,"Thursday, 1/21",8.0,20,5,Smooth Criminal
1/21/2016 8:37:28,"Thursday, 1/21",8.0,20,5,Tomato
1/21/2016 9:02:25,"Thursday, 1/21",5.5,25,5,SS
1/21/2016 9:07:08,"Thursday, 1/21",12.0,35,4,astennet
1/21/2016 9:21:12,"Thursday, 1/21",8.0,35,3,BR
1/21/2016 9:29:18,"Thursday, 1/21",6.0,30,5,TrueLoveInAGraph
1/21/2016 9:38:18,"Thursday, 1/21",5.0,11,3,Edward Snowden


In [33]:
# Keep the day, laps and minutes columns (indices 1-3), then group by the
# day column (index 0 of the selection) and total the other two per day.
days = cardio.select([1, 2, 3]).group(0, sum)
days

On what day did you attend?,How many laps did you complete? sum,In how many minutes? sum
"Thursday, 1/21",131,394.0
"Thursday, 1/28",99,265.5


In [34]:
# Column 2 holds the summed minutes and column 1 the summed laps, so the new
# column is the overall minutes per lap for each day.
days.with_column('Minutes/Lap', days.column(2)/days.column(1))

On what day did you attend?,How many laps did you complete? sum,In how many minutes? sum,Minutes/Lap
"Thursday, 1/21",131,394.0,3.00763
"Thursday, 1/28",99,265.5,2.68182
