In [38]:
import numpy as np
# Keep printed arrays short: arrays with more than 50 elements are abbreviated
# with "...", and printed lines wrap at 50 characters.
np.set_printoptions(threshold=50, linewidth=50)

## Tables

In [2]:
from datascience import *

In [3]:
# A Table built from a list of labels has those columns and no rows yet.
Table(['Even', 'Odd'])

Even,Odd


In [4]:
# with_row adds one row; values are given in column order (Even=2, Odd=3).
Table(['Even', 'Odd']).with_row([2, 3])

Even,Odd
2,3


In [5]:
# with_rows adds several rows at once: one inner list per row.
Table(['Even', 'Odd']).with_rows([[2, 3], [4, 5]])

Even,Odd
2,3
4,5


In [6]:
# Start from an empty table and add one labelled column.
# np.arange(1, 5, 2) is [1, 3]: the stop value 5 is excluded.
Table().with_column('Odd', np.arange(1, 5, 2))

Odd
1
3


In [7]:
# with_columns takes a single flat list that alternates
# column label, column values, column label, column values, ...
Table().with_columns([
        'Even', [2, 4],
        'Odd', [3, 5]
    ])

Even,Odd
2,3
4,5


In [8]:
# Load the population-estimate CSV (one row per SEX code and AGE) directly
# from its URL into a Table.
# NOTE(review): this needs network access and a live URL on every run;
# consider keeping a local copy of the CSV so Restart & Run All stays reproducible.
url = 'http://www.census.gov/popest/data/national/asrh/2014/files/NC-EST2014-AGESEX-RES.csv'
full = Table.read_table(url)
full

SEX,AGE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014
0,0,3944153,3944160,3951330,3963071,3926665,3945610,3948350
0,1,3978070,3978090,3957888,3966510,3978006,3943077,3962123
0,2,4096929,4096939,4090862,3971573,3979952,3992690,3957772
0,3,4119040,4119051,4111920,4102501,3983049,3992425,4005190
0,4,4063170,4063186,4077552,4122303,4112638,3994047,4003448
0,5,4056858,4056872,4064653,4087713,4132210,4123408,4004858
0,6,4066381,4066412,4073013,4074979,4097780,4143094,4134352
0,7,4030579,4030594,4043047,4083240,4084964,4108615,4154000
0,8,4046486,4046497,4025604,4053206,4093213,4095827,4119524
0,9,4148353,4148369,4125415,4035769,4063193,4104133,4106832


In [9]:
# select keeps only the listed columns. Columns can be named by label or by
# zero-based index: here 4 is POPESTIMATE2010 and 8 is POPESTIMATE2014.
partial = full.select(['SEX', 'AGE', 4, 8])
partial

SEX,AGE,POPESTIMATE2010,POPESTIMATE2014
0,0,3951330,3948350
0,1,3957888,3962123
0,2,4090862,3957772
0,3,4111920,4005190
0,4,4077552,4003448
0,5,4064653,4004858
0,6,4073013,4134352
0,7,4043047,4154000
0,8,4025604,4119524
0,9,4125415,4106832


In [10]:
# relabeled identifies the column by its current label or by its zero-based
# index (3 is POPESTIMATE2014); each call yields a table, so calls chain.
simple = partial.relabeled('POPESTIMATE2010', '2010').relabeled(3, '2014')
simple

SEX,AGE,2010,2014
0,0,3951330,3948350
0,1,3957888,3962123
0,2,4090862,3957772
0,3,4111920,4005190
0,4,4077552,4003448
0,5,4064653,4004858
0,6,4073013,4134352
0,7,4043047,4154000
0,8,4025604,4119524
0,9,4125415,4106832


In [11]:
# column(label) gives the column's values as a numpy array.
# AGE runs from 0 to 100 and also contains the value 999.
simple.column('AGE')

array([  0,   1,   2, ...,  99, 100, 999])

In [12]:
# The same AGE column, addressed by zero-based index instead of label.
simple.column(1)

array([  0,   1,   2, ...,  99, 100, 999])

In [13]:
# Element at position 3 of the '2010' column (index 2): the SEX 0, AGE 3 row.
simple.column(2).item(3)

4111920

#### Discussion question

In [14]:
# Add two derived columns to the 2010/2014 table:
#   Change - absolute difference, 2014 estimate minus 2010 estimate
#   Growth - annual growth rate over the 4 years between the two estimates,
#            i.e. the fourth root of the 2014/2010 ratio, minus 1
census = simple.with_columns([
    'Change', simple.column('2014') - simple.column('2010'),
    'Growth', (simple.column('2014') / simple.column('2010')) ** (1 / 4) - 1,
])
census

SEX,AGE,2010,2014,Change,Growth
0,0,3951330,3948350,-2980,-0.000188597
0,1,3957888,3962123,4235,0.000267397
0,2,4090862,3957772,-133090,-0.00823453
0,3,4111920,4005190,-106730,-0.0065532
0,4,4077552,4003448,-74104,-0.00457471
0,5,4064653,4004858,-59795,-0.00369821
0,6,4073013,4134352,61339,0.00374389
0,7,4043047,4154000,110953,0.00679123
0,8,4025604,4119524,93920,0.00578232
0,9,4125415,4106832,-18583,-0.00112804


In [15]:
# Column labels, in column order, as a tuple of strings.
census.labels

('SEX', 'AGE', '2010', '2014', 'Change', 'Growth')

In [16]:
# Row count: 3 SEX codes x (ages 0-100 plus the AGE 999 entry) = 306.
census.num_rows

306

In [17]:
# Column count: the four original columns plus Change and Growth.
census.num_columns

6

In [18]:
# Display formatting only: Growth is shown as a percentage, and columns
# 2, 3 and 4 ('2010', '2014', 'Change') use NumberFormatter. The stored
# values stay numeric, so later arithmetic on Growth still works.
# NOTE: set_format modifies `census` itself rather than a copy; tables
# derived from `census` in later cells display Growth as a percentage too.
census.set_format('Growth', PercentFormatter)
census.set_format([2, 3, 4], NumberFormatter)

SEX,AGE,2010,2014,Change,Growth
0,0,3951330,3948350,-2980,-0.02%
0,1,3957888,3962123,4235,0.03%
0,2,4090862,3957772,-133090,-0.82%
0,3,4111920,4005190,-106730,-0.66%
0,4,4077552,4003448,-74104,-0.46%
0,5,4064653,4004858,-59795,-0.37%
0,6,4073013,4134352,61339,0.37%
0,7,4043047,4154000,110953,0.68%
0,8,4025604,4119524,93920,0.58%
0,9,4125415,4106832,-18583,-0.11%


In [19]:
# Largest AGE value is 999, well outside the 0-100 range of the other rows.
max(census.column('AGE'))

999

## Transformations

In [20]:
# where(label, value) keeps the rows whose AGE equals 999: one row per
# SEX code, holding that code's population summed over all ages.
totals = census.where('AGE', 999)
totals

SEX,AGE,2010,2014,Change,Growth
0,999,309347057,318857056,9509999,0.76%
1,999,152089484,156936487,4847003,0.79%
2,999,157257573,161920569,4662996,0.73%


In [21]:
# take(i) keeps only the row at zero-based position i.
totals.take(0)

SEX,AGE,2010,2014,Change,Growth
0,999,309347057,318857056,9509999,0.76%


In [22]:
# take with a list keeps several rows: here the SEX 1 and SEX 2 totals.
totals.take([1, 2])

SEX,AGE,2010,2014,Change,Growth
1,999,152089484,156936487,4847003,0.79%
2,999,157257573,161920569,4662996,0.73%


In [23]:
# The SEX 1 and SEX 2 totals for 2014 add up to the SEX 0 total,
# so SEX 0 is the two other codes combined.
sum(totals.take([1, 2]).column('2014'))

318857056

In [24]:
# Comparing an array with a number gives an array of booleans, one per row.
census.column('AGE') < 999

array([ True,  True,  True, ...,  True,  True, False], dtype=bool)

In [25]:
# where also accepts a boolean array and keeps the rows marked True,
# here every row except the AGE 999 totals.
census.where(census.column('AGE') < 999)

SEX,AGE,2010,2014,Change,Growth
0,0,3951330,3948350,-2980,-0.02%
0,1,3957888,3962123,4235,0.03%
0,2,4090862,3957772,-133090,-0.82%
0,3,4111920,4005190,-106730,-0.66%
0,4,4077552,4003448,-74104,-0.46%
0,5,4064653,4004858,-59795,-0.37%
0,6,4073013,4134352,61339,0.37%
0,7,4043047,4154000,110953,0.68%
0,8,4025604,4119524,93920,0.58%
0,9,4125415,4106832,-18583,-0.11%


In [26]:
# Rows for SEX code 2 (named `females` here), including its AGE 999 total row.
females = census.where('SEX', 2)
# Cross-check: summing the per-age 2014 values (AGE < 999) reproduces the
# 2014 figure stored in the SEX 2, AGE 999 row.
sum(females.where(females.column('AGE') < 999).column('2014'))

161920569

#### Discussion question

In [27]:
# Count the female rows whose 2014 value (column 3) is at least 3% above
# the 2010 value (column 2).
# NOTE(review): `females` still contains the AGE 999 total row; its ratio is
# below 1.03, so it does not affect this count.
females.where(females.column(3)/females.column(2) >= 1.03).num_rows

52

In [28]:
# Same count from the Growth column: (1 + Growth) ** 4 rebuilds the
# 2014/2010 ratio from the annual rate.
females.where((females.column('Growth')+1)**4 >= 1.03).num_rows

52

In [29]:
# Rows for SEX code 1 (named `males` here), including its AGE 999 total row.
males = census.where('SEX', 1)
# Highest annual growth rate first.
males.sort('Growth', descending=True)

SEX,AGE,2010,2014,Change,Growth
1,99,6104,9037,2933,10.31%
1,100,9351,13729,4378,10.08%
1,98,9504,13649,4145,9.47%
1,93,60182,85980,25798,9.33%
1,96,22022,31235,9213,9.13%
1,94,43828,62130,18302,9.12%
1,97,14775,20479,5704,8.50%
1,95,31736,42824,11088,7.78%
1,91,104291,138080,33789,7.27%
1,92,83462,109873,26411,7.12%


In [30]:
# Male rows whose population rose by more than 300,000, ordered by AGE.
# NOTE(review): the AGE 999 total row also passes this filter and shows up as
# the last row of the result; add an AGE < 999 condition if only real ages are wanted.
males.where(males.column('Change') > 300000).sort('AGE').show()

SEX,AGE,2010,2014,Change,Growth
1,64,1291843,1661474,369631,6.49%
1,65,1272693,1607688,334995,6.02%
1,66,1239805,1589127,349322,6.40%
1,67,1270148,1653257,383109,6.81%
1,999,152089484,156936487,4847003,0.79%


In [31]:
# np.logical_and combines two boolean arrays element by element, so a row is
# kept only if both tests hold: AGE strictly between 55 and 70 (56 through 69).
males.where(np.logical_and(males.column('AGE') > 55, 
                           males.column('AGE') < 70)).show()

SEX,AGE,2010,2014,Change,Growth
1,56,1984480,2140722,156242,1.91%
1,57,1910028,2110149,200121,2.52%
1,58,1838703,2027959,189256,2.48%
1,59,1779504,2006900,227396,3.05%
1,60,1742232,1914009,171777,2.38%
1,61,1691413,1837080,145667,2.09%
1,62,1679074,1763504,84430,1.23%
1,63,1753914,1701827,-52087,-0.75%
1,64,1291843,1661474,369631,6.49%
1,65,1272693,1607688,334995,6.02%


In [32]:
# np.logical_or keeps a row if either test holds: AGE is 18 or AGE is 19.
females.where(np.logical_or(females.column('AGE') == 18,
                            females.column('AGE') == 19))

SEX,AGE,2010,2014,Change,Growth
2,18,2185272,2060528,-124744,-1.46%
2,19,2236479,2105604,-130875,-1.50%


## Group

In [33]:
# Drop every aggregate row so nothing is double-counted when grouping:
# AGE 999 rows are all-ages totals, and SEX 0 rows are both sexes combined.
no_sums = census.where(np.logical_and(census.column('AGE') < 999,
                                      census.column('SEX') > 0))
no_sums

SEX,AGE,2010,2014,Change,Growth
1,0,2018420,2017857,-563,-0.01%
1,1,2020332,2023253,2921,0.04%
1,2,2088685,2022502,-66183,-0.80%
1,3,2101272,2048618,-52654,-0.63%
1,4,2084312,2043498,-40814,-0.49%
1,5,2076573,2043467,-33106,-0.40%
1,6,2079410,2110328,30918,0.37%
1,7,2063139,2122240,59101,0.71%
1,8,2054462,2105122,50660,0.61%
1,9,2107037,2097272,-9765,-0.12%


In [34]:
# With no second argument, group reports how many rows fall under each
# SEX value (the "len" columns): 101 ages for each.
no_sums.group('SEX')

SEX,AGE len,2010 len,2014 len,Change len,Growth len
1,101,101,101,101,101
2,101,101,101,101,101


In [35]:
# Passing sum adds up every other column within each SEX group. The 2010,
# 2014 and Change sums match the AGE 999 total rows; summed AGE and Growth
# values are produced too but are not meaningful quantities.
no_sums.group('SEX', sum)

SEX,AGE sum,2010 sum,2014 sum,Change sum,Growth sum
1,5050,152089484,156936487,4847003,1.97217
2,5050,157257573,161920569,4662996,1.32278


In [36]:
# Passing list collects each group's values into a single cell per column
# instead of reducing them to one number.
no_sums.group('SEX', list)

SEX,AGE list,2010 list,2014 list,Change list,Growth list
1,"[ 0 1 2 ..., 98 99 100]","[2018420 2020332 2088685 ..., 9504 6104 9351]","[2017857 2023253 2022502 ..., 13649 9037 13729]","[ -563 2921 -66183 ..., 4145 2933 4378]","[ -6.97400564e-05 3.61254690e-04 -8.01751751e-03 ..., ..."
2,"[ 0 1 2 ..., 98 99 100]","[1932910 1937556 2002177 ..., 37533 26074 45058]","[1930493 1938870 1935270 ..., 46536 32791 58468]","[ -2417 1314 -66907 ..., 9003 6717 13410]","[-0.00031276 0.0001695 -0.00846106 ..., 0.05522226  ..."


In [37]:
# Reshape to one row per AGE with one column of summed 2014 values per SEX code.
pivoted = no_sums.pivot('SEX', 'AGE', '2014', sum)
# Column 0 is AGE; columns 1 and 2 hold the SEX 1 and SEX 2 values.
m_vs_f = pivoted.relabeled(1, 'Male').relabeled(2, 'Female')
# Diff is Female minus Male, so a negative value means more males at that age.
m_vs_f.with_column(
    'Diff', m_vs_f.column('Female') - m_vs_f.column('Male')
).show()

AGE,Male,Female,Diff
0,2017857,1930493,-87364
1,2023253,1938870,-84383
2,2022502,1935270,-87232
3,2048618,1956572,-92046
4,2043498,1959950,-83548
5,2043467,1961391,-82076
6,2110328,2024024,-86304
7,2122240,2031760,-90480
8,2105122,2014402,-90720
9,2097272,2009560,-87712
