In [None]:
from datascience import *
import numpy as np
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline
import matplotlib.pyplot as plots
# Use the 'fivethirtyeight' style sheet for the plots in this notebook.
plots.style.use('fivethirtyeight')
# Turn on the "patch.force_edgecolor" rcParam so patch edges are always drawn.
plots.rcParams["patch.force_edgecolor"] = True

# Lecture 2.2 Demo

### Functions

Simple function:

In [None]:
def triple(x):
    """Return three times ``x``.

    ``x`` may be anything that supports multiplication by an integer:
    numbers are scaled, strings are repeated, and NumPy arrays are scaled
    element-wise.
    """
    tripled = 3 * x
    return tripled

In [None]:
triple(3)

In [None]:
num = 4
triple(num)

In [None]:
triple(num * 5)

In [None]:
# NOTE(review): presumably an intentional error demo. Up to this point `x` is
# only the parameter name inside `triple`, so on a fresh kernel this cell is
# expected to raise NameError. TODO confirm this is the intended lesson.
x

In [None]:
# Multiplying a string by 3 repeats it, so this evaluates to 'hahaha'.
triple('ha')

In [None]:
# Arrays are multiplied element-wise: [0, 1, 2, 3] becomes [0, 3, 6, 9].
triple(np.arange(4))

More complex functions:

The function in the following cell computes the length of the hypotenuse $h$ of a right triangle whose legs have lengths $x$ and $y$:

$h^2 = x^2 + y^2 \quad \Rightarrow \quad h = \sqrt{x^2 + y^2}$

In [None]:
def hypotenuse(x,y):
    """Return the hypotenuse length of a right triangle with legs ``x`` and ``y``.

    Computed as the square root of ``x ** 2 + y ** 2`` (Pythagorean theorem).
    """
    sum_of_squares = x ** 2 + y ** 2
    # Raising to the power 0.5 takes the square root.
    return sum_of_squares ** 0.5

In [None]:
hypotenuse(9, 12)

In [None]:
hypotenuse(2, 2)

### Apply

In [None]:
# Small example table: one row per person, with that person's year of birth.
ages = Table().with_columns(
    'Person', make_array('Jim', 'Pam', 'Michael', 'Creed'),
    'Birth Year', make_array(1985, 1988, 1967, 1904)
)
ages

In [None]:
def get_age(x, current_year=2020):
    """Return the approximate age of someone born in year ``x``.

    The result is approximate because only the years are compared; the
    birthday within the year is ignored.

    Parameters
    ----------
    x : number or array of numbers
        Birth year(s).
    current_year : number, optional
        Year to measure the age against. Defaults to 2020, the value that was
        previously hard-coded, so one-argument calls such as
        ``ages.apply(get_age, 'Birth Year')`` behave exactly as before.

    Returns
    -------
    ``current_year - x``.
    """
    approx_age = current_year - x
    return approx_age

In [None]:
get_age(1996)

In [None]:
ages.apply(get_age, 'Birth Year')

In [None]:
ages.with_column('Approx. Age', ages.apply(get_age, 'Birth Year'))

### Grouping

Single-column groups:

In [None]:
# Read the NBA salaries CSV from the Inferential Thinking site into a Table.
nba_salaries = Table.read_table('https://www.inferentialthinking.com/data/nba_salaries.csv')
nba_salaries

In [None]:
nba_salaries.group("TEAM")

In [None]:
nba_salaries.group('POSITION')

In [None]:
# Keep only position and salary, then group by position and pass np.average
# as the function used to combine the salaries within each group.
nba_salaries.select('POSITION', "'15-'16 SALARY").group('POSITION', np.average)

In [None]:
# Same grouping as above, but the built-in max combines each group's salaries.
nba_salaries.select('POSITION', "'15-'16 SALARY").group('POSITION', max)

Cross-classification (grouping by two columns):

In [None]:
nba_salaries.select('POSITION', 'TEAM', "'15-'16 SALARY").group(['POSITION', 'TEAM'])

In [None]:
nba_salaries.select('POSITION', 'TEAM', "'15-'16 SALARY").group(['POSITION', 'TEAM'], np.average)

In [None]:
nba_salaries.select('POSITION', 'TEAM', "'15-'16 SALARY").pivot('POSITION', 'TEAM')

In [None]:
# Pivot on POSITION and TEAM; instead of the default counts, each cell holds
# np.average applied to the "'15-'16 SALARY" values of that combination.
# NOTE(review): per the datascience docs the first argument ('POSITION')
# supplies the column labels and the second ('TEAM') the rows — confirm.
nba_salaries.select('POSITION', 'TEAM', "'15-'16 SALARY") \
    .pivot('POSITION', 'TEAM', values = "'15-'16 SALARY", collect = np.average)

### Joining

In [None]:
drinks = Table(['Drink', 'Cafe', 'Price'])
drinks

In [None]:
# Build the menu from a fresh empty table instead of from the current value of
# `drinks`. The previous `drinks = drinks.with_rows(...)` form appended the
# same four rows again every time this cell was re-run; this form always
# produces exactly these four rows.
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows([
    ['Milk Tea', 'Asha', 5.5],
    ['Espresso', 'Strada',  1.75],
    ['Latte',    'Strada',  3.25],
    ['Espresso', "FSM",   2]
])
drinks

In [None]:
# Coupons available per location. 'Asha' is listed twice (10% and 5%), and
# there is no coupon row for 'FSM'.
discounts = Table().with_columns(
    'Coupon % off', make_array(10, 25, 5),
    'Location', make_array('Asha', 'Strada', 'Asha')
)
discounts

In [None]:
# Join the two tables by matching the 'Cafe' column of `drinks` against the
# 'Location' column of `discounts`.
combined = drinks.join('Cafe', discounts, 'Location')
combined

In [None]:
# Turn each percent-off value into the fraction of the price still paid
# (for example, 10% off -> 0.9).
discounted_frac = 1 - combined.column('Coupon % off') / 100
# Element-wise product: each row's price multiplied by its own fraction.
combined.with_column(
    'Discounted Price', 
    combined.column('Price') * discounted_frac
)

### Comparisons

In [None]:
3 > 1

In [None]:
type(3 > 1)

In [None]:
True

In [None]:
# NOTE(review): presumably an intentional error demo. Python's boolean literal
# is the capitalized `True`; lowercase `true` is just an undefined name and is
# expected to raise NameError. TODO confirm.
true

In [None]:
3 == 3

In [None]:
# NOTE(review): presumably an intentional error demo. A single `=` is
# assignment, and a literal cannot be assigned to, so this is a SyntaxError;
# `==` (previous cell) is the equality comparison. TODO confirm.
3 = 3

In [None]:
3 == 3.0

In [None]:
10 != 2

In [None]:
x = 14
y = 3

In [None]:
x < 15

In [None]:
12 < x < 14

In [None]:
10 < x-y < 13

In [None]:
x > 13 and y < 3.14

In [None]:
x > 13 or y > 3.14

In [None]:
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')
pets

In [None]:
pets == 'cat'

In [None]:
# In arithmetic, True counts as 1 and False as 0, so summing booleans counts
# the True values (three of them here).
sum(make_array(True, True, False, True, False, False))

In [None]:
# Count the entries of `pets` equal to 'cat' by summing the comparison result.
sum(pets == 'cat')

In [None]:
# Same count as the previous cell, using NumPy's count of non-zero (True) entries.
np.count_nonzero(pets == 'cat')

In [None]:
x = np.arange(20, 31)
x

In [None]:
x_larger_than_24 = x > 24
x_larger_than_24

In [None]:
y = np.arange(15, 26)
y

In [None]:
y_larger_than_24 = y > 24
y_larger_than_24

In [None]:
# `|` is the element-wise "or" for boolean arrays: True at each position where
# at least one of the two arrays is True.
x_larger_than_24 | y_larger_than_24

In [None]:
# `&` is the element-wise "and" for boolean arrays: True only at positions
# where both arrays are True.
x_larger_than_24 & y_larger_than_24

### Advanced `where` Method

In [None]:
# NOTE: this rebinds `ages`. In the "Apply" section the name referred to a
# Table of people and birth years; from here on it is a plain array of ages.
ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
# One-column table built from that array.
age = Table().with_column('Age', ages)

In [None]:
age

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
# Element-wise comparison: a boolean array with one entry per age, True where
# the age is at least 18.
voter = ages >= 18
voter

In [None]:
# Pass the boolean array itself to `where` instead of a column label and predicate.
# NOTE(review): expected to keep the rows whose entry in `voter` is True — confirm.
age.where(voter)

In [None]:
# The predicate built by `are.above_or_equal_to(18)` is an ordinary object: it
# can be stored in a variable and, as the following cells show, called on a
# single value or handed to `apply`.
is_voter = are.above_or_equal_to(18)
type(is_voter)

In [None]:
is_voter(22)

In [None]:
is_voter(3)

In [None]:
age.apply(is_voter, 'Age')

In [None]:
ages >= 18

In [None]:
voter

In [None]:
def my_voter_function(x):
    """Return whether ``x`` is at least 18.

    Hand-written counterpart to the ``are.above_or_equal_to(18)`` predicate
    used elsewhere in this section.
    """
    meets_cutoff = x >= 18
    return meets_cutoff

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
age.where(voter)

In [None]:
# A plain one-argument function that returns True/False is passed here in the
# position where the `are.` predicate was used two cells above.
age.where('Age', my_voter_function)