# Lecture 5 - Demo

In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Tables Recap

In [2]:
du_bois = Table.read_table('du_bois.csv')
du_bois

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR
300-400,335.66,0.23,0.43,0.18,0.045,0.115,FAIR
400-500,433.82,0.18,0.37,0.15,0.055,0.245,FAIR
500-750,547.0,0.13,0.31,0.17,0.05,0.34,COMFORTABLE
750-1000,880.0,0.0,0.37,0.19,0.08,0.36,COMFORTABLE
1000 and over,1125.0,0.0,0.29,0.16,0.045,0.505,WELL-TO-DO


In [3]:
# Labels
du_bois.labels

('CLASS',
 'ACTUAL AVERAGE',
 'RENT',
 'FOOD',
 'CLOTHES',
 'TAXES',
 'OTHER',
 'STATUS')

In [12]:
# Columns
du_bois.column('RENT').item(0)

0.19

In [10]:
du_bois.select('RENT', 'FOOD')

RENT,FOOD
0.19,0.43
0.22,0.47
0.23,0.43
0.18,0.37
0.13,0.31
0.0,0.37
0.0,0.29


In [13]:
# Rows
du_bois.num_rows

7

**Discussion Question** What is another way to find the number of rows in a table?

In [14]:
len(du_bois.column('CLASS'))

7

**Discussion Question**:

Use the table functions to find the income bracket (“class”) with the ‘POOR’ status that spent the highest percentage of their income on food.

In [15]:
poor_du_bois = du_bois.where('STATUS', 'POOR')
poor_du_bois

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR


In [16]:
highest_poor_db = poor_du_bois.sort('FOOD', descending=True)
highest_poor_db

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR


In [18]:
label = highest_poor_db.column('CLASS').item(0)
label

'200-300'

# Functions

## Defining Functions ##  

Example: Create a function that takes a numerical input and triples it: $\textsf{triple}(x)=3\,x$

In [19]:
def triple(x):
    """Return 3 * x.

    Works for any x that supports multiplication by an int: numbers are
    scaled, strings are repeated three times, and NumPy arrays are
    multiplied element-wise.
    """
    tripled = 3 * x
    return tripled

We can also assign a value to a name, and call the function on the name:

In [20]:
num = triple(3)

In [21]:
num

9

In [22]:
four = 4

In [23]:
triple(four)

12

In [24]:
triple(four * 5)

60

## The Anatomy of a Function ##  
    
```python
def functionname(Arguments_Parameters_Expressions_or_Values):
      body
      return return_expression
```

## Functions are Type-Agnostic  ## 

In [25]:
triple('ha')

'hahaha'

In [27]:
triple(np.arange(4))

array([0, 3, 6, 9])

In [28]:
def subtract_three(x):
    """Return x reduced by 3.

    x must support subtracting an int; passing a str, for example,
    raises TypeError.
    """
    difference = x - 3
    return difference

In [29]:
subtract_three(1.2)

-1.8

In [30]:
subtract_three(1)

-2

In [31]:
subtract_three('ha')

TypeError: unsupported operand type(s) for -: 'str' and 'int'

### Discussion ###

- What does the following function do?
- What type of input does it take?
- What type of output does it produce?
- What's a good name for the function?

```python
def f(s):     
      return np.round(s / sum(s) * 100, 2)
```

In [32]:
def percent_of_total(s):
    """Express each element of s as a percentage of the sum of s.

    s is expected to be an array of numbers (for example one built with
    make_array). The result has one entry per element of s, rounded to
    two decimal places.
    """
    total = sum(s)
    shares = s / total
    return np.round(shares * 100, 2)

In [33]:
first_four = make_array(1,4,6,9)
first_four

array([1, 4, 6, 9])

In [36]:
percent_of_total(first_four)

array([ 5., 20., 30., 45.])

In [37]:
percent_of_total(make_array(1,600,6400))

array([1.000e-02, 8.570e+00, 9.142e+01])

### Functions Can Take Multiple Arguments ###

Example: Calculate the Hypotenuse Length of a Right Triangle


Pythagoras's Theorem: If $x$ and $y$ denote the lengths of the right-angle sides, then the hypotenuse length $h$ satisfies:

$$ h^2 = x^2 + y^2 \qquad \text{which implies}\qquad \hspace{20 pt} h = \sqrt{ x^2 + y^2 } $$

In [38]:
def hypotenuse(x, y):
    """Return the hypotenuse length of a right triangle with legs x and y.

    Built up in two steps: first the sum of the squared legs, then its
    square root.
    """
    sum_of_squares = x ** 2 + y ** 2
    return np.sqrt(sum_of_squares)

We could've typed the body all in one line. Do you find this more readable or less readable than the original version?

In [40]:
def hypotenuse(x, y):
    """Return sqrt(x^2 + y^2), the hypotenuse for legs x and y, as a single expression."""
    return np.sqrt(x ** 2 + y ** 2)

In [41]:
hypotenuse(3, 4)

5.0

In [42]:
hypotenuse(1, 2)

2.23606797749979

### Example: A function that takes the year of birth of a person and produces their age in years. ###

In [43]:
def age(year, current_year=2022):
    """Return the age in years of a person born in `year`.

    Parameters
    ----------
    year : number or array of numbers
        Year of birth.
    current_year : number, optional
        Year the age is measured in. Defaults to 2022, the value that was
        previously hard-coded, so existing one-argument calls (including
        `table.apply(age, 'Birth Year')`) give the same results as before.

    Returns
    -------
    current_year - year. No validation is performed, so a birth year
    after `current_year` yields a negative age.
    """
    return current_year - year

In [44]:
age(1999)

23

In [45]:
age(2025)

-3

Now add some bells and whistles: take a person's name and year of birth (two arguments), and produce a sentence that states how old they are.

In [46]:
def name_and_age(name, year):
    """Return a sentence stating how old the person called `name` is.

    The age comes from age(year), so `year` is the person's year of birth.
    """
    return f'{name} is {age(year)} years old'

In [48]:
name_and_age('Carol', 1945)

'Carol is 77 years old'

### `print` vs `return` ###

- What is the difference between these two functions?

```python
def name_and_age_1(name, year):
    return name + ' is ' + str(age(year)) + ' years old.'
```

```python
def name_and_age_2(name, year):
    print(name + ' is ' + str(age(year)) + ' years old.')
```

In [50]:
output_1 = name_and_age('Carol', 1945)
output_1

'Carol is 77 years old'

In [51]:
type(output_1)

str

In [52]:
def name_and_age_2(name, year):
    """Print a sentence stating the person's age.

    The sentence is only printed, not returned: the function has no
    return statement, so the value of a call is None.
    """
    sentence = name + ' is ' + str(age(year)) + ' years old.'
    print(sentence)

In [53]:
output_2 = name_and_age_2('Carol', 1945)

Carol is 77 years old.


In [54]:
output_2

### Assignments in Functions

In [55]:
def adding_numbers(a, b):
    """Return a + b, going through two local names first.

    first_number and second_number are assigned inside the function body,
    so they are local names that exist only while the function runs.
    """
    first_number, second_number = a, b
    total = first_number + second_number
    return total

In [56]:
adding_numbers(4, 5)

9

In [57]:
first_numbers

NameError: name 'first_numbers' is not defined

### (Optional) Readability

In [58]:
def my_multiply(a, b):
    """Return the product of a and b."""
    product = a * b
    return product

In [59]:
my_multiply?

In [None]:
# NOTE(review): this cell was left as an unterminated call, presumably to demo
# the signature/docstring tooltip while the cursor is inside the parentheses;
# confirm. It is completed here so the cell runs instead of raising SyntaxError.
my_multiply(2, 3)

## Apply ##

In [60]:
ages = Table().with_columns(
    'Person', make_array('Jim', 'Pam', 'Michael', 'Creed'),
    'Birth Year', make_array(1985, 1988, 1967, 1904)
)
ages

Person,Birth Year
Jim,1985
Pam,1988
Michael,1967
Creed,1904


In [61]:
make_array(age(ages.column('Birth Year').item(0)),
           age(ages.column('Birth Year').item(1)),
           age(ages.column('Birth Year').item(2)),
           age(ages.column('Birth Year').item(3)))

array([ 37,  34,  55, 118])

In [65]:
ages.apply(age, 'Birth Year')

array([ 37,  34,  55, 118])

In [67]:
age(2000)

22

In [68]:
ages.apply(age(), 'Birth Year')

TypeError: age() missing 1 required positional argument: 'year'

**Multiple Columns**

In [69]:
ages.apply(name_and_age, "Person", "Birth Year")

array(['Jim is 37 years old', 'Pam is 34 years old',
       'Michael is 55 years old', 'Creed is 118 years old'], dtype='<U23')

In [70]:
ages.apply(name_and_age, "Birth Year", "Person")

TypeError: unsupported operand type(s) for -: 'int' and 'numpy.str_'

In [None]:
# Multiple Columns
#

In [None]:
# Will error if we swap the column order
#

## Row Objects