In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
warnings.simplefilter(action='ignore',category=np.VisibleDeprecationWarning)

## Lecture 10 ##

### Functions Can Take Multiple Arguments ###

#### Example: Take a person's name and year of birth (two arguments). Produce a sentence that states how old they are. ####
Functions can also be defined inside of other functions. (addressing question from Wednesday)

In [None]:
def name_and_age(name, year):
    """Build a sentence stating how old `name` is, given their birth `year`.

    name: the person's name (a string)
    year: the person's year of birth

    Returns a string of the form '<name> is <age> years old.', where the age
    is measured against the year 2023.
    """
    def age(year):
        # Helper that only exists inside name_and_age: years elapsed up to 2023.
        return 2023 - year

    # Plain `+` concatenation: `name` must be a string and the age is
    # converted with str() before being joined in.
    sentence_start = name + ' is '
    return sentence_start + str(age(year)) + ' years old.'

In [None]:
# One name and one birth year; the inner helper computes 2023 - 1942.
name_and_age('Joe', 1942)

In [None]:
# This cell will throw an error: so far `age` has only been defined inside
# name_and_age, so the name is local to that function and is not visible here
# (on a fresh top-to-bottom run this is expected to be a NameError).
age(1942)

In [None]:
def age(year):
    """Return how many years separate `year` from 2023.

    year: a year of birth

    The body is plain subtraction, so the result has whatever type
    `2023 - year` produces for the value passed in.
    """
    return 2023 - year

In [None]:
# The same call as before, now that `age` is also defined at the top level.
age(1942)

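More on functions being type agnostic (another question from Wednesday): the `x: int` annotation below is only a hint that Python does not enforce, so `triple` accepts any value that can be multiplied by 3.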

In [None]:
def triple(x: int):
    '''
    Return three times x.

    x: the value to triple. The annotation says int, but it is not checked
       at runtime; the body only evaluates 3 * x.
    '''
    tripled = 3 * x
    return tripled

In [None]:
# 3 * 3
triple(3)

## Apply ##

In [None]:
# Small example table: one row per person, with a name and a birth year.
ages = Table().with_columns(
    'Person', make_array('Jim', 'Pam', 'Michael', 'Creed'),
    'Birth Year', make_array(1985, 1988, 1967, 1904)
)
ages

In [None]:
# apply calls age once per row, passing that row's 'Birth Year' value.
ages.apply(age, 'Birth Year')

In [None]:
# age is just `2023 - year`, so it can also be handed the whole column at once.
age(ages.column('Birth Year'))

In [None]:
# With two column labels, each row's 'Person' and 'Birth Year' values are
# passed to name_and_age as its two arguments.
ages.apply(name_and_age, 'Person', 'Birth Year')

In [None]:
# This cell will throw an error: name_and_age builds its sentence with `+` and
# str(), which is written for one name and one year, but here it receives whole
# columns.
# NOTE(review): whether `array + ' is '` raises depends on the installed NumPy
# version - confirm this cell still fails as intended.
name_and_age(ages.column('Person'),ages.column('Birth Year'))

In [None]:
# The same computation spelled out by hand for the four rows: pick out each
# row's name and birth year with .item(i), call name_and_age on the pair, and
# collect the four sentences into an array.
make_array(name_and_age(ages.column('Person').item(0),ages.column('Birth Year').item(0)),
          name_and_age(ages.column('Person').item(1),ages.column('Birth Year').item(1)),
          name_and_age(ages.column('Person').item(2),ages.column('Birth Year').item(2)),
          name_and_age(ages.column('Person').item(3),ages.column('Birth Year').item(3)))

## Prediction ##

In [None]:
# Load the family heights data; the cells below read its 'father', 'mother',
# 'child' and 'sex' columns.
families = Table.read_table('family_heights.csv')
families

In [None]:
# Average of the father and mother columns, computed for every row at once.
parent_avgs = (families.column('father') + families.column('mother'))/2
parent_avgs

In [None]:
# Working table for the prediction section: the parents' average height, the
# child's height, and the child's sex, one row per child.
heights = Table().with_columns(
    'Parent Average', parent_avgs,
    'Child', families.column('child'),
    'Sex', families.column('sex')
)
heights

In [None]:
# Parent average on the horizontal axis, child height on the vertical axis.
heights.scatter('Parent Average', 'Child')

In [None]:
# Same scatter plot, with two red vertical lines (x = 67.5 and x = 68.5, drawn
# from y = 50 to y = 85) marking the band of parent averages around 68.
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2);

In [None]:
# Keep only the rows whose parent average falls in the 67.5-68.5 band, then
# average the children's heights in that band.
nearby = heights.where('Parent Average', are.between(67.5, 68.5))
nearby_mean = np.average(nearby.column('Child'))
nearby_mean

In [None]:
# Scatter plot with the 67.5-68.5 band again, plus a red dot at
# (68, nearby_mean): the average child height for parents in that band.
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2)
plots.scatter(68, nearby_mean, color='red', s=50);

**Activity:** What if you wanted to predict the height of a child whose parents' average height is 68, 70, or 73? Define a function in the cell below to help us do this.

In [None]:
def predict(h):
    """Predict a child's height from the parents' average height `h`.

    One possible solution to the activity. It generalizes the worked example
    for 68 (the 67.5-68.5 band): take the rows of `heights` whose
    'Parent Average' lies within half a unit of `h` and average their
    'Child' values.

    h: parents' average height to predict for

    Returns the average 'Child' height among the nearby rows.
    """
    # Band of width 1 centered on h, matching are.between(67.5, 68.5) for h = 68.
    nearby = heights.where('Parent Average', are.between(h - 0.5, h + 0.5))
    return np.average(nearby.column('Child'))

In [None]:
# Prediction for parents whose average height is 68.
predict(68)

In [None]:
# Prediction for parents whose average height is 70.
predict(70)

In [None]:
# Prediction for parents whose average height is 73.
predict(73)

**Activity:** How would you make a prediction for each row of parents in the `heights` table? Fill in the cell below.

In [None]:
# One possible answer: call predict once per row, passing that row's
# 'Parent Average', and collect the results.
predicted_heights = heights.apply(predict, 'Parent Average')

In [None]:
# Attach the per-row predictions to the table as a 'Prediction' column.
heights = heights.with_column('Prediction', predicted_heights)
heights

In [None]:
# Plot both 'Child' and 'Prediction' against 'Parent Average' on one chart.
heights.select('Parent Average', 'Child', 'Prediction').scatter('Parent Average')

## Prediction Accuracy ##

In [None]:
def difference(x, y):
    """Return x minus y (the order of the arguments sets the sign)."""
    gap = x - y
    return gap

In [None]:
# Error for each row, computed as Prediction minus the actual Child height
# (difference(x, y) is x - y), stored in an 'errors' column.
pred_errs = heights.apply(difference, 'Prediction', 'Child')
heights = heights.with_column('errors',pred_errs)
heights

In [None]:
# Distribution of the prediction errors.
heights.hist('errors')

In [None]:
# Same errors, with a separate histogram for each value of 'Sex'.
heights.hist('errors', group='Sex')

### Discussion Questions ###
How could we take the sex of the child into account when making predictions?
Does that lead to smaller errors?

In [None]:
def predict_smarter(h, s):
    """Predict a child's height from the parents' average height and the child's sex.

    One possible solution to the discussion question. It first narrows
    `heights` to rows whose 'Parent Average' lies within half a unit of `h`
    (the same band idea as the 67.5-68.5 example for 68), then keeps only the
    rows whose 'Sex' equals `s`, and averages their 'Child' values.

    h: parents' average height to predict for
    s: value of the 'Sex' column to match (the calls below use 'female' and 'male')

    Returns the average 'Child' height among the matching rows.
    """
    nearby = heights.where('Parent Average', are.between(h - 0.5, h + 0.5))
    nearby_same_sex = nearby.where('Sex', are.equal_to(s))
    return np.average(nearby_same_sex.column('Child'))

In [None]:
# Prediction for a female child whose parents average 68.
predict_smarter(68, 'female')

In [None]:
# Prediction for a male child whose parents average 68.
predict_smarter(68, 'male')

In [None]:
# Call predict_smarter once per row with that row's 'Parent Average' and 'Sex',
# then store the results in a 'Smarter Prediction' column.
smarter_predicted_heights = heights.apply(predict_smarter, 'Parent Average', 'Sex')
heights = heights.with_column('Smarter Prediction', smarter_predicted_heights)

In [None]:
# Compute the errors as prediction minus actual child height, the same order
# used for the 'errors' column, so both error histograms share a sign
# convention and can be compared directly.
smarter_pred_errs = heights.apply(difference, 'Smarter Prediction', 'Child')
heights = heights.with_column('Smarter Errors', smarter_pred_errs)

In [None]:
# Errors of the sex-aware predictions, with a separate histogram per 'Sex'.
heights.hist('Smarter Errors', group='Sex')

## Grouping by One Column ##

In [None]:
# Load the ice cream cones table and leave out its 'Color' column.
cones = Table.read_table('cones.csv').drop('Color')
cones

In [None]:
# With no second argument, group counts the rows for each flavor.
cones.group('Flavor')

In [None]:
# Passing a function aggregates the other columns within each flavor;
# here, their average.
cones.group('Flavor', np.average)

In [None]:
# Same grouping, aggregating with the minimum instead of the average.
cones.group('Flavor', np.min)

## Grouping by One Column: Welcome Survey ##

In [None]:
# Load the welcome survey responses and preview the first three rows.
survey = Table.read_table('welcome_survey_fa23.csv')
survey.show(3)
def remove_plus(string):
    """Drop a trailing ' or more' from a survey answer.

    string: the answer as text, e.g. '10 or more' or '5'

    Returns the answer without the ' or more' suffix and without surrounding
    whitespace, so that it can be converted with int().

    str.strip(' or more') is not used because it treats its argument as a set
    of characters to remove from both ends, not as a suffix.
    """
    suffix = ' or more'
    cleaned = string.strip()
    if cleaned.endswith(suffix):
        cleaned = cleaned[:-len(suffix)]
    return cleaned.strip()
# Clean every 'Number of textees' answer with remove_plus, convert the result
# to integers, and store it back under the same column label.
survey = survey.with_columns('Number of textees',survey.apply(remove_plus,'Number of textees').astype(int))

In [None]:
# Distribution of the 'Extroversion' answers.
survey.hist('Extroversion')

In [None]:
# One row per 'Extroversion' value, with the other columns aggregated by
# np.average.
by_extra = survey.group('Extroversion', np.average)
by_extra

In [None]:
# Average number of textees at each extroversion level; the grouped column is
# labelled 'Number of textees average' in by_extra.
by_extra.plot('Extroversion', 'Number of textees average')

In [None]:
# Count of survey responses for each sleep position.
survey.group("Sleep position")

In [None]:
# Keep just the two columns of interest, then group by column index 0
# (the first selected column, "Sleep position") and average "Hours of sleep".
(survey.select("Sleep position", "Hours of sleep")
 .group(0, np.average))

## Grouping by Two Columns ##

![Do right-handed people tend to sleep on their left side and left-handed people sleep on their right?](handed.png)

In [None]:
# Group by two columns at once: one row per (Handedness, Sleep position)
# combination; .show() displays the whole result.
survey.group(['Handedness', 'Sleep position']).show()

## Lists ##

In [None]:
# A single list holding an int, another int, a string, and a float.
[1, 5, 'hello', 5.0]

In [None]:
# The same list with an array added as its last element.
[1, 5, 'hello', 5.0, make_array(1,2,3)]