In [None]:
from datascience import *
%matplotlib inline
path_data = '../../../assets/data/'
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import numpy as np

# Lecture 13: Conditionals & Iteration

## Comparison ##

In [None]:
# A few examples with comparison operators: ==, !=, <, <=, >, >=
x = 2
y = 3
...

In [None]:
# We can chain the < operator
...

In [None]:
# Notice that the boolean literals (True, False) are capitalized in Python
1 == 1, 2 == 3

**Back to Slides...**

## Comparison with Arrays

In [None]:
s = make_array("John", "John", "Jack", "John")

In [None]:
# The following line says, "Compare each item in s with 'John' and return an array of results"
s == 'John'

In [None]:
# How many items in s are equal to 'John'?
sum(s == 'John')

In [None]:
# How many items in s are NOT equal to 'John'?
sum(s != 'John')

In [None]:
# We can also look at individual comparison results
a = s == 'John'  # a is an array of 'bool' values
a

In [None]:
a.item(2)

In [None]:
a.item(0)

In [None]:
# Every object in Python has a type. What is the type of 2?
type(2)

In [None]:
# What is the type of False?
type(False)

In [None]:
# An example of array comparison using a table column

# These are the Data 8 "Welcome Survey" data from Spring 2022
survey = Table.read_table('welcome_survey_sp22.csv')
survey.show(3)

In [None]:
# How many of the 1343 students reported having more than 0 piercings?
# Plan A: Use .where and .num_rows
count1 = ...
print(count1)

# Plan B: Use .column('Piercings') to get an array, compare each item (>0), and sum up the result
count2 = ...
print(count2)


## Review: What's the difference between = and ==?

**Back to Slides...**

## Rows

In [None]:
survey.show(3)

In [None]:
# We can examine the row at index 2 in the survey table
survey.row(2)

In [None]:
# We can access an item in a row using a column label
r = survey.row(2)
r.item('Pant leg')

In [None]:
# Or, use a column index
r.item(1)

In [None]:
# Does it make sense to sum up all the values in this row?
sum(r)

In [None]:
# We can sum up a row's items if they are all numerical
t = survey.select(1, 2, 3)
t.show(2)

In [None]:
# Sum up the first row
sum(t.row(0))

In [None]:
# An example of where you would really like to add values across a row
p = survey.pivot(4, 5)
p

In [None]:
# Let's add row totals ('Total') to our pivot table as a new column
# Here's a helper function
...
def row_total(r):
    '''Takes a row r of the pivot table and returns the sum of the numbers in r.

    The item at index 0 is skipped (presumably the row's label rather than a
    count -- confirm against the pivot table). Every item after it is added
    up, so this works however many value columns the pivot table has.
    '''
    total = 0
    # Start at index 1 to skip the first item, then visit every remaining item.
    for i in range(1, len(r)):
        total = total + r.item(i)
    return total

# A small test
row_total(p.row(0))

In [None]:
# Sneak preview of iterating over the rows of a table
for r in p.rows:
    print(r)

In [None]:
totals = []  # an empty list for holding the totals
for r in p.rows:
    totals.append(row_total(r))
totals

In [None]:
p_with_total = p.with_column('Total', totals)
p_with_total

**Back to Slides...**

## Rows & Apply

In [None]:
# t.apply(f) returns an array, the result of applying the function f to each ROW of table t
# Use apply to add a row total column to our pivot table
p

In [None]:
# calculate the array of row totals
p.drop('Pant leg').apply(sum)

In [None]:
totals = p.drop('Pant leg').apply(sum)
p_with_total = p.with_column('Total', totals)
p_with_total

That was so much easier! 

Questions?

**Back to Slides...**

## Conditional Statements

What is the value of `msg` after executing the following code cell?

In [None]:
x = 20
msg = 'You are 20'
if x >= 18:
    msg = 'You can legally vote'  # this line will *sometimes* run
    
msg

In [None]:
x = 20
msg = 'You are 20'
if x >= 21:
    msg = 'You can legally drink alcohol'  # this line will *sometimes* run
    
msg

In [None]:
# Run the same conditional statement but with x set to 25
x = 25

if x >= 21:
    msg = 'You can legally drink alcohol'  # this line will *sometimes* run
    
msg

In [None]:
# In Data Science, we generally ONLY see if statements inside of function definitions
def age_check(x):
    """Return a message about what a person of age x is legally allowed to do.

    The checks run from top to bottom, and the function returns as soon as
    one of them passes. If neither check passes, the function ends without
    reaching a return statement, so the result is None.
    """
    if x >= 18:
        return "You can legally vote"
    if x >= 21:
        return "You can legally drink alcohol"

In [None]:
age_check(18)

In [None]:
# What will age_check(21) yield?
age_check(21)

In [None]:
# What went wrong?

In [None]:
# We can use if...elif...else to write a better function definition
def age_check(x):
    """Return a message describing what a person of age x may legally do.

    Exactly one branch runs: the first condition that is true wins, and the
    else branch handles every age below 18.
    """
    if x >= 21:
        return 'You can legally drink and vote'
    elif x >= 18:
        return 'You can legally vote, but not drink alcohol'
    else:
        return 'You can legally drink milk'

In [None]:
age_check(21)

In [None]:
age_check(18)

In [None]:
age_check(17)

**Back to Slides...**

## Simulation

Let's play a game. This is the sort of game a stats professor might devise.

  1. We each roll a six-sided die.
  1. If my number is higher, you pay me a dollar.
  1. If your number is higher, I pay you a dollar.
  1. If our numbers are equal, no money changes hands.
  
Say we want to simulate 10,000 rounds of this exciting game. How can we do that in Python?

  - Step 1: Simulate two die rolls (first for me, second for you).
  - Step 2: Compute how much money I win as a result.
  - Repeat these two steps 10,000 times and determine the total amount that I win.
  

In [None]:
# A start (work in progress) -- define a function for my winnings on one round
def one_round(my_roll, your_roll):
    """Return 1 (my winnings, in dollars) when my roll beats your roll.

    Work in progress: no other case is handled yet, so in every other case
    the function ends without a return statement and the result is None.
    """
    i_rolled_higher = my_roll > your_roll
    if i_rolled_higher:
        return 1

In [None]:
# So if I roll higher than you, I get a dollar:
one_round(5, 2)

In [None]:
# What if we roll the same?
one_round(1, 1)

In [None]:
# Redefine the function to include two more cases
...
def one_round(my_roll, your_roll):
    """Return my winnings, in dollars, for one round of the dice game.

    my_roll and your_roll are the two die values for the round.
    Returns 1 if my roll is higher (you pay me a dollar), -1 if your roll is
    higher (I pay you a dollar), and 0 if the rolls are equal (no money
    changes hands).
    """
    if my_roll > your_roll:
        return 1
    elif your_roll > my_roll:
        return -1
    else:
        return 0

### Random Selection

To generate the die rolls in our simulation, we use random selection with `np.random.choice()`. Let's try a general example first.

In [None]:
morning = make_array('sleep in', 'wake up')
morning

In [None]:
np.random.choice(morning)

In [None]:
np.random.choice(morning)

In [None]:
np.random.choice(morning)

In [None]:
np.random.choice(morning)

In [None]:
# We can use two arguments
np.random.choice(morning, 7)

In [None]:
# What does this code express?
next_week = np.random.choice(morning, 7)
sum(next_week == 'wake up'), sum(next_week == 'sleep in')

In [None]:
# Use np.random.choice to simulate the result of rolling a 6-sided die
# First, make an array of the possible die values
die_faces = np.arange(1,7)
die_faces

In [None]:
np.random.choice(die_faces)

In [None]:
np.random.choice(die_faces)

In [None]:
def simulate_one_round():
    """Roll a six-sided die for each player and score the round.

    Takes no arguments. Returns whatever one_round returns when given the
    two randomly chosen die values (my roll first, then your roll).
    """
    faces = np.arange(1, 7)  # the possible die values: 1 through 6
    # Arguments are evaluated left to right, so my roll is drawn before yours.
    return one_round(np.random.choice(faces), np.random.choice(faces))

In [None]:
simulate_one_round()

**Back to Slides...**

## Appending Arrays

In [None]:
first = np.arange(4)
first

In [None]:
second = np.arange(10, 17)
second

In [None]:
# Append one additional value
np.append(first, 5)

In [None]:
# Append all the values in a second array
np.append(first, second)

In [None]:
# Let's play our game!
# Make an empty array to hold the results
results = make_array()
results

In [None]:
# Play a round
results = np.append(results, simulate_one_round())
results

In [None]:
# Play a second round
results = np.append(results, simulate_one_round())
results

We'll discuss `for` statements in more detail in the next class. For now, let's just see how `for` will help us simulate our game.

In [None]:
results = make_array()  # start fresh

# Tell Python how many repetitions you want, like 10:
for i in range(10):
    results = np.append(results, simulate_one_round())  # This line will execute 10 times

# Check the effect                        
results

In [None]:
# Simulating 10,000 rounds is as easy as simulating 10 rounds
results = make_array()  # start fresh

# Tell Python how many repetitions you want:
for i in range(10000):
    results = np.append(results, simulate_one_round())  # This line will execute 10,000 times

# Check the effect
results


In [None]:
len(results)

In [None]:
# Final outcome
sum(results)

In [None]:
# We can report the outcomes in a table
game_outcomes = Table().with_column('My Winnings', results)
game_outcomes

In [None]:
# We can view the distribution of My Winnings
game_outcomes.group('My Winnings')

In [None]:
# We can visualize the distribution in a bar graph
game_outcomes.group('My Winnings').barh('My Winnings')