In [1]:
import numpy as np
import pandas as pd

### 1. How likely is it that you roll doubles when rolling two dice?

In [2]:
outcomes = [1, 2, 3, 4, 5, 6]

n_simulations = 100_000
n_trials = 2

rolls = np.random.choice(outcomes, size = (n_simulations, n_trials))
rolls

array([[2, 6],
       [5, 2],
       [3, 3],
       ...,
       [5, 5],
       [5, 4],
       [3, 1]])

In [3]:
# Since we know there are two trials, we can check that the trials in each simulation are equal to each other
def check_doubles(rolls):
    doubles_list = []
    for sim in rolls:
        doubles_list.append(sim[0] == sim[1])
        
    return doubles_list

# Boolean array if simulation is a double
doubles_sims = check_doubles(rolls)
doubles_sims

[False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 F

In [55]:
# Convert boolean list to np array and find mean for probability
print(f"The probability of doubles is:  {np.array(doubles_sims).mean() * 100}%")

The probability of doubles is:  16.541%


In [203]:
# Alternative solution

# A length of 1 from np.unique(rolls[n]) means a double was rolled
doubles_list = [len(np.unique(rolls[n])) for n in range(0, n_simulations - 1) if len(np.unique(rolls[n])) == 1]

len(doubles_list) / n_simulations

0.16541

### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [5]:
# Heads is 1
outcomes = [0, 1]

n_simulations = 100_000
n_trials = 8

flips = np.random.choice(outcomes, size = (n_simulations, n_trials))
sum_of_flips = flips.sum(axis = 1)

In [6]:
# Probability of exactly 3 heads
prob_3_heads = (sum_of_flips == 3).mean()
print(f"The probability of flipping exactly three heads is {prob_3_heads * 100}%")

The probability of flipping exactly three heads is 21.931%


In [205]:
# Probability of more than three heads
prob_over_3_heads = (sum_of_flips > 3).mean()
print(f"The probability of flipping more than three heads is {round(prob_over_3_heads * 100, 2)}%")

The probability of flipping more than three heads is 63.55%


### 3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [8]:
# Data science is 1
outcomes = [0, 1]

n_simulations = 100_000
n_trials = 2

combos = np.random.choice(outcomes, size = (n_simulations, n_trials), p = [.75, .25])
combos

array([[0, 0],
       [1, 0],
       [0, 0],
       ...,
       [0, 0],
       [0, 0],
       [1, 0]])

In [9]:
prob_all_data_sci = (combos.sum(axis = 1) == 2).mean()
print(f"The probability both billboards feature data science students:  {prob_all_data_sci * 100}%")

The probability both billboards feature data science students:  6.202%


### 4. Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [49]:
n_simulations = 100_000
n_trials = 5

daily_purchases = np.random.normal(loc = 3, scale = 1.5, size = (n_simulations, n_trials))
weekly_purchases = daily_purchases.sum(axis = 1)
prob_poptarts_left = (weekly_purchases < 17).mean()

print(f"There is a {round(prob_poptarts_left * 100, 2)}% chance there are poptarts left.")

[ True  True False ...  True  True  True]
There is a 72.36% chance there are poptarts left.


### 5. Compare Heights
#### * Men have an average height of 178 cm and standard deviation of 8cm.
#### * Women have a mean of 170, sd = 6cm.
#### * If a man and woman are chosen at random, P(woman taller than man)?

In [42]:
male_outcomes = np.random.normal(loc = 178, scale = 8, size = n_simulations)
female_outcomes = np.random.normal(loc = 170, scale = 6, size = n_simulations)
n_simulations = 100_000

male_simulations = np.random.choice(male_outcomes, size = n_simulations)
male_simulations

array([175.35456557, 182.6823537 , 166.79429982, ..., 179.73513815,
       165.12255508, 182.18078681])

In [43]:
female_simulations = np.random.choice(female_outcomes, size = n_simulations)
female_simulations

array([154.94791467, 186.12867023, 175.87307265, ..., 172.87807605,
       172.79784056, 160.64121851])

In [56]:
prob_taller_female = np.greater(female_simulations, male_simulations).mean()
print(f"Probability that female is taller:  {prob_taller_female * 100}%")

Probability that female is taller:  21.204%


### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue?

In [104]:
# 1 is a failed installation
outcomes = [0, 1]
failure_odds = [249/250, 1/250]
n_simulations = 100_000

installations_50 = np.random.choice(outcomes, size = (n_simulations, 50), p = failure_odds)

all_successful_50 = installations_50.sum(axis = 1) == 0
all_successful_50_prob = all_successful_50.mean()

print(f"Probability 50 installations all successful:  {round(all_successful_50_prob * 100, 2)}%")

Probability 50 installations all successful:  81.58%


#### * 100 students?

In [103]:
installations_100 = np.random.choice(outcomes, size = (n_simulations, 100), p = failure_odds)

all_successful_100 = installations_100.sum(axis = 1) == 0
all_successful_100_prob = all_successful_100.mean()

print(f"Probability 100 installations all successful:  {round(all_successful_100_prob * 100, 2)}%")

Probability 100 installations all successful:  66.95%


#### * What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [102]:
installations_150 = np.random.choice(outcomes, size =(n_simulations, 150), p = failure_odds)

failures_within_150 = installations_150.sum(axis = 1) > 0
failures_within_150_prob = failures_within_150.mean()

print(f"Probability failure occurs within 150 installations:  {round(failures_within_150_prob * 100, 2)}%")

Probability failure occurs within 150 installations:  45.07%


#### * How likely is it that 450 students all download anaconda without an issue?

In [106]:
installations_450 = np.random.choice(outcomes, size = (n_simulations, 450), p = failure_odds)

all_successful_450 = installations_450.sum(axis = 1) == 0
all_successful_450_prob = all_successful_450.mean()

print(f"Probability 450 installations all successful:  {round(all_successful_450_prob * 100, 2)}%")

Probability 450 installations all successful:  16.5%


### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [210]:
# 1 is at least 1 food truck present
outcomes = [0, 1]
n_simulations = 1_000_000
outcome_probabilities = [.3, .7]

three_day_simulations = np.random.choice(outcomes, size = (n_simulations, 3), p = outcome_probabilities)

no_food_truck_simulations = three_day_simulations.sum(axis = 1) == 0
no_food_truck_prob = no_food_truck_simulations.mean()

print(f"Probability of no food truck 3 days straight:  {round(no_food_truck_prob * 100, 2)}%")

Probability of no food truck 3 days straight:  2.7%


#### * How likely is it that a food truck will show up sometime this week? (in the remaining days of the week)

In [211]:
# 4 trials since there are 4 days left in the week
week_simulations = np.random.choice(outcomes, size = (n_simulations, 4), p = outcome_probabilities)

food_truck_weeks = week_simulations.sum(axis = 1) > 0
food_truck_weeks_prob = food_truck_weeks.mean()

print(f"Probability of a food truck within the week:  {round(food_truck_weeks_prob * 100, 2)}%")

Probability of a food truck within the week:  99.19%


### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? 

In [212]:
outcomes = list(range(1, 366))
n_simulations = 100_000

simulations = np.random.choice(outcomes, size = (n_simulations, 23))

simulations_df = pd.DataFrame(simulations)
shared_birthday_23 = simulations_df.nunique(axis = 1) < 23
shared_birthday_23_prob = shared_birthday_23.mean()

print(f"Probability two people share a birthday in a room of 23 people:  {round(shared_birthday_23_prob * 100, 2)}%")

# Can also use the np.unique() method in question 1

Probability two people share a birthday in a room of 23 people:  50.88%


#### * What if it's 20 people?

In [213]:
simulations = np.random.choice(outcomes, size = (n_simulations, 20))
simulations_df = pd.DataFrame(simulations)

shared_birthday_20 = simulations_df.nunique(axis = 1) < 20
shared_birthday_20_prob = shared_birthday_20.mean()

print(f"Probability two people share a birthday in a room of 20 people:  {round(shared_birthday_20_prob * 100, 2)}%")

Probability two people share a birthday in a room of 20 people:  41.01%


#### * 40?

In [214]:
simulations = np.random.choice(outcomes, size = (n_simulations, 40))
simulations_df = pd.DataFrame(simulations)

shared_birthday_40 = simulations_df.nunique(axis = 1) < 40
shared_birthday_40_prob = shared_birthday_40.mean()

print(f"Probability two people share a birthday in a room of 40 people:  {round(shared_birthday_40_prob * 100, 2)}%")

Probability two people share a birthday in a room of 40 people:  89.22%
