# Simulation Exercises

In [1]:
import numpy as np
import pandas as pd
np.random.seed(29)

### How likely is it that you roll doubles when rolling two dice?

In [2]:
n_trials = nrows = 10_000
n_dice = ncols = 2

rolls = np.random.choice([1, 2, 3, 4, 5, 6], n_trials * n_dice).reshape(nrows, ncols)
rolls

array([[6, 4],
       [5, 6],
       [3, 1],
       ...,
       [2, 5],
       [1, 4],
       [4, 6]])

In [3]:
doubles = []
for roll in rolls:
    if roll[0] == roll[1]:
        doubles.append(1)
    else:
        doubles.append(0)

percent_doubles = sum(doubles)/len(doubles)
print(f"Based on a simulation of 10,000 rolls of two dice, the probability of rolling doubles is {percent_doubles}.")

Based on a simulation of 10,000 rolls of two dice, the probability of rolling doubles is 0.1709.


### If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [4]:
n_trials = nrows = 10_000
n_coins = ncols = 8

flips = np.random.choice([0,1], n_trials * n_coins).reshape(nrows, ncols)
flips

array([[0, 0, 0, ..., 0, 1, 1],
       [0, 0, 1, ..., 1, 1, 0],
       [1, 0, 1, ..., 1, 1, 1],
       ...,
       [0, 1, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 0, 0, ..., 0, 1, 0]])

In [5]:
sums_by_trial = flips.sum(axis=1)
sums_by_trial

array([3, 5, 6, ..., 3, 6, 2])

In [6]:
exactly_3 = sums_by_trial == 3
exactly_3

array([ True, False, False, ...,  True, False, False])

In [7]:
exactly_3_rate = exactly_3.astype(int).mean()
print(f"Based on a simulation of 10,000 trials of 8 consecutive coin flips, the probability of flipping exactly three heads is {exactly_3_rate}.")

Based on a simulation of 10,000 trials of 8 consecutive coin flips, the probability of flipping exactly three heads is 0.2243.


### There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [8]:
p_datasci = 0.25
n_boards = n_cols = 2
n_drives = n_rows = 10 ** 5

data = np.random.random((n_rows, n_cols))
data

array([[0.52367416, 0.30110826],
       [0.22483308, 0.89903466],
       [0.32563666, 0.01721882],
       ...,
       [0.46862724, 0.28377202],
       [0.62598687, 0.95912283],
       [0.44980969, 0.88367756]])

In [9]:
datasci_sighting = data < p_datasci
datasci_sighting

array([[False, False],
       [ True, False],
       [False,  True],
       ...,
       [False, False],
       [False, False],
       [False, False]])

In [10]:
datasci_sighting.sum(axis=1)

array([0, 1, 1, ..., 0, 0, 0])

In [11]:
(datasci_sighting.sum(axis=1) >= 2).mean()
print(f"Based on 10,000 simulated drives by two billboards, the probability of both billboards containing data science students is {(datasci_sighting.sum(axis=1) >= 2).mean()}.")

Based on 10,000 simulated drives by two billboards, the probability of both billboards containing data science students is 0.06325.


### Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [12]:
friday_stock = []
for num in range(10000):
    stock = 17
    for n in range(5):
        stock -= np.random.uniform(low=1.5, high=4.5)
    friday_stock.append(stock)

In [13]:
in_stock = []
for stock in friday_stock:
    if stock >= 1:
        in_stock.append(1)
    else:
        in_stock.append(0)

In [14]:
chance_for_tart = sum(in_stock)/len(in_stock)
print(f"Based on a simulation of 10,000 weeks, the chance that there is at least one pop tart remaining in stock after 5 days of purchases is {chance_for_tart}.")

Based on a simulation of 10,000 weeks, the chance that there is at least one pop tart remaining in stock after 5 days of purchases is 0.6979.


### Compare Heights

#### Men have an average height of 178 cm and standard deviation of 8cm.
#### Women have a mean of 170, sd = 6cm.
#### If a man and woman are chosen at random, P(woman taller than man)?

In [15]:
man_height = np.random.normal(178, 8, size=(10000,1)).tolist()
woman_height = np.random.normal(170,6, size=(10000,1)).tolist()

In [16]:
woman_taller = []
for i in range(len(man_height)):
    if woman_height[i][0] > man_height[i][0]:
        woman_taller.append(1)
    else:
        woman_taller.append(0)

In [17]:
chance_woman_taller = sum(woman_taller)/len(woman_taller)
print(f"After simulating 10,000 comparisons of a random woman to a random man, the probability that any given woman is taller than any given man is {chance_woman_taller}.")

After simulating 10,000 comparisons of a random woman to a random man, the probability that any given woman is taller than any given man is 0.2126.


### When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [18]:
n_trials = 10000
n_students = 50
p_fail = 1/250

installs = np.random.random((n_trials,n_students))

In [19]:
install_failure = installs < 0.004
install_failure

array([[False, False, False, ..., False,  True, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [20]:
(install_failure.sum(axis=1) >= 1)

array([ True, False, False, ...,  True, False,  True])

In [21]:
print(f"After running 10,000 simulations, the probability that there are no failures in a group of 50 students: {1-(install_failure.sum(axis=1)>=1).mean()}")

After running 10,000 simulations, the probability that there are no failures in a group of 50 students: 0.8148


In [22]:
n_students = 100
installs = np.random.random((n_trials,n_students))
install_failure = installs < 0.004
print(f"After running 10,000 simulations, the probability that there are no failures in a group of 100 students: {1-(install_failure.sum(axis=1)>=1).mean()}")

After running 10,000 simulations, the probability that there are no failures in a group of 100 students: 0.6672


### What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [23]:
n_students = 150
installs = np.random.random((n_trials,n_students))
install_failure = installs < 0.004
print(f"After running 10,000 simulations, the probability that there are no failures in a group of 150 students: {1-(install_failure.sum(axis=1)>=1).mean()}")

After running 10,000 simulations, the probability that there are no failures in a group of 150 students: 0.5463


### How likely is it that 450 students all download anaconda without an issue?

In [24]:
n_students = 450
installs = np.random.random((n_trials,n_students))
install_failure = installs < 0.004
print(f"After running 10,000 simulations, the probability that there are no failures in a group of 450 students: {1-(install_failure.sum(axis=1)>=1).mean()}")

After running 10,000 simulations, the probability that there are no failures in a group of 450 students: 0.16879999999999995


### There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [25]:
p_food = .70
n_trials = 10000
n_days = 3

food_truck = np.random.random((n_trials, n_days))
food_truck

array([[0.85769224, 0.80032733, 0.71057881],
       [0.49072275, 0.20748525, 0.43164126],
       [0.9754546 , 0.48757196, 0.68411212],
       ...,
       [0.72868148, 0.43491399, 0.25175779],
       [0.95673347, 0.83199687, 0.67132019],
       [0.97338735, 0.03091319, 0.48831878]])

In [26]:
food_truck_present = food_truck < p_food
food_truck_present

array([[False, False, False],
       [ True,  True,  True],
       [False,  True,  True],
       ...,
       [False,  True,  True],
       [False, False,  True],
       [False,  True,  True]])

In [27]:
(food_truck_present.sum(axis=1) == 0)

array([ True, False, False, ..., False, False, False])

In [28]:
no_food_3_days = (food_truck_present.sum(axis=1) == 0).mean()
no_food_3_days

0.0278

In [29]:
print(f"After 10,000 simulations, the probability that there would be no food truck for all 3 days: {no_food_3_days}")

After 10,000 simulations, the probability that there would be no food truck for all 3 days: 0.0278


### How likely is it that a food truck will show up sometime this week?

In [30]:
p_food = .70
n_trials = 10000
n_days = 7

food_truck = np.random.random((n_trials, n_days))
food_truck

array([[0.82362966, 0.48518064, 0.59832284, ..., 0.36197704, 0.35796501,
        0.21457426],
       [0.67455267, 0.32256746, 0.93606459, ..., 0.2946364 , 0.48016794,
        0.92708789],
       [0.84056316, 0.21086955, 0.17370007, ..., 0.81261892, 0.52686485,
        0.01686217],
       ...,
       [0.94971857, 0.44650045, 0.80551779, ..., 0.76847661, 0.10800821,
        0.42607581],
       [0.65066617, 0.76981336, 0.06679755, ..., 0.95101963, 0.94136017,
        0.69983771],
       [0.56952336, 0.18027006, 0.82738404, ..., 0.54219219, 0.03063603,
        0.83949336]])

In [31]:
food_truck_present = food_truck < p_food
food_truck_present

array([[False,  True,  True, ...,  True,  True,  True],
       [ True,  True, False, ...,  True,  True, False],
       [False,  True,  True, ..., False,  True,  True],
       ...,
       [False,  True, False, ..., False,  True,  True],
       [ True, False,  True, ..., False, False,  True],
       [ True,  True, False, ...,  True,  True, False]])

In [32]:
(food_truck_present.sum(axis=1) >= 1)

array([ True,  True,  True, ...,  True,  True,  True])

In [33]:
at_least_1_food_7_days = (food_truck_present.sum(axis=1) >= 1).mean()
at_least_1_food_7_days

0.9998

In [34]:
print(f"The probability that there will be at least one food truck present at some point during any given week: {at_least_1_food_7_days}")

The probability that there will be at least one food truck present at some point during any given week: 0.9998


### If 23 people are in the same room, what are the odds that two of them share a birthday?

In [35]:
birthdays = list(range(1,366))
n_trials = 10_000
n_students = 23

rooms = np.random.choice(birthdays,n_trials * n_students).reshape(n_trials, n_students)
rooms

array([[ 87, 165, 326, ..., 133, 256, 316],
       [ 63, 110, 176, ..., 304,   2,  80],
       [222, 232,  97, ..., 355, 152, 117],
       ...,
       [334, 115, 357, ..., 253,  37, 168],
       [127, 304, 102, ..., 352,  74, 130],
       [223, 291, 127, ...,  19, 120, 142]])

In [36]:
matches = []
for room in rooms:
    if len(set(room)) != len(room):
        matches.append(1)
    else:
        matches.append(0)
        
percent_matches = sum(matches)/len(matches)
print(f"Based on 10,000 simulations, in a room of 23 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: {percent_matches}")

Based on 10,000 simulations, in a room of 23 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: 0.5055


### What if it's 20 people? 

In [37]:
birthdays = list(range(1,366))
n_trials = 10_000
n_students = 20

rooms = np.random.choice(birthdays,n_trials * n_students).reshape(n_trials, n_students)
rooms

array([[306, 195,  73, ..., 121,  97,  51],
       [ 42,  59, 210, ..., 167,  41, 157],
       [ 53, 117, 119, ..., 233, 231, 294],
       ...,
       [  5, 246, 350, ...,  45, 185, 275],
       [349, 310, 339, ..., 233,  29, 252],
       [354, 303, 289, ...,  62,  82, 316]])

In [38]:
matches = []
for room in rooms:
    if len(set(room)) != len(room):
        matches.append(1)
    else:
        matches.append(0)
        
percent_matches = sum(matches)/len(matches)
print(f"Based on 10,000 simulations, in a room of 20 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: {percent_matches}")

Based on 10,000 simulations, in a room of 20 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: 0.411


### 40?

In [39]:
birthdays = list(range(1,366))
n_trials = 10_000
n_students = 40

rooms = np.random.choice(birthdays,n_trials * n_students).reshape(n_trials, n_students)
rooms

array([[ 66, 107,  97, ..., 324, 115,  53],
       [144,  10, 148, ..., 100, 186,  81],
       [136,  15, 246, ..., 124, 178, 224],
       ...,
       [167, 267,  29, ..., 169, 186, 181],
       [351, 294, 228, ..., 281, 152, 106],
       [120,  59, 123, ..., 131, 257, 116]])

In [40]:
matches = []
for room in rooms:
    if len(set(room)) != len(room):
        matches.append(1)
    else:
        matches.append(0)
        
percent_matches = sum(matches)/len(matches)
print(f"Based on 10,000 simulations, in a room of 40 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: {percent_matches}")

Based on 10,000 simulations, in a room of 40 people, assuming perfectly equal distribution among birthdays, the probability that there is at least one shared birthday: 0.89
