### Statistical Simulations

In [131]:
# Setup
import numpy as np
import pandas as pd

##### 1. How likely is it that you roll doubles when rolling two dice?

In [132]:
dice_outcomes = [1, 2, 3, 4, 5, 6]
n_simulations = 1_000_000
n_trials = 2

roll = np.random.choice(dice_outcomes,  size=(n_simulations, n_trials))
roll[0:5]

array([[1, 1],
       [2, 5],
       [1, 4],
       [4, 3],
       [5, 5]])

In [133]:
roll_t = roll.transpose()
roll_t[0:5]

array([[1, 2, 1, ..., 6, 1, 5],
       [1, 5, 4, ..., 3, 4, 2]])

In [8]:
(roll_t[0] == roll_t[1]).mean()

0.165945

##### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [15]:
coin_outcomes = [1, 0]
n_simulations = 1_000_000
n_trials = 8
flips = np.random.choice(coin_outcomes, size=(n_simulations, n_trials))
flips[0:10]

array([[0, 0, 1, 1, 1, 0, 1, 1],
       [0, 1, 0, 0, 0, 1, 1, 0],
       [1, 0, 1, 1, 1, 1, 1, 1],
       [1, 1, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 0, 0, 1],
       [1, 0, 0, 0, 1, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 1, 0],
       [0, 1, 0, 1, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1, 1, 1],
       [1, 0, 1, 1, 1, 1, 0, 0]])

In [21]:
number_of_heads = flips.sum(axis=1)
number_of_heads[0:10]

array([5, 3, 7, 7, 3, 2, 5, 2, 6, 5])

In [22]:
(number_of_heads == 3).mean()

0.218592

In [137]:
(number_of_heads > 3).mean()

0.637057

##### 3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [36]:
# Assuming that the cohorts are the same number of students
outcomes = [1, 0] # 1 = data science student; 0 = Web student

web_dev = 0.75
data_sci = 0.25

n_simulations = 100_000
billboards = 2

student = np.random.choice(outcomes, size=(n_simulations, billboards), p=[data_sci, web_dev])

student[0:5]

array([[0, 0],
       [0, 1],
       [0, 0],
       [0, 1],
       [0, 0]])

In [37]:
number_of_students = student.sum(axis=1)
number_of_students

array([0, 1, 0, ..., 0, 0, 0])

In [38]:
(number_of_students == 2).mean()

0.06179

##### 4. Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [96]:
pop_tart = np.random.normal(3, 1.5, size=(10_000, 5)) # 10k iterations for the 5 day period

pop_tart[0:5]

pop_tarts_eaten = pop_tart.sum(axis=1)
pop_tarts_eaten[0:5]

(pop_tarts_eaten < 17).mean() #Average less than 17 pop tarts

0.7264

##### 5. Compare Heights

- Men have an average height of 178 cm and standard deviation of 8cm.
- Women have a mean of 170, sd = 6cm.
- Since you have means and standard deviations, you can use np.random.normal to generate observations.
- If a man and woman are chosen at random, P(woman taller than man)?


In [88]:
avg_ht_men = 178
avg_ht_women = 170

men_std = 8
women_std = 6

n_simulations = 100_000

random_ht_man = np.random.normal(avg_ht_men, men_std, n_simulations)
random_ht_woman = np.random.normal(avg_ht_women, women_std, n_simulations)

(random_ht_woman > random_ht_man).mean()

0.2117

##### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

- What is the probability that we observe an installation issue within the first 150 students that download anaconda?

- How likely is it that 450 students all download anaconda without an issue?

In [66]:
corrupted = [1, 0] # 1 = corrupted; 0 = not corrupted
n_simulations = 100_000
n_students = 100

corrupt_anaconda = np.random.choice(corrupted, size=(n_simulations, n_students), p=[1/250, 249/250])
corrupt_anaconda

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [83]:
n_students = 50
corrupt_anaconda = np.random.choice(corrupted, size=(n_simulations, n_students), p=[1/250, 249/250])
total_corrupt = corrupt_anaconda.sum(axis=1)
1- (total_corrupt >=1).mean()

0.81969

In [87]:
n_students = 100
corrupt_anaconda = np.random.choice(corrupted, size=(n_simulations, n_students), p=[1/250, 249/250])
total_corrupt = corrupt_anaconda.sum(axis=1)
1- (total_corrupt >=1).mean()

0.6685

In [85]:
n_students = 150
corrupt_anaconda = np.random.choice(corrupted, size=(n_simulations, n_students), p=[1/250, 249/250])
total_corrupt = corrupt_anaconda.sum(axis=1)
1- (total_corrupt >=1).mean()

0.54837

In [86]:
n_students = 450
corrupt_anaconda = np.random.choice(corrupted, size=(n_simulations, n_students), p=[1/250, 249/250])
total_corrupt = corrupt_anaconda.sum(axis=1)
1- (total_corrupt >=1).mean()

0.16386999999999996

##### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

- How likely is it that a food truck will show up sometime this week?

In [111]:
# probability that at least one food truck arrives = 0.7
food_truck_arrives = [1, 0] # yes, no
food_sample = np.random.choice(food_truck_arrives, size=(10_000, 3), p=[0.7, 0.3] )
food_sample 

food_sample_sums = food_sample.sum(axis=1)
food_sample_sums

(food_sample_sums == 0).mean() # Probability that food truck no show for three days in a row

0.0288

In [114]:
food_truck_arrives = [1, 0] # yes, no
food_sample = np.random.choice(food_truck_arrives, size=(10_000, 7), p=[0.7, 0.3] )
food_sample

food_sample_sums = food_sample.sum(axis=1)
food_sample_sums

(food_sample_sums > 0).mean() # Probabiliy that a food truck will show up at least one day in a week

0.9998

##### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [None]:
def odds_same_birthday(people_in_room):
    birthdays = list(range(1, 366))
    n_samples = 10_000
    person_1 = np.random.choice(birthdays, size=(n_samples, people_in_room))
    person_2 = np.random.choice(birthdays, size=(n_samples, people_in_room))
    (person_1 == )
                                

In [136]:
    birthdays = list(range(1, 366))
    n_samples = 10_000
    person_1 = np.random.choice(birthdays, size=(n_samples, 40))
    person_2 = np.random.choice(birthdays, size=(n_samples, 40))
    df = pd.DataFrame(person_1)
    df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,38,188,123,321,122,67,332,145,260,236,...,320,152,29,143,128,169,228,289,330,68
1,335,309,134,219,136,160,314,264,43,170,...,83,190,5,205,322,274,31,158,274,95
2,202,83,212,354,33,249,55,344,152,283,...,62,91,19,274,71,289,213,173,118,46
3,8,18,91,74,114,280,55,119,161,133,...,6,299,343,228,321,77,147,99,28,290
4,37,64,202,156,158,24,78,235,260,101,...,149,267,121,182,310,230,76,331,24,242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,325,72,320,363,353,362,340,262,284,346,...,267,43,153,294,223,308,251,291,56,123
9996,218,287,21,182,26,96,330,303,328,236,...,273,171,283,54,208,112,45,109,293,362
9997,205,332,326,214,191,365,269,177,27,239,...,323,98,33,212,253,165,124,321,87,266
9998,128,53,80,47,45,62,44,110,180,15,...,101,118,307,190,20,172,275,310,273,235
