# Statistics Exercises

#### 1. How likely is it that you roll doubles when rolling two dice?

In [8]:
import numpy as np
import pandas as pd

np.random.seed(1234)

In [64]:
possible_outcomes = [1,2,3,4,5,6]

n_dice = n_cols = 2
num_trials = n_rows = 10 ** 5

data1 = np.random.choice(possible_outcomes, num_trials * n_dice).reshape(num_trials, n_dice)
data1

array([[1, 1],
       [1, 6],
       [6, 4],
       ...,
       [1, 1],
       [2, 5],
       [1, 5]])

In [65]:
pd.DataFrame(data1).apply(lambda x: x[0] == x[1] in x.values, axis = 1).mean()

0.16781

#### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [9]:
heads = 1
tails = 0
outcomes = [0,1]

n_trials = n_rows = 10000
n_coins = n_cols = 8

data2 = np.random.choice(outcomes, n_trials * n_coins).reshape(n_rows, n_cols)
data2

array([[1, 1, 0, ..., 0, 0, 1],
       [1, 1, 1, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 1, 1, ..., 1, 1, 0],
       [0, 1, 1, ..., 1, 0, 1],
       [1, 1, 1, ..., 0, 1, 1]])

In [10]:
total_heads = data2.sum(axis = 1)
total_heads

array([4, 5, 0, ..., 5, 5, 6])

In [11]:
three_heads = (total_heads == 3).mean()
three_heads

0.2159

In [12]:
more_than_three = (total_heads > 3).mean()
more_than_three

0.6338

#### 3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [18]:
web = 3
data = 1
outcomes = [3,1]

prob_web = 0.75
prob_ds = 1 - prob_web

n_trials = n_rows = 10000
n_billboards = n_cols = 2

data3 = np.random.choice(outcomes, n_trials * n_billboards, p = [prob_web, prob_ds]).reshape(n_rows, n_cols)
data3

array([[3, 3],
       [3, 1],
       [3, 3],
       ...,
       [3, 1],
       [3, 3],
       [3, 3]])

In [19]:
total_alums = data3.sum(axis = 1)
total_alums

array([6, 4, 6, ..., 4, 6, 6])

In [20]:
two_ds = (total_alums == 2).mean()
two_ds

0.0683

#### 4. Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [35]:
n_trials = n_rows = 10000
n_days = n_cols = 5

data4 = np.round(np.random.normal(3, 1.5, size = (n_trials, n_days)))
data4

array([[ 3.,  3.,  1.,  1.,  4.],
       [ 4.,  4.,  1.,  2.,  2.],
       [ 1.,  5.,  2.,  4., -0.],
       ...,
       [ 1.,  2.,  3.,  5.,  1.],
       [ 4.,  0.,  2.,  2.,  2.],
       [ 1.,  4.,  2.,  3.,  3.]])

In [36]:
total_packs = data4.sum(axis = 1)
total_packs

array([12., 13., 12., ..., 12., 10., 13.])

In [37]:
poptarts = (total_packs < 17).mean()
poptarts

0.6704

#### 5. Compare Heights

    - Men have an average height of 178 cm and standard deviation of 8cm.
    - Women have a mean of 170, sd = 6cm.
    - If a man and woman are chosen at random, P(woman taller than man)?

In [48]:
n_trials = n_rows = 10000
m_and_w = n_cols = 2

men_height = np.random.normal(178, 8, n_trials)
women_height = np.random.normal(170, 6, n_trials)

np.column_stack((men_height, women_height))

array([[164.3115971 , 166.45997742],
       [172.2472999 , 170.92429178],
       [194.08991429, 172.89307431],
       ...,
       [161.49586611, 176.16147355],
       [169.69936859, 171.7387915 ],
       [190.90833587, 172.26979774]])

In [49]:
height_diff = np.subtract(men_height, women_height)
height_diff

array([ -2.14838032,   1.32300811,  21.19683999, ..., -14.66560745,
        -2.03942291,  18.63853813])

In [50]:
woman_taller = (height_diff < 0).mean()
woman_taller

0.2138

#### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [54]:
prob_fail = 1/250
prob_success = 1 - prob_fail

outcomes = [0,1]

n_trials = n_rows = 10000
n_downloads = n_cols = 50

data6 = np.random.choice(outcomes, n_trials * n_downloads, p = [prob_fail, prob_success]).reshape(n_rows, n_cols)
data6

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]])

In [55]:
installs = data6.sum(axis = 1)
installs

array([50, 50, 50, ..., 50, 50, 50])

In [56]:
fails = (installs < 50).mean()
fails

0.1824

#### What is the probability that we observe an installation issue within the first 150 students that download anaconda?

#### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [57]:
truck = 0.7
no_truck = 1 - truck

outcomes = [1,0]

n_trials = n_rows = 10000
n_days = n_cols = 3

data7 = np.random.choice(outcomes, n_trials * n_days, p = [truck, no_truck]).reshape(n_rows, n_cols)
data7

array([[0, 1, 1],
       [0, 1, 1],
       [0, 1, 1],
       ...,
       [1, 1, 1],
       [1, 1, 1],
       [0, 1, 1]])

In [58]:
truck_totals = data7.sum(axis = 1)
truck_totals

array([2, 2, 2, ..., 3, 3, 2])

In [59]:
no_truck_chance = (truck_totals == 0).mean()
no_truck_chance

0.0265

#### How likely is it that a food truck will show up sometime this week?

#### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [60]:
bday = range(1,366)

n_trials = n_rows = 10000
n_people = n_cols = 23

data8 = np.random.choice(bday, n_trials * n_people).reshape(n_rows, n_cols)
data8

array([[ 91, 347, 263, ...,  17,  64, 289],
       [180, 143, 298, ...,  13, 364, 134],
       [ 73, 361,  88, ...,  36,  24, 113],
       ...,
       [161,  84, 253, ...,  54, 136, 276],
       [218, 167,  83, ..., 202, 181, 229],
       [281, 115, 340, ..., 149, 327, 146]])

In [66]:
bdays23_df = pd.DataFrame(data8)
bdays23_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,91,347,263,266,324,186,209,349,266,118,...,229,27,27,46,257,246,108,17,64,289
1,180,143,298,115,110,251,66,86,118,265,...,256,106,246,341,179,273,304,13,364,134
2,73,361,88,110,240,361,76,131,153,4,...,305,297,36,122,282,67,111,36,24,113
3,309,224,327,165,321,82,190,138,325,263,...,133,301,296,230,32,292,129,200,263,341
4,227,289,256,218,299,23,365,41,108,129,...,100,293,338,65,169,305,361,247,233,276


In [67]:
same_day23 = bdays23_df.nunique(axis = 1)
same_day23

0       21
1       23
2       21
3       22
4       23
        ..
9995    21
9996    21
9997    23
9998    22
9999    22
Length: 10000, dtype: int64

In [68]:
same23 = (same_day23 < 23).mean()
same23

0.5166