**Bernoulli & Binomial with Scipy.stats**

In [1]:
# Import the bernoulli object from scipy.stats
from scipy.stats import bernoulli
import numpy as np 

# Fix the global NumPy seed so this cell prints the same draw on every run
np.random.seed(42)

# A single Bernoulli trial: one flip of a biased coin that lands heads 35% of the time.
# The result is a length-1 array holding 1 (heads) or 0 (tails).
coin_flip = bernoulli.rvs(0.35, size=1)
print(coin_flip)

[1]


In [2]:
# Import the bernoulli object from scipy.stats
from scipy.stats import bernoulli

# Reseed the global NumPy generator so the ten draws below are reproducible
np.random.seed(42)

# Ten flips of the 35%-heads coin; each entry is 1 for heads and 0 for tails
ten_coin_flips = bernoulli.rvs(p=0.35, size=10)

# Heads are coded as 1, so the array total is the number of heads.
# Use the vectorised ndarray.sum() rather than the builtin sum(), which
# iterates over the array element by element in Python.
coin_flips_sum = ten_coin_flips.sum()

print(coin_flips_sum)


4


In [7]:
from scipy.stats import binom
# Seed the global generator so the 20 draws are repeatable
np.random.seed(42)

# Each draw is the number of heads in 10 flips of a 35%-heads coin; take 20 such draws
draws = binom.rvs(10, 0.35, size=20)
print(draws)

# Average number of heads per 10-flip experiment
print(draws.mean())

[3 6 4 4 2 2 1 5 4 4 1 6 5 2 2 2 3 4 3 3]
3.3


In [8]:
# Rebuild the binomial experiment by hand: 20 repetitions of
# "flip a 35%-heads coin 10 times and count the heads".
np.random.seed(42)

temp = []
for _ in range(20):
    ten_coin_flips = bernoulli.rvs(p=0.35, size=10)
    # Vectorised head count. int() stores plain Python ints, so print(temp)
    # shows [4, 2, ...] instead of NumPy scalar reprs on NumPy >= 2.
    coin_flips_sum = int(ten_coin_flips.sum())
    temp.append(coin_flips_sum)

print(temp)
print(np.mean(temp))

[4, 2, 1, 4, 2, 5, 3, 5, 3, 3, 2, 5, 4, 3, 1, 5, 1, 3, 4, 6]
3.3


In [10]:
# Binomial helpers used below (k successes, n trials, success probability p):
#   pmf(k, n, p) -> P(X == k)  probability of exactly k successes
#   cdf(k, n, p) -> P(X <= k)  probability of k successes or fewer
#   sf(k, n, p)  -> P(X >  k)  probability of more than k successes

# Freeze the distribution once: 50 components, each defective with probability 2%
defect_dist = binom(n=50, p=0.02)

# Probability of getting exactly 1 defective component
prob_one_defect = defect_dist.pmf(1)
print(prob_one_defect)

# Probability of not getting any defective components
prob_no_defects = defect_dist.pmf(0)
print(prob_no_defects)

# Probability of getting 2 or fewer defective components
prob_two_or_less_defects = defect_dist.cdf(2)
print(prob_two_or_less_defects)


0.37160171437461204
0.3641696800871171
0.9215722516490308


**Expected Value & Variance**

In [15]:
from scipy.stats import describe
# Seed the global NumPy generator (as the other simulation cells do) so the
# sample statistics printed below are identical on every fresh run.
np.random.seed(42)

# Sample mean from a generated sample of 100 fair coin flips
sample_of_100_flips = binom.rvs(n=1, p=0.50, size=100)
sample_mean_100_flips = describe(sample_of_100_flips).mean
print(sample_mean_100_flips)

# Sample mean from a generated sample of 1,000 fair coin flips
sample_mean_1000_flips = describe(binom.rvs(n=1, p=0.5, size=1000)).mean
print(sample_mean_1000_flips)

# Sample mean from a generated sample of 2,000 fair coin flips
sample_mean_2000_flips = describe(binom.rvs(n=1, p=0.5, size=2000)).mean
print(sample_mean_2000_flips)

# 2,000 draws, each the number of heads in 10 flips with p = 0.3
sample = binom.rvs(n=10, p=0.3, size=2000)

# Empirical (sample) mean and variance of those draws
sample_describe = describe(sample)

# Theoretical mean of a binomial: n * p
mean = 10*0.3

# Theoretical variance of a binomial: n * p * (1 - p)
variance = mean*0.7

# The same theoretical mean and variance, as reported by scipy
binom_stats = binom.stats(n=10, p=0.3)

# Empirical mean/variance first, then the two theoretical versions for comparison
print(sample_describe.mean, sample_describe.variance, mean, variance, binom_stats)

0.53
0.486
0.4875
2.941 1.9484932466233116 3.0 2.0999999999999996 (array(3.), array(2.1))


In [18]:
# Seed the global NumPy generator so the simulated statistics are reproducible
np.random.seed(42)

# 1,500 repetitions (rows) of 10 trials (columns); every value is the number
# of heads in 10 coin flips with 25% probability of heads.
samples = binom.rvs(n=10, p=0.25, size=(1500, 10))

# Per-repetition sample mean and sample variance, computed in one vectorised pass.
# ddof=1 matches the default used by scipy.stats.describe.
averages = samples.mean(axis=1).tolist()
variances = samples.var(axis=1, ddof=1).tolist()

# Calculate the mean of the averages variable
print("Mean {}".format(np.mean(averages)))

# Calculate the mean of the variances variable
print("Variance {}".format(np.mean(variances)))

# Theoretical mean and variance for comparison
print(binom.stats(n=10, p=0.25))

Mean 2.4843333333333333
Variance 1.8509111111111112
(array(2.5), array(1.875))


**Calculating Probabilities (joint, independence, intersection, conditional)**

In [19]:
# Individual probabilities for the engine and the gearbox.
# Every joint probability below is a product of these marginals,
# i.e. the two components are treated as independent.
P_Eng_works, P_Eng_fails = 0.99, 0.01
P_GearB_works, P_GearB_fails = 0.995, 0.005

# Case 1: both components work
P_both_works = P_Eng_works * P_GearB_works
print(P_both_works)

# Case 2: exactly one component fails (two mutually exclusive ways, so add them)
P_only_GearB_fails = P_GearB_fails * P_Eng_works
P_only_Eng_fails = P_Eng_fails * P_GearB_works
P_one_fails = P_only_GearB_fails + P_only_Eng_fails
print(P_one_fails)

# Case 3: both work or both fail
P_EngW_GearBW = P_Eng_works * P_GearB_works
P_EngF_GearBF = P_Eng_fails * P_GearB_fails
P_fails_or_works = P_EngW_GearBW + P_EngF_GearBF
print(P_fails_or_works)

0.98505
0.0149
0.9851


In [21]:
# Complement rule: 4 of the 52 cards are aces, so P(not Ace) = 1 - P(Ace)
P_Ace = 4/52
P_not_Ace = 1 - P_Ace
print(P_not_Ace)

# Addition rule for two disjoint suits: P(Red) = P(Hearts) + P(Diamonds)
P_Hearts, P_Diamonds = 13/52, 13/52
P_Red = P_Hearts + P_Diamonds
print(P_Red)

# General addition rule: one card is both a Jack and a Spade, so subtract
# that overlap once to avoid counting it twice
P_Jack, P_Spade = 4/52, 13/52
P_Jack_n_Spade = 1/52
P_Jack_or_Spade = P_Jack + P_Spade - P_Jack_n_Spade
print(P_Jack_or_Spade)

# A card cannot be both a King and a Queen, so the overlap term is zero
P_King, P_Queen = 4/52, 4/52
P_King_n_Queen = 0
P_King_or_Queen = P_King + P_Queen - P_King_n_Queen
print(P_King_or_Queen)

0.9230769230769231
0.5
0.3076923076923077
0.15384615384615385


In [None]:
# Counts used for the conditional probability: all Friday observations,
# and the subset of them that were delayed
On_Friday = 138
Delayed_on_Friday = 11

# P(Delayed | Friday) = (delayed on Friday) / (all on Friday)
P_Delayed_g_Friday = Delayed_on_Friday / On_Friday

print(P_Delayed_g_Friday)


**Total Probability Law**
Formula 1 engines

Suppose that two manufacturers, A and B, supply the engines for Formula 1 racing cars, with the following characteristics:

    99% of the engines from factory A last more than 5,000 km.
    Factory B manufactures engines that last more than 5,000 km with 95% probability.
    70% of the engines are from manufacturer A, and the rest are produced by manufacturer B.

What is the chance that an engine will last more than 5,000 km?

In [22]:
# Share of engines supplied by each manufacturer
P_A, P_B = 0.7, 0.3

# Probability that an engine lasts more than 5,000 km, given its manufacturer
P_last5000_g_A, P_last5000_g_B = 0.99, 0.95

# Law of total probability: weight each conditional by its manufacturer's share
lasting_from_A = P_A * P_last5000_g_A
lasting_from_B = P_B * P_last5000_g_B
P_last_5000 = lasting_from_A + lasting_from_B

print(P_last_5000)

0.978


Voters

Of the total population of three states X, Y, and Z, 43% are from state X, 25% are from state Y, and 32% are from state Z. A poll is taken and the result is the following:

    53% of the voters support John Doe in state X.
    67% of the voters support John Doe in state Y.
    32% of the voters support John Doe in state Z.

1. Using the figures above, answer the following questions.
What is the probability that a voter lives in state X and supports John Doe?


In [24]:
# Share of the population living in state X
P_X = 0.43

# Support for John Doe among state X voters
P_Support_g_X = 0.53

# Joint probability via the multiplication rule: P(X and Support) = P(X) * P(Support | X)
P_X_n_Support = P_Support_g_X * P_X
print(P_X_n_Support)

0.22790000000000002


2. What is the probability that the voter lives in state Z and does not support John Doe?

In [23]:
# Share of the population living in state Z
P_Z = 0.32

# Support for John Doe among state Z voters, and its complement
P_Support_g_Z = 0.32
P_NoSupport_g_Z = 1 - P_Support_g_Z

# Joint probability: P(Z and NoSupport) = P(Z) * P(NoSupport | Z)
P_Z_n_NoSupport = P_NoSupport_g_Z * P_Z
print(P_Z_n_NoSupport)

0.2176



3. What is the total percentage of voters that support John Doe?

In [25]:
# Population share of each state
P_X, P_Y, P_Z = 0.43, 0.25, 0.32

# Support for John Doe within each state
P_Support_g_X, P_Support_g_Y, P_Support_g_Z = 0.53, 0.67, 0.32

# Law of total probability: sum each state's share times its support rate
support_in_X = P_X * P_Support_g_X
support_in_Y = P_Y * P_Support_g_Y
support_in_Z = P_Z * P_Support_g_Z
P_Support = support_in_X + support_in_Y + support_in_Z
print(P_Support)

0.4978


**Bayes Rule**
Why is Bayes' rule important?
It allows you to calculate conditional probabilities for events that can be partitioned into non-overlapping parts.

*independent*
P(A and B) = P(A)P(B)

*dependent*
P(A and B)  = P(A)P(B|A)

P(A)P(B|A)= P(B)P(A|B)

P(A|B)= P(A)P(B|A)/ P(B)

Factories and parts

A certain electronic part is manufactured by three different vendors named V1, V2, and V3.

Half of the parts are produced by V1, 25% by V2, and the rest by V3. The probability of a part being damaged given that it was produced by V1 is 1%, while it's 2% for V2 and 3% for V3.

If a part taken at random is damaged, answer the following questions.
Instructions 2/3
35 XP

    What is the probability that the part was manufactured by V1?

In [1]:
# Share of parts produced by each vendor
P_V1, P_V2, P_V3 = 0.5, 0.25, 0.25

# Probability that a part is damaged, given the vendor that produced it
P_D_g_V1, P_D_g_V2, P_D_g_V3 = 0.01, 0.02, 0.03

# Joint probability that a part comes from V1 and is damaged
damaged_from_V1 = P_V1 * P_D_g_V1

# Law of total probability: overall chance that a random part is damaged
P_Damaged = damaged_from_V1 + (P_V2 * P_D_g_V2) + (P_V3 * P_D_g_V3)

# Bayes' rule: P(V1 | D) = P(V1) * P(D | V1) / P(D)
P_V1_g_D = damaged_from_V1 / P_Damaged

print(P_V1_g_D)

0.2857142857142857


Swine flu blood test

You go to the doctor about a strong headache. The doctor randomly selects you for a blood test for swine flu, which is suspected to affect 1 in 9,000 people in your city. The accuracy of the test is 99%, meaning that the probability of a false positive is 1%. The probability of a false negative is zero.

Given that you test positive, answer the following questions.

What is the probability that you have swine flu given that your test outcome is positive?


In [2]:
# Prior: swine flu affects 1 in 9,000 people
P_Swine_flu = 1 / 9000
P_no_Swine_flu = 1 - P_Swine_flu

# Test characteristics: no false negatives, 1% false positives
P_Positive_g_Swine_flu = 1
P_Positive_g_no_Swine_flu = 0.01

# Law of total probability: a positive result is either a true or a false positive
true_positive = P_Swine_flu * P_Positive_g_Swine_flu
false_positive = P_no_Swine_flu * P_Positive_g_no_Swine_flu
P_Positive = true_positive + false_positive

# Bayes' rule: P(Swine_flu | Positive) = P(Swine_flu) * P(Positive | Swine_flu) / P(Positive)
P_Swine_flu_g_Positive = true_positive / P_Positive

print(P_Swine_flu_g_Positive)

0.010990218705352238


Swine flu blood test

You go to the doctor about a strong headache. The doctor randomly selects you for a blood test for swine flu, which is suspected to affect 1 in 9,000 people in your city. The accuracy of the test is 99%, meaning that the probability of a false positive is 1%. The probability of a false negative is zero.

Given that you test positive, answer the following questions.
Instructions 1/3
35 XP

    What is the probability that you have swine flu given that your test outcome is positive?

In [3]:
# Prevalence of swine flu (1 in 9,000) and its complement
P_Swine_flu = 1 / 9000
P_no_Swine_flu = 1 - P_Swine_flu

# Likelihood of a positive result with the disease (no false negatives)
# and without it (1% false positives)
P_Positive_g_Swine_flu = 1
P_Positive_g_no_Swine_flu = 0.01

# Overall probability of a positive result, summed over both health states
P_Positive = (
    (P_Swine_flu * P_Positive_g_Swine_flu)
    + (P_no_Swine_flu * P_Positive_g_no_Swine_flu)
)

# Bayes' rule: the posterior probability of swine flu after a positive test
P_Swine_flu_g_Positive = (P_Swine_flu * P_Positive_g_Swine_flu) / P_Positive

print(P_Swine_flu_g_Positive)

0.010990218705352238


**Law of Large Numbers**
As we increase the sample size, the sample mean approaches the expected value. 

**Central Limit Theorem**

Taking the means of repeated samples drawn from an unknown distribution yields sample means that are approximately normally distributed, provided the samples are large enough and the distribution has finite variance.