Discrete probability (PMF, CDF, Mean, Var)

In [None]:
# Book Figure 6-2

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Earthquake-count example: DV lists the possible counts, PMF their probabilities.
DV = [0, 1, 2, 3]
PMF = [.58, .34, .07, .01]
df = pd.DataFrame({'PMF': PMF, 'DV': DV})

# Narrow bars show the PMF; the translucent bars drawn over them show its
# running total (cumulative sum).
outcomes = df['DV']
plt.bar(outcomes, df['PMF'], width=0.4)
plt.bar(outcomes, df['PMF'].cumsum(), alpha=.5)
plt.xlabel('Number of Earthquake')
plt.ylabel('Probability')

# Mean: probability-weighted sum of the outcomes.
Mean = np.dot(PMF, DV)
print("DV Mean is:", Mean)

# Variance: probability-weighted sum of the squared deviations from the mean.
Dev2 = np.square(np.asarray(DV) - Mean)
Var = np.dot(Dev2, PMF)
print("DV Variance is:", Var)

Discrete probability (simulation)

In [None]:
# Book Figure 6-5

import matplotlib.pyplot as plt
import numpy as np

NUM_ROLLS = 1000
values = [1, 2, 3, 4, 5, 6]

# One entry per die: (label used in the title, face weights, bar colour).
# A weight of None is np.random.choice's default, i.e. no explicit weighting.
die_setups = [
    ("Fair", None, 'blue'),
    ("UnFair", [0.1, 0.1, 0.1, 0.1, 0.1, 0.5], 'red'),
]

for die_label, face_weights, bar_color in die_setups:
    # Roll the die NUM_ROLLS times, then turn per-face counts into
    # relative frequencies.
    sample = np.random.choice(values, NUM_ROLLS, p=face_weights)
    side, count = np.unique(sample, return_counts=True)
    probs = count / len(sample)

    plt.bar(side, probs, color=bar_color)
    plt.title(
        f"Discrete Probability Distribution for {die_label} 6-Sided Die ({NUM_ROLLS} rolls)")
    plt.ylabel("Probability")
    plt.xlabel("Outcome")
    plt.show()

Bernoulli event simulation

In [None]:
# Book Figure 6-6

import numpy as np

# Draw a single value from {0, 1} and report it as a Bernoulli outcome
# (1 -> success, 0 -> failure).
BE = np.random.randint(0, 2)
print('Success' if BE == 1 else 'Failure')

Success


Coin flipping simulation

In [None]:
# Book Figure 6-8

import matplotlib.pyplot as plt
import numpy as np

np.random.seed(10)
def frequency_heads(coin_flip_sequence):
    """Return the fraction of entries in ``coin_flip_sequence`` equal to 1 (heads).

    Raises:
        ZeroDivisionError: if the sequence is empty.
    """
    heads_seen = sum(1 for flip in coin_flip_sequence if flip == 1)
    return heads_seen / len(coin_flip_sequence)
# Flip ten coins (each flip is a random 0 or 1) and report the share of 1s.
coin_flips = []
for _ in range(10):
    coin_flips.append(np.random.randint(0, 2))
freq_heads = frequency_heads(coin_flips)
print(f"Relative Frequency of Heads is {freq_heads}")

# Track how the relative frequency of heads evolves over 1000 flips.
np.random.seed(0)
NUM_FLIPS = 1000
coin_flips = []
frequencies = []
heads_so_far = 0
for flips_so_far in range(1, NUM_FLIPS + 1):
    flip = np.random.randint(0, 2)
    coin_flips.append(flip)
    # Each flip is 0 or 1, so adding it keeps a running head count; this
    # avoids rescanning every earlier flip on each iteration.
    heads_so_far += flip
    frequencies.append(heads_so_far / flips_so_far)

# x runs 1..NUM_FLIPS so each point sits at the number of flips it summarises
# (previously the frequency after k flips was drawn at x = k - 1).
plt.plot(range(1, NUM_FLIPS + 1), frequencies)
plt.axhline(0.5, color='k')  # reference line at 0.5
plt.xlabel('Number of Coin Flips')
plt.ylabel('Head-Relative Frequency')
plt.show()

Bernoulli event in scipy

In [None]:
# Book Figure 6-10

from scipy.stats import bernoulli
import seaborn as sns

# Draw 1000 random values from a Bernoulli distribution with p = 0.6.
data_bern = bernoulli.rvs(p=0.6, size=1000)

# Histogram of the draws with the KDE curve switched off.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases;
# sns.histplot is the suggested replacement — confirm against the installed version.
bern_hist_style = {"linewidth": 1, 'alpha': 1}
ax = sns.distplot(data_bern, kde=False, color="skyblue", hist_kws=bern_hist_style)
ax.set(xlabel='Bernoulli Distribution', ylabel='Frequency')


Binomial distribution chart

In [None]:
# Book Figure 6-14

from scipy.stats import binom
import seaborn as sns

# Draw 100 random values from a binomial distribution with n = 10, p = 0.9.
data_binom = binom.rvs(n=10, p=0.9, size=100)

# Histogram of the draws with the KDE curve switched off.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases;
# sns.histplot is the suggested replacement — confirm against the installed version.
binom_hist_style = {"linewidth": 1, 'alpha': 1}
ax = sns.distplot(data_binom, kde=False, color="green", hist_kws=binom_hist_style)
ax.set(xlabel='Binomial Distribution', ylabel='Frequency')


Binomial distribution probability for a different number of failures

In [None]:
# Book Figure 6-16

from scipy.stats import binom
import matplotlib.pyplot as plt

# Binomial distribution with n = 10, p = 0.3, evaluated at every count 0..n.
n, p = 10, 0.3
r_values = list(range(n + 1))
mean, var = binom.stats(n, p)
dist = [binom.pmf(r, n, p) for r in r_values]    # point probabilities
distc = [binom.cdf(r, n, p) for r in r_values]   # cumulative probabilities
stat = [binom.stats(n, p, loc=0, moments='mvsk')]

# Tab-separated table: count, then its pmf value.
for r, prob in zip(r_values, dist):
    print("\t".join((str(r), str(prob))))
print("Mean, Variance, Skewness, Kurtosis:", stat)

# pmf bars with the translucent cdf bars drawn over them.
plt.bar(r_values, dist)
plt.bar(r_values, distc, alpha=.5)
plt.xlabel('Number of failure')
plt.ylabel('Probability')
plt.show()

Geometric distribution

In [None]:
# Book Figure 6-19

from scipy.stats import geom
import numpy as np
# This cell uses plt and sns; import them here so it does not depend on an
# earlier cell having been run first.
import matplotlib.pyplot as plt
import seaborn as sns

# Geometric distribution with p = 0.3, evaluated at x = 0..10.
x = np.arange(0, 10 + 1)
p = 0.3
dist = geom(p)

# pmf bars, then translucent cdf bars drawn on the same axes.
ax = sns.barplot(x=x, y=dist.pmf(x))
ax = sns.barplot(x=x, y=dist.cdf(x), alpha=.5)
plt.ylabel("Probability", fontsize="14")
plt.xlabel("Year", fontsize="14")


Geometric distribution for accident probability

In [None]:
# Book Figure 6-21

from scipy.stats import geom
import numpy as np
# This cell uses plt; import it here so it does not depend on an earlier
# cell having been run first.
import matplotlib.pyplot as plt

# Geometric pmf with p = 0.3, evaluated at X = 1..10.
X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
p = 0.3
geom_pd = geom.pmf(X, p)

# Stem-style plot: a marker at each pmf value plus a vertical line from 0 up to it.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(X, geom_pd, 'bo', ms=8, label='geom pmf')
plt.ylabel("Probability", fontsize="14")
plt.xlabel("Year", fontsize="14")
ax.vlines(X, 0, geom_pd, colors='b', lw=5, alpha=0.5)


Geometric distribution simulation

In [None]:
# Book Figure 6-23

from scipy.stats import geom
import seaborn as sns

# Draw 1000 random values from a geometric distribution; random_state pins the draws.
# NOTE(review): p is .03 here while the other geometric cells use 0.3 — confirm intended.
data_geom = geom.rvs(p=.03, size=1000, random_state=13)

# Histogram of the draws with the KDE curve switched off.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases;
# sns.histplot is the suggested replacement — confirm against the installed version.
geom_hist_style = {"linewidth": 1, 'alpha': 1}
ax = sns.distplot(data_geom, kde=False, color="skyblue", hist_kws=geom_hist_style)
ax.set(xlabel='Geometrical Distribution', ylabel='Frequency')

Poisson distribution (toll booth)

In [None]:
# Book Figure 6-25

from scipy.stats import poisson
# This cell uses sns; import it here so it does not depend on an earlier
# cell having been run first.
import seaborn as sns

# Draw 100 random values from a Poisson distribution with mu = 3.
data_poisson = poisson.rvs(mu=3, size=100)

# Histogram of the draws with the KDE curve switched off.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases;
# sns.histplot is the suggested replacement — confirm against the installed version.
ax = sns.distplot(data_poisson,
                  kde=False,
                  color="skyblue",
                  hist_kws={"linewidth": 5, 'alpha': 1})
ax.set(xlabel='Poisson Distribution', ylabel='Frequency')

Poisson distribution (water pipe network)

In [None]:
# Book Figure 6-27

from scipy.stats import poisson
# This cell uses plt; import it here so it does not depend on an earlier
# cell having been run first.
import matplotlib.pyplot as plt

# Poisson pmf with rate 2, evaluated at X = 0..10.
X = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
lmbda = 2
poisson_pd = poisson.pmf(X, lmbda)

# Stem-style plot: a marker at each pmf value plus a vertical line from 0 up to it.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(X, poisson_pd, 'bo', ms=8, label='poisson pmf')
ax.vlines(X, 0, poisson_pd, colors='b', lw=5, alpha=0.5)
ax.set(xlabel='Number of cracks', ylabel='Probability')

Poisson distribution (bus station)

In [None]:
# Book Figure 6-29

from scipy.stats import poisson
# This cell uses plt; import it here so it does not depend on an earlier
# cell having been run first.
import matplotlib.pyplot as plt

# Poisson pmf with rate 8, evaluated at X = 0..10.
X = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
lmbda = 8
# Probability values
poisson_pd = poisson.pmf(X, lmbda)
# Plot the probability distribution: a marker at each pmf value plus a
# vertical line from 0 up to it.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(X, poisson_pd, 'bo', ms=8, label='poisson pmf')
plt.ylabel("Probability", fontsize="14")
plt.xlabel("Number of Buses", fontsize="14")
ax.vlines(X, 0, poisson_pd, colors='b', lw=5, alpha=0.5)


Similarity of Poisson and normal distributions

In [None]:
# Book Figure 6-31

from numpy import random
# This cell uses plt; import it here so it does not depend on an earlier
# cell having been run first.
import matplotlib.pyplot as plt
import seaborn as sns

# Overlay 1000 Poisson(lam=50) draws and 1000 Normal(loc=50, scale=7) draws.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases;
# sns.histplot is the suggested replacement — confirm against the installed version.
sns.distplot(random.poisson(lam=50, size=1000), hist=True, label='poisson')
sns.distplot(random.normal(loc=50, scale=7, size=1000), hist=True, label='normal')
# The label= arguments above only become visible once a legend is drawn.
plt.legend()
plt.xlabel("Poisson vs normal distrbutions")


Continuous probability (uniform distribution)

In [None]:
# Book Figure 6-34

from scipy.stats import uniform
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0, 6, 0.1)

# Plot the uniform pdf, then the cdf (in red), each in its own figure.
# Both use loc=2, scale=2.
for curve, line_style in ((uniform.pdf, {}), (uniform.cdf, {'color': 'red'})):
    y = curve(x, 2, 2)
    plt.plot(x, y, **line_style)
    plt.xlabel('Airoplane travel time (hr)')
    plt.ylabel('Probability')
    plt.show()


Random number generation

In [None]:
# Book Figure 6-36

from scipy.stats import uniform
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the uniform distribution with loc=0, scale=1.
y = uniform.rvs(loc=0, scale=1, size=10000)

# Histogram edges from -0.5 up to (not including) 1.5 in steps of 0.01.
bin_edges = np.arange(-.5, 1.5, 0.01)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('Unifrom Distribution')
plt.ylabel('Frequency')
plt.show()

# Theoretical moments of the same distribution (not computed from the sample).
uniform_moments = uniform.stats(loc=0, scale=1, moments='mvsk')
print("mean, variance, Skewness, kurtosis: ", uniform_moments)


Normal distribution

In [None]:
# Book Figure 6-38

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Four normal densities on a common grid, differing in loc (mean) and scale (sd).
x = np.arange(-15, 15, 0.1)
y = norm.pdf(x, 4, 2)
z = norm.pdf(x, 4, 1)
t = norm.pdf(x, 0, 2)
m = norm.pdf(x, 0, 1)

# Label every curve and draw a legend so the four lines can be told apart.
labelled_curves = (
    (y, 'mean=4, sd=2'),
    (z, 'mean=4, sd=1'),
    (t, 'mean=0, sd=2'),
    (m, 'mean=0, sd=1'),
)
for density, curve_label in labelled_curves:
    plt.plot(x, density, label=curve_label)
plt.xlabel('Normal Distribution')
plt.ylabel('Probability')
plt.legend()
plt.show()

Normal distribution simulation

In [None]:
# Book Figure 6-40

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the normal distribution with loc=0, scale=1.
y = norm.rvs(loc=0, scale=1, size=10000)

# Histogram edges from -4 up to (not including) 4 in steps of 0.1.
bin_edges = np.arange(-4, 4, 0.1)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('Standard Normal Distribution')
plt.ylabel('Frequency')
plt.show()

# Theoretical moments of the same distribution (not computed from the sample).
normal_moments = norm.stats(loc=0, scale=1, moments='mvsk')
print("mean, variance, Skewness, kurtosis:", normal_moments)


Normal probability calculation

In [None]:
# Book Figure 6-41

import scipy.stats as st
from scipy.stats import norm

# Speeds are modelled as a normal distribution with loc=80, scale=10.
# "At least 75" is the upper tail P(X >= 75), which is the survival function
# (1 - cdf). The cdf would give P(X <= 75), i.e. "at most 75".
x = st.norm.sf(75, loc=80, scale=10)
print("the probability of cars running with at least 75 kmh:", x)

# ppf inverts the cdf: the value below which 85% of the distribution lies.
y = norm.ppf(.85, 80, 10)
print(" 85% percentile:", y)


t distribution

In [None]:
# Book Figure 6-44

from scipy.stats import norm, t
import matplotlib.pyplot as plt
import numpy as np

# Compare the t density with 1 degree of freedom against the normal density
# with loc=0, scale=1 on the same grid.
x = np.arange(-4, 4, 0.1)
y = t.pdf(x, 1)
z = norm.pdf(x, 0, 1)

# Label both curves and draw a legend so they can be told apart.
plt.plot(x, y, label='t (df=1)')
plt.plot(x, z, label='standard normal')
plt.xlabel('Standard Normal vs t Distribution')
plt.ylabel('Probability')
plt.legend()
plt.show()


t distribution simulation and probability calculation

In [None]:
# Book Figure 6-46

from scipy.stats import t
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st


np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the t distribution with df=10, loc=0, scale=1.
y = t.rvs(df=10, loc=0, scale=1, size=10000)

# Histogram edges from -4 up to (not including) 4 in steps of 0.1.
bin_edges = np.arange(-4, 4, 0.1)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('t Distribution')
plt.ylabel('Frequency')
plt.show()

# The probability and moments below use a different parameterisation
# (df=10, loc=30, scale=5) from the simulated sample above.
prob_at_most_35 = st.t.cdf(35, 10, loc=30, scale=5)
print(" Probability of getting at most 35 MPa:", prob_at_most_35)
t_moments = t.stats(10, loc=30, scale=5, moments='mvsk')
print("mean, variance, Skewness, kurtosis: ", t_moments)


Log-normal distribution

In [None]:
# Book Figure 6-49

from scipy.stats import lognorm
import matplotlib.pyplot as plt
import numpy as np

# Log-normal density, then cumulative distribution (in red), each in its own
# figure. Both use shape s=1, loc=0, scale=2 on the same grid.
lognorm_panels = (
    (lognorm.pdf, 'Log-Normal Density Distribution', {}),
    (lognorm.cdf, 'Log-Normal Cumulative Distribution', {'color': 'red'}),
)
for curve, x_caption, line_style in lognorm_panels:
    x = np.arange(-1, 10, 0.1)
    y = curve(x, s=1, loc=0, scale=2)
    plt.plot(x, y, **line_style)
    plt.xlabel(x_caption)
    plt.ylabel('Probability')
    plt.show()


Log-normal simulation and probability calculation

In [None]:
# Book Figure 6-51

from scipy.stats import lognorm
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st


np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the log-normal distribution with s=1, loc=0, scale=1.
y = lognorm.rvs(s=1, loc=0, scale=1, size=10000)

# Histogram edges from -1 up to (not including) 10 in steps of 0.1.
bin_edges = np.arange(-1, 10, 0.1)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('Log-Normal Distribution')
plt.ylabel('Frequency')
plt.show()

# cdf at 1 and the theoretical moments for the same parameters.
prob_at_most_1 = st.lognorm.cdf(1, 1, loc=0, scale=1)
print("Probability of walking at most 1 km:", prob_at_most_1)
lognorm_moments = lognorm.stats(1, loc=0, scale=1, moments='mvsk')
print("mean, variance, Skewness, kurtosis: ", lognorm_moments)


Exponential distribution

In [None]:
# Book Figure 6-54

from scipy.stats import expon
import matplotlib.pyplot as plt
import numpy as np

# Exponential density with loc=0, scale=2.
x = np.arange(-1, 10, 0.1)
y = expon.pdf(x, loc=0, scale=2)
plt.plot(x, y)
plt.xlabel('Exponential Distribution')
plt.ylabel('Probability')
plt.show()

# Exponential cumulative distribution (in red) with loc=0, scale=5.
# NOTE(review): the density above uses scale=2 but this cdf uses scale=5 —
# confirm the two figures are meant to show different distributions.
x = np.arange(-1, 10, 0.1)
y = expon.cdf(x, loc=0, scale=5)
plt.plot(x, y, color='red')
plt.xlabel('Exponential Distribution')
plt.ylabel('Probability')
plt.show()


Exponential distribution simulation and probability calculation

In [None]:
# Book Figure 6-56

from scipy.stats import expon
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st

np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the exponential distribution with loc=1, scale=2.
y = expon.rvs(loc=1, scale=2, size=10000)

# Histogram edges from -1 up to (not including) 10 in steps of 0.1.
bin_edges = np.arange(-1, 10, 0.1)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('Exponential Distribution')
plt.ylabel('Frequency')
plt.show()

# cdf at 2 and the theoretical moments for the same parameters.
prob_below_2 = st.expon.cdf(2, loc=1, scale=2)
print("cumulative probability less than 2:", prob_below_2)
expon_moments = expon.stats(loc=1, scale=2, moments='mvsk')
print("mean, variance, Skewness, kurtosis:", expon_moments)

Chi-square distribution

In [None]:
# Book Figure 6-56

from scipy.stats import chi2
import matplotlib.pyplot as plt
import numpy as np

# Chi-square densities for 2, 5 and 30 degrees of freedom (loc=0, scale=1).
x = np.arange(-1, 60, 0.1)
y = chi2.pdf(x, 2, 0, 1)
z = chi2.pdf(x, 5, 0, 1)
t = chi2.pdf(x, 30, 0, 1)

# Label every curve and draw a legend so the three lines can be told apart.
for density, curve_label in ((y, 'df=2'), (z, 'df=5'), (t, 'df=30')):
    plt.plot(x, density, label=curve_label)
plt.xlabel('Chi-square Distribution')
plt.ylabel('Probability')
plt.legend()
plt.show()

# Chi-square cumulative distribution for 2 degrees of freedom (loc=0, scale=1).
x = np.arange(-1, 10, 0.1)
# Use chi2, the distribution this cell imports; the bare name `chi` was never
# imported and raised NameError.
y = chi2.cdf(x, 2, 0, 1)
plt.xlabel('Chi-square Cumulative Distribution')
plt.ylabel('Probability')
plt.plot(x, y, color='red')
plt.show()

Chi-square distribution simulation and probability calculation

In [None]:
# Book Figure 6-60

from scipy.stats import chi2
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st

np.random.seed(10)  # fixed seed so the sample is reproducible

# 10000 draws from the chi-square distribution with df=2, loc=2, scale=1.
y = chi2.rvs(df=2, loc=2, scale=1, size=10000)

# Histogram edges from -1 up to (not including) 10 in steps of 0.1.
bin_edges = np.arange(-1, 10, 0.1)
plt.hist(y, bins=bin_edges, edgecolor='blue')
plt.xlabel('Chi-square Distribution')
plt.ylabel('Frequency')
plt.show()

# cdf at 3 and the theoretical moments for the same parameters.
prob_below_3 = st.chi2.cdf(3, 2, loc=2, scale=1)
print("Cumulative Probability less than 3:", prob_below_3)
chi2_moments = chi2.stats(2, loc=2, scale=1, moments='mvsk')
print("mean, variance, Skewness, kurtosis:", chi2_moments)
