# Statistics for Finance and Business with Python

## Common Probability Distributions and Confidence Intervals

### Generating Random Numbers with Numpy

In [None]:
import numpy as np

In [None]:
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.random(size = 10)

In [None]:
np.random.uniform(low = 1, high = 10, size = 10)

In [None]:
np.random.normal(size = 10)

In [None]:
np.random.normal(loc = 100, scale = 10, size = 10)

### Reproducibility with np.random.seed()

In [None]:
import numpy as np

In [None]:
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.seed(123)
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.seed(123)
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.seed(5)
np.random.randint(low = 1, high = 11, size = 10)

In [None]:
np.random.seed(5)
np.random.randint(low = 1, high = 11, size = 10)

### Discrete Uniform Distributions

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
np.random.randint(low = 1, high = 7, size = 10)

In [None]:
# Seed NumPy's global generator so the simulated rolls are reproducible.
np.random.seed(seed=123)
# 100,000 rolls of a fair six-sided die; `high` is exclusive, so values are 1..6.
a = np.random.randint(1, 7, 100_000)
a

In [None]:
a.mean()

In [None]:
a.std()

In [None]:
100000/6

In [None]:
# Absolute frequency of each die face (1..6) in the simulated sample `a`.
# Explicit unit-width bin edges (0.5, 1.5, ..., 6.5) centre every bar on its face value;
# `bins = 6` would split [1, 6] into bins of width 5/6 that drift away from the integers.
plt.figure(figsize = (12, 8))
plt.hist(a, bins = np.arange(0.5, 7.5, 1), ec = "black")
plt.title("Discrete Uniform Distribution", fontsize = 20)
plt.ylabel("Absolute Frequency", fontsize = 15)
plt.show()

In [None]:
# Relative frequency of each die face: weighting every roll by 1 / len(a)
# makes the bar heights sum to 1.
# Unit-width bin edges (0.5, 1.5, ..., 6.5) centre every bar on its face value;
# `bins = 6` would split [1, 6] into bins of width 5/6 that drift away from the integers.
plt.figure(figsize = (12, 8))
plt.hist(a, bins = np.arange(0.5, 7.5, 1), weights = np.ones(len(a)) / len(a), ec = "black")
plt.title("Discrete Uniform Distribution", fontsize = 20)
plt.ylabel("Relative Frequency", fontsize = 15)
plt.show()

In [None]:
# Normalised histogram (density = True): the bar areas sum to 1.
# With unit-width bins centred on 1..6, each bar's height equals the estimated
# probability of that face (about 1/6). `bins = 6` would use bins of width 5/6,
# which inflates every bar to about 0.2 and misstates the probability mass.
plt.figure(figsize = (12, 8))
plt.hist(a, bins = np.arange(0.5, 7.5, 1), density = True, ec = "black")
plt.title("Discrete Uniform Distribution", fontsize = 20)
plt.ylabel("pmf", fontsize = 15)
plt.show()

In [None]:
# Cumulative relative frequency of the die faces: with density = True and
# cumulative = True the last bar is normalised to 1.
# Unit-width bin edges (0.5, 1.5, ..., 6.5) centre every bar on its face value;
# `bins = 6` would split [1, 6] into bins of width 5/6 that drift away from the integers.
plt.figure(figsize = (12, 8))
plt.hist(a, bins = np.arange(0.5, 7.5, 1), density = True, cumulative = True, ec = "black")
plt.title("Discrete Uniform Distribution", fontsize = 20)
plt.ylabel("Cumulative Relative Frequency", fontsize = 15)
plt.show()

### Continuous Uniform Distributions

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Re-seed the global generator so the sample below is reproducible.
np.random.seed(seed=123)
# Ten million draws from the continuous uniform distribution on [0, 10).
b = np.random.uniform(0, 10, 10_000_000)

In [None]:
b

In [None]:
b.mean()

In [None]:
b.std()

In [None]:
# Empirical density of the uniform sample `b`: 1000 bins, normalised to unit area.
fig, ax = plt.subplots(figsize=(12, 8))
ax.hist(b, bins=1000, density=True)
ax.set_title("Continuous Uniform Distribution", fontsize=20)
ax.set_ylabel("pdf", fontsize=15)
plt.show()

In [None]:
# Empirical cumulative distribution of the uniform sample `b`.
fig, ax = plt.subplots(figsize=(12, 8))
ax.hist(b, bins=1000, density=True, cumulative=True)
ax.grid()
ax.set_title("Continuous Uniform Distribution", fontsize=20)
ax.set_ylabel("cdf", fontsize=15)
# Probability ticks every 0.1 from 0 to 1.
ax.set_yticks(np.arange(0, 1.1, 0.1))
plt.show()

### Creating a normally distributed Random Variable

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Parameters of the simulated population: mean, standard deviation, number of observations.
mu, sigma, size = 100, 2, 1_000_000

In [None]:
np.random.seed(123)
pop = np.random.normal(loc = mu, scale = sigma, size = size)

In [None]:
pop.size

In [None]:
pop.mean()

In [None]:
pop.std()

In [None]:
# Absolute-frequency histogram of the simulated population `pop` (1000 bins).
fig, ax = plt.subplots(figsize=(20, 8))
ax.hist(pop, bins=1000)
ax.set_title("Normal Distribution", fontsize=20)
ax.set_xlabel("Screw Length", fontsize=15)
ax.set_ylabel("Absolute Frequency", fontsize=15)
plt.show()

In [None]:
import scipy.stats as stats

In [None]:
stats.skew(pop)

In [None]:
stats.kurtosis(pop)

In [None]:
stats.kurtosis(pop, fisher= False)

In [None]:
stats.describe(pop)

### Normal Distribution - Probability Density Function (pdf) with scipy.stats

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [None]:
mu = 100
sigma = 2

In [None]:
# Evaluation grid: 1000 evenly spaced points from 90 to 110 (both endpoints included).
x = np.linspace(start=90, stop=110, num=1000)
x

In [None]:
y = stats.norm.pdf(x, loc = mu, scale = sigma)
y

In [None]:
# Density histogram of `pop` with the pdf values `y` (evaluated on grid `x`) drawn on top in red.
fig, ax = plt.subplots(figsize=(20, 8))
ax.hist(pop, bins=1000, density=True)
ax.plot(x, y, linewidth=3, color="red")
ax.grid()
ax.set_title("Normal Distribution", fontsize=20)
ax.set_xlabel("Screw Length", fontsize=15)
ax.set_ylabel("pdf", fontsize=15)
plt.show()

In [None]:
pop

### Normal Distribution - Cumulative Distribution Function (cdf) with scipy.stats

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [None]:
mu = 100
sigma = 2

In [None]:
x = np.linspace(90, 110, 1000)

In [None]:
y = stats.norm.cdf(x, loc = mu, scale = sigma)

In [None]:
# Cumulative density histogram of `pop` with the cdf values `y` (evaluated on grid `x`) drawn on top in red.
fig, ax = plt.subplots(figsize=(20, 8))
ax.hist(pop, bins=1000, density=True, cumulative=True)
ax.plot(x, y, color="red", linewidth=3)
ax.grid()
ax.set_title("Normal Distribution", fontsize=20)
ax.set_xlabel("Screw Length", fontsize=15)
ax.set_ylabel("cdf", fontsize=15)
# Probability ticks every 0.1 from 0 to 1.
ax.set_yticks(np.arange(0, 1.1, 0.1))
plt.show()

In [None]:
pop

### The Standard Normal Distribution and Z-Scores

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [None]:
pop

In [None]:
mu = pop.mean()
sigma = pop.std()

In [None]:
mu

In [None]:
sigma

In [None]:
pop[0]

In [None]:
(pop[0] - mu) / sigma

In [None]:
pop[1]

In [None]:
(pop[1] - mu) / sigma

In [None]:
(pop - mu) / sigma

In [None]:
z = stats.zscore(pop)
z

In [None]:
round(z.mean(), 4)

In [None]:
z.std()

In [None]:
stats.skew(z)

In [None]:
stats.kurtosis(z)

In [None]:
x = np.linspace(-4, 4, 1000)

In [None]:
y = stats.norm.pdf(x, loc = 0, scale = 1)

In [None]:
# Standard normal pdf, with the x-axis labelled in units of sigma.
fig, ax = plt.subplots(figsize=(20, 8))
# Optional overlay of the empirical z-scores:
# ax.hist(z, bins=1000, density=True)
ax.grid()
ax.plot(x, y, linewidth=3, color="red")

sigma_ticks = np.arange(-4, 5, 1)
sigma_labels = ["-4σ = -4", "-3σ = -3", "-2σ = -2", "-1σ = -1", "mu = 0", "1σ = 1", "2σ = 2", "3σ = 3", "4σ = 4"]
ax.set_xticks(sigma_ticks)
ax.set_xticklabels(sigma_labels, fontsize=15)

ax.set_title("Standard Normal Distribution", fontsize=20)
ax.set_ylabel("pdf", fontsize=15)
plt.show()

In [None]:
y = stats.norm.cdf(x)

In [None]:
# Standard normal cdf, with the x-axis labelled in units of sigma.
fig, ax = plt.subplots(figsize=(20, 8))
# Optional overlay of the empirical z-scores:
# ax.hist(z, bins=1000, density=True, cumulative=True)
ax.plot(x, y, color="red", linewidth=3)
ax.grid()

sigma_ticks = np.arange(-4, 5, 1)
sigma_labels = ["-4σ = -4", "-3σ = -3", "-2σ = -2", "-1σ = -1", "mu = 0", "1σ = 1", "2σ = 2", "3σ = 3", "4σ = 4"]
ax.set_xticks(sigma_ticks)
ax.set_xticklabels(sigma_labels, fontsize=15)

# Probability ticks every 0.1 from 0 to 1.
ax.set_yticks(np.arange(0, 1.1, 0.1))
ax.set_title("Standard Normal Distribution", fontsize=20)
ax.set_ylabel("cdf", fontsize=15)
plt.show()

### Probabilities and Z-Scores with scipy.stats

In [None]:
import numpy as np
import scipy.stats as stats

In [None]:
stats.norm.cdf(-1, loc = 0, scale = 1)

In [None]:
1 - stats.norm.cdf(-1)

In [None]:
stats.norm.cdf(1)

In [None]:
1 - stats.norm.cdf(1)

In [None]:
stats.norm.cdf(1) - stats.norm.cdf(-1)

In [None]:
stats.norm.cdf(-2)

In [None]:
1 - stats.norm.cdf(2)

In [None]:
stats.norm.cdf(2) - stats.norm.cdf(-2)

In [None]:
stats.norm.cdf(0)

In [None]:
pop

In [None]:
minus_two_sigma = pop.mean() - 2 * pop.std()
minus_two_sigma

In [None]:
(pop < minus_two_sigma).mean()

In [None]:
1 -stats.norm.cdf(x = 105, loc = pop.mean(), scale = pop.std())

In [None]:
z = (105-pop.mean()) / pop.std()
z

In [None]:
stats.norm.cdf(z)

In [None]:
stats.norm.ppf(0.5, loc = 0, scale = 1)

In [None]:
stats.norm.ppf(0.05)

In [None]:
stats.norm.ppf(0.95)

In [None]:
stats.norm.ppf(loc = pop.mean(), scale = pop.std(), q = 0.05)

In [None]:
stats.norm.ppf(loc = pop.mean(), scale = pop.std(), q = 0.95)

### Confidence Intervals

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

The ABC Company produces screws. The length of the screws follows a __Normal Distribution__ with __mean 100__ (millimeters) and __standard deviation 2__ (millimeters). Determine the __Confidence Interval__ around the mean within which __90%__ of all observations can be found, i.e. the symmetric interval that leaves 5% of the observations in each tail.


In [None]:
conf = 0.90

In [None]:
tails = (1-conf) / 2
tails

In [None]:
left = stats.norm.ppf(tails)
left

In [None]:
right = stats.norm.ppf(1-tails)
right

In [None]:
stats.norm.interval(conf)

In [None]:
left, right = stats.norm.interval(conf)

In [None]:
left

In [None]:
right

In [None]:
x = np.linspace(-5, 5, 1000)

In [None]:
y = stats.norm.pdf(x)

In [None]:
# Standard normal pdf with the region between `left` and `right` and the two tails shaded separately.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(x, y, color="black", linewidth=2)

# Tails (outside the interval) in blue, central region in red.
outside = (x > right) | (x < left)
inside = (x < right) & (x > left)
ax.fill_between(x, y, where=outside, color="blue", alpha=0.2)
ax.fill_between(x, y, where=inside, color="red", alpha=0.2)
ax.grid()

# Probability labels for each region, then the interval bounds just below the axis.
for text, position, text_size in (
    ("5%", (1.75, 0.01), 20),
    ("5%", (-2.25, 0.01), 20),
    ("90%", (-0.6, 0.2), 40),
    ("-1.645σ", (-1.645, -0.015), 10),
    ("1.645σ", (1.645, -0.015), 10),
):
    ax.annotate(text, xy=position, fontsize=text_size)

ax.set_xticks(np.arange(-4, 5, 1))
ax.set_xticklabels(
    ["-4σ = -4", "-3σ = -3", "-2σ = -2", "-1σ = -1", "mu = 0", "1σ = 1", "2σ = 2", "3σ = 3", "4σ = 4"],
    fontsize=10,
)
ax.set_title("Standard Normal Distribution", fontsize=20)
ax.set_ylabel("pdf", fontsize=15)
plt.show()

In [None]:
x = np.linspace(-5, 5, 1000)

In [None]:
y = stats.norm.cdf(x)

In [None]:
# Standard normal cdf with the interval bounds marked: dashed guides run from each
# bound (left, right) up to the curve and from the curve across to the y-axis.
plt.figure(figsize = (12, 8))
plt.margins(x = 0, y = 0)
plt.plot(x, y, color = "black", linewidth = 2)
plt.vlines(x = [left, right], ymin = 0, ymax = [stats.norm.cdf(left), stats.norm.cdf(right)], linestyle = "--")
plt.hlines(y = [stats.norm.cdf(left), stats.norm.cdf(right)], xmin = -5, xmax = [left, right], linestyle = "--")
plt.grid()
plt.xticks(np.arange(-4, 5, 1),
           labels = ["-4σ = -4", "-3σ = -3", "-2σ = -2", "-1σ = -1", "mu = 0", "1σ = 1", "2σ = 2", "3σ = 3", "4σ = 4"],
           fontsize = 15)
# 21 ticks from 0 to 1 in steps of 0.05. np.arange(0, 1.1, 0.05) would also emit 1.05,
# which lies outside the cdf's [0, 1] range.
plt.yticks(np.linspace(0, 1, 21), fontsize = 10)
plt.annotate("-1.645σ", xy = (-1.60, 0.015), fontsize = 10)
plt.annotate("1.645σ", xy = (1.7, 0.015), fontsize = 10)
plt.title("Standard Normal Distribution", fontsize = 20)
plt.ylabel("cdf", fontsize = 15)
plt.show()

In [None]:
stats.norm.interval(conf, loc = 100, scale = 2)

In [None]:
pop

In [None]:
left, right = np.percentile(pop, [5, 95])

In [None]:
left 

In [None]:
right