# **Basic Statistics With Python**

# Imports

In [1]:
import numpy as np
from scipy.stats import norm,t,chi2
from statistics import mean, mode, median
from statistics import stdev as sample_stdev, pstdev as population_stdev
from statistics import variance as sample_variance, pvariance as population_variance

# Measure_of_Center & Spread

In [None]:
a = [-1.0, 2.5, 3.25, 5.75]
b = [0.0, 12.5, 14.5, 20.0]
print("Mean is: ",mean(a))
print("Mode is: ",mode(a))
print("Median is: ",median(a))
print("Range is: ",median(a))
print("Sample Standard Deviation is: ",sample_stdev(a))
print("Population Standard Deviation is: ",population_stdev(a))
print("Sample Variance is: ",sample_variance(a))
print("Population Variance is: ",population_variance(a))

Mean is:  2.625
Mode is:  -1.0
Median is:  2.875
Sample Standard Deviation is:  2.787621447279622
Population Standard Deviation is:  2.414150989478496
Sample Variance is:  7.770833333333333
Population Variance is:  5.828125


# Quartiles

In [None]:
print("50 Percentile is : ",np.percentile(a,50))
print("25 Percentile is : ",np.percentile(a,25))
print("75 Percentile is : ",np.percentile(a,75))
print("100 Percentile is : ",np.percentile(a,100))
print("IQR is : ",np.percentile(a,75)-np.percentile(a,25))

50 Percentile is :  2.875
25 Percentile is :  1.625
75 Percentile is :  3.875
100 Percentile is :  5.75
IQR is :  2.25


# Covariances & Correlations

In [None]:
a = [-1.0, 2.5, 3.25, 5.75]
b = [0.0, 12.5, 14.5, 20.0]
print("Covariance Matrix is: ",np.cov(a,b))
print("Correlation Matrix is: ",np.corrcoef(a,b))
print("Covariance is: ",np.cov(a,b)[0][1])
print("Correlation is: ",np.corrcoef(a,b)[0][1])

Covariance Matrix is:  [[ 7.77083333 23.33333333]
 [23.33333333 71.41666667]]
Correlation Matrix is:  [[1.         0.99047427]
 [0.99047427 1.        ]]
Covariance is:  23.333333333333332
Correlation is:  0.9904742745252149


# Multidimensional Arrays

In [None]:
A=np.array([[10,14,11,7,9.5,15,19],
            [8,9,17,14.5,12,18,15.5],
            [15,7.5,11.5,10,10.5,7,11],
            [11.5,11,9,12,14,12,7.5]])

In [None]:
# Range
a=np.ptp(A, axis=0)
b=np.ptp(A,axis=1)
print("Range along axis 0:",a)
print("Range along axis 1:",b)

# Same for Standard deviation & Variance

Range along axis 0: [ 7.   6.5  8.   7.5  4.5 11.  11.5]
Range along axis 1: [12.  10.   8.   6.5]


In [None]:
# Variance
a = np.var(A,axis=0) # Population Var (ddof=0) # Sample Var (ddof=1)
b = np.var(A,axis=1)
print(a)
print(b)

[ 6.546875  5.921875  8.796875  7.546875  2.875    16.5      19.0625  ]
[13.98979592 12.8877551   6.12244898  3.92857143]


In [None]:
# Quartile

print("="*20,"\tAxis=0\t","="*20)
## Axis = 0
percentile25_axis_0 = np.percentile(A,25,axis=0)
percentile50_axis_0 = np.percentile(A,50,axis=0)
percentile75_axis_0 = np.percentile(A,75,axis=0)
percentile100_axis_0 = np.percentile(A,100,axis=0)
print("25 Percentile along axis = 0 is : ",percentile25_axis_0)
print("50 Percentile along axis = 0 is : ",percentile50_axis_0)
print("75 Percentile along axis = 0 is : ",percentile75_axis_0)
print("100 Percentile along axis = 0 is : ",percentile100_axis_0)
print("IQR along axis = 0 is : ",percentile75_axis_0 - percentile25_axis_0)

print("\n","="*20,"\tAxis=1\t","="*20)
## Axis = 0
percentile25_axis_1 = np.percentile(A,25,axis=1)
percentile50_axis_1 = np.percentile(A,50,axis=1)
percentile75_axis_1 = np.percentile(A,75,axis=1)
percentile100_axis_1 = np.percentile(A,100,axis=1)
print("25 Percentile along axis = 1 is : ",percentile25_axis_1)
print("50 Percentile along axis = 1 is : ",percentile50_axis_1)
print("75 Percentile along axis = 1 is : ",percentile75_axis_1)
print("100 Percentile along axis = 1 is : ",percentile100_axis_1)
print("IQR along axis = 1 is : ",percentile75_axis_1 - percentile25_axis_1)

25 Percentile along axis = 0 is :  [ 9.5    8.625 10.5    9.25  10.25  10.75  10.125]
50 Percentile along axis = 0 is :  [10.75 10.   11.25 11.   11.25 13.5  13.25]
75 Percentile along axis = 0 is :  [12.375 11.75  12.875 12.625 12.5   15.75  16.375]
100 Percentile along axis = 0 is :  [15.  14.  17.  14.5 14.  18.  19. ]
IQR along axis = 0 is :  [2.875 3.125 2.375 3.375 2.25  5.    6.25 ]

25 Percentile along axis = 1 is :  [ 9.75 10.5   8.75 10.  ]
50 Percentile along axis = 1 is :  [11.  14.5 10.5 11.5]
75 Percentile along axis = 1 is :  [14.5  16.25 11.25 12.  ]
100 Percentile along axis = 1 is :  [19. 18. 15. 14.]
IQR along axis = 1 is :  [4.75 5.75 2.5  2.  ]


# Eigenvalues, Eigenvectors

In [None]:
from numpy.linalg import norm, det, inv, eig
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(A)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [None]:
# Eig returns the eigenvalues array
# And the normalized (unit "length") eigenvectors, such that the column ``v[:,i]`` is the eigenvector corresponding to the eigenvalue ``w[i]``

w,v=eig(A)
for i in range(len(w)):
  print(w[i],v[:,i])

16.116843969807043 [-0.23197069 -0.52532209 -0.8186735 ]
-1.1168439698070427 [-0.78583024 -0.08675134  0.61232756]
-1.3036777264747022e-15 [ 0.40824829 -0.81649658  0.40824829]


# Confidence Intervals For A Normal Distribution

## Confidence Interval for the Mean of a Normal Population:

In [None]:
# Imports
import numpy as np
from scipy.stats import norm,t,chi2

### Theory: Known Population Variance

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\ Z_{\frac{\alpha}{2}}\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ Z_{\frac{\alpha}{2}}) = 1-\alpha$

$P(\overline{X}\ -\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}\ \leq\ \mu\ \leq\ \overline{X}\ +\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}},\ \overline{X}\ +\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}]$

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\infty\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ Z_{\alpha}) = 1-\alpha$

$P(-\infty\ \leq\ \mu\ \leq\ \overline{X}\ +\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[-\infty,\ \overline{X}\ +\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}]$

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\ Z_{\alpha}\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ \infty) = 1-\alpha$

$P(\overline{X}\ -\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}\ \leq\ \mu\ \leq\   \infty) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ Z_{\alpha} \frac{\sigma}{\sqrt{n}},\ \infty]$

### Code: Known Population Variance

In [None]:
# Confidence Interval Of Sample Mean For Known Population Variance
class confidence_interval_for_mean_with_known_variance:
  """
  Parameters
  ----------
  population_sd : known standrad deviation of the population
  n : optional, number of sample members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  Sample_mean : mean of the sample
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, population_sd, c_level, type_c, Sample_mean = 0., n = 0., data=None):
    self.Sample_mean = Sample_mean
    self.population_sd = population_sd
    self.type_c = type_c
    self.n = n
    self.c_level = c_level
    self.data = data
    if data is not None:
      self.Sample_mean = np.mean(list(data))
      self.n = len(list(data))

    confidence_interval_for_mean_with_known_variance.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      c_u = self.Sample_mean + (-norm.ppf((1-self.c_level)/2)) * (self.population_sd/np.sqrt(self.n))
      c_l = self.Sample_mean - (-norm.ppf((1-self.c_level)/2)) * (self.population_sd/np.sqrt(self.n))
      print(c_u," <= μ <= ", c_l)
      # display(Latex(f'${c_l} \leq \mu \leq {c_u}$'))
    elif self.type_c == 'lower_confidence':
      c_u = self.Sample_mean + (-norm.ppf(1-self.c_level)) * (self.population_sd/np.sqrt(self.n))
      print(c_u," <= μ")
      # display(Latex(f'$\mu \leq {c_u}$'))
    elif self.type_c == 'upper_confidence':
      c_l = self.Sample_mean - (-norm.ppf(1-self.c_level)) * (self.population_sd/np.sqrt(self.n))
      print("μ <= ", c_l)
      # display(Latex(f'${c_l} \leq \mu$'))

In [None]:
data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]
confidence_interval_for_mean_with_known_variance(population_sd = 2.9, c_level = 0.95, type_c = 'two_sided_confidence', data=data)
confidence_interval_for_mean_with_known_variance(population_sd = 2.9, c_level = 0.95, type_c = 'lower_confidence', data=data)
confidence_interval_for_mean_with_known_variance(population_sd = 2.9, c_level = 0.95, type_c = 'upper_confidence', data=data)
print("Done")

10.894631851722052  <= μ <=  7.105368148277948
10.590025172719756  <= μ
μ <=  7.4099748272802435
Done


### Theory: Unknown Population Variance

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$P(-\ t_{\frac{\alpha}{2},n-1}\ <\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}} <\ t_{\frac{\alpha}{2},n-1}) = 1-\alpha$

$P(\overline{X}\ -\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}\ <\ \mu\ <\ \overline{X}\ +\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}) = 1- \alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}},\ \overline{X}\ +\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}]$

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\infty\ \leq\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}}\ \leq\ t_{\alpha,n-1}) = 1-\alpha$

$P(-\infty\ \leq\ \mu\ \leq\ \overline{X}\ +\ t_{\alpha,n-1} \frac{S}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[-\infty,\ \overline{X}\ +\ t_{\alpha,n-1} \frac{S}{\sqrt{n}}]$

**C. One-sided upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-t_{\alpha,n-1} \leq\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}}\ \leq\ \infty) = 1-\alpha$

$P(\overline{X}\ -\ t_{\alpha,n-1} \frac{S}{\sqrt{n}} \leq\ \mu\ \leq\ \infty) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ t_{\alpha,n-1} \frac{S}{\sqrt{n}},\ \infty]$

### Code: Unknown Population Variance

In [3]:
class confidence_interval_for_mean_with_unknown_variance:
  """
  Parameters
  ----------
  n : optional, number of sample members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  Sample_std : optional, std of the sample
  Sample_mean : optional, mean of the sample
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, c_level, type_c, Sample_std = 0., Sample_mean = 0., n = 0.,  data=None):
    self.Sample_mean = Sample_mean
    self.Sample_std = Sample_std
    self.type_c = type_c
    self.n = n
    self.c_level = c_level
    self.data = data
    if data is not None:
      self.Sample_mean = np.mean(list(data))
      self.Sample_std = np.std(list(data), ddof=1)
      self.n = len(list(data))

    confidence_interval_for_mean_with_unknown_variance.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      c_u = self.Sample_mean + (t.isf((1-self.c_level)/2, self.n-1)) * (self.Sample_std/np.sqrt(self.n))
      c_l = self.Sample_mean - (t.isf((1-self.c_level)/2, self.n-1)) * (self.Sample_std/np.sqrt(self.n))
      print(c_u," <= μ <= ", c_l)
    elif self.type_c == 'lower_confidence':
      c_u = self.Sample_mean + (t.isf(1-self.c_level, self.n-1)) * (self.Sample_std/np.sqrt(self.n))
      print(c_u," <= μ")
    elif self.type_c == 'upper_confidence':
      c_l = self.Sample_mean - (t.isf(1-self.c_level, self.n-1)) * (self.Sample_std/np.sqrt(self.n))
      print("μ <= ", c_l)

In [None]:
data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'two_sided_confidence', data=data)
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'lower_confidence', data=data)
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'upper_confidence', data=data)
print("Done")

11.369194030150679  <= μ <=  6.630805969849321
10.910503993616548  <= μ
μ <=  7.0894960063834525
Done


In [4]:
data = [7.72,9.58,12.38,7.77,11.27,8.80,11.10,7.80,10.17,6.00]
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'two_sided_confidence', data=data)
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'lower_confidence', data=data)
confidence_interval_for_mean_with_unknown_variance(c_level = 0.95, type_c = 'upper_confidence', data=data)
print("Done")

10.682803667055117  <= μ <=  7.835196332944884
10.41276285902079  <= μ
μ <=  8.105237140979211
Done


## Confidence Interval for the Variance of a Normal Population:

### Theory: Known Population Mean

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an known mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

$S' = \sqrt{\frac{\sum_{i=1}^n\ (x_i\ -\ \overline{x})^2}{n}}$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\frac{\alpha}{2}, n}\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \chi^2_{\frac{\alpha}{2}, n} $

$\ \frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}} \leq\ \sigma^2 \leq\ \frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}$

$\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}}} \leq\ \sigma \leq\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}},\ \frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}}},\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}}]$

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an known mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ 0\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \chi^2_{1-\alpha, n} $

$\ 0 \leq\ \sigma^2 \leq\ \frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}$

$\ 0 \leq\ \sigma \leq\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[0,\ \frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[0,\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}}]$


**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an known mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\alpha, n}\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \infty $

$\ \frac{(n)\ S'^2}{\chi^2_{\alpha, n}} \leq\ \sigma^2 \leq\ \infty$

$\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{\alpha, n}}} \leq\ \sigma \leq\ \infty$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n)\ S'^2}{\chi^2_{\alpha, n}},\ \infty]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n)\ S'^2}{\chi^2_{\alpha, n}}},\ \infty]$

### Code: Known Population Mean

In [None]:
# Confidence Interval Of Sample Variance For Known Population Mean
class confidence_interval_for_var_with_known_mean:
  """
  Parameters
  ----------
  population_mean : known mean of the population
  Sample_var : optional, variance of the sample
  n : optional, number of sample members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, population_mean, c_level, type_c, Sample_var = 0., n = 0., data=None):
    self.population_mean = population_mean
    self.type_c = type_c
    self.n = n
    self.Sample_var = Sample_var
    self.c_level = c_level
    self.data = data
    if data is not None:
      self.n = len(list(data))
      self.Sample_var = np.std(list(data), ddof=1)**2

    confidence_interval_for_var_with_known_mean.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      alpha = 1 - self.c_level
      c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-(alpha/2), self.n-1)
      c_l = ((self.n-1) * self.Sample_var) / chi2.isf(alpha/2, self.n-1)
      c_u_r = np.sqrt(c_u)
      c_l_r = np.sqrt(c_l)
      print(c_l_r," <= σ <= ", c_u_r)
    elif self.type_c == 'lower_confidence':
      alpha = 1 - self.c_level
      c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-alpha, self.n-1)
      c_u_r = np.sqrt(c_u)
      print("σ <= ", c_u_r)
    elif self.type_c == 'upper_confidence':
      alpha = 1 - self.c_level
      c_l = ((self.n-1) * self.Sample_var) / chi2.isf((alpha), self.n-1)
      c_l_r = np.sqrt(c_l)
      print(c_l_r," <= σ")

In [None]:
data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]
confidence_interval_for_var_with_known_mean(population_mean = 9.0, c_level = 0.9, type_c = 'two_sided_confidence', data=data)
confidence_interval_for_var_with_known_mean(population_mean = 9.0, c_level = 0.9, type_c = 'lower_confidence', data=data)
confidence_interval_for_var_with_known_mean(population_mean = 9.0, c_level = 0.9, type_c = 'upper_confidence', data=data)
print("Done")

2.2138006925142597  <= σ <=  5.273705093927732
σ <=  4.666838371858391
2.3849436008794878  <= σ
Done


### Theory: Unkown Population Mean

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\frac{\alpha}{2}, n-1}\ \leq\ \frac{(n-1)\ S^2}{\sigma^2} \ \leq \chi^2_{\frac{\alpha}{2}, n-1} $

$\ \frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}} \leq\ \sigma^2 \leq\ \frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}$

$\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}}} \leq\ \sigma \leq\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}},\ \frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}}},\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}}]$

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ 0 \leq\ \sigma^2 \leq\ \frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}$

$\ 0 \leq\ \sigma \leq\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[0,\ \frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[0,\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}}]$

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}} \leq\ \sigma^2 \leq\ \infty$

$\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}}} \leq\ \sigma \leq\ \infty$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}},\ \infty]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}}},\ \infty]$

### Code: Unknown Population Mean

In [None]:
# Confidence Interval Of Sample Variance For Unknown Population Mean
class confidence_interval_for_var_with_unknown_mean:
  """
  Parameters
  ----------
  population_sd : known standrad deviation of the population
  Sample_var : optional, variance of the sample
  n : optional, number of sample members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, c_level, type_c, Sample_var = 0., n = 0., data=None):
    self.type_c = type_c
    self.n = n
    self.Sample_var = Sample_var
    self.c_level = c_level
    self.data = data
    if data is not None:
      self.n = len(list(data))
      self.Sample_var = np.std(list(data), ddof=1)**2

    confidence_interval_for_var_with_unknown_mean.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      alpha = 1 - self.c_level
      c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-(alpha/2), self.n-1)
      c_l = ((self.n-1) * self.Sample_var) / chi2.isf(alpha/2, self.n-1)
      c_u_r = np.sqrt(c_u)
      c_l_r = np.sqrt(c_l)
      print(c_l_r," <= σ <= ", c_u_r)
    elif self.type_c == 'lower_confidence':
      alpha = 1 - self.c_level
      c_u = ((self.n-1) * self.Sample_var) / chi2.isf(1-alpha, self.n-1)
      c_u_r = np.sqrt(c_u)
      print("σ <= ", c_u_r)
    elif self.type_c == 'upper_confidence':
      alpha = 1 - self.c_level
      c_l = ((self.n-1) * self.Sample_var) / chi2.isf((alpha), self.n-1)
      c_l_r = np.sqrt(c_l)
      print(c_l_r," <= σ")

In [None]:
data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]
confidence_interval_for_var_with_unknown_mean(c_level = 0.9, type_c = 'two_sided_confidence', data=data)
confidence_interval_for_var_with_unknown_mean(c_level = 0.9, type_c = 'lower_confidence', data=data)
confidence_interval_for_var_with_unknown_mean(c_level = 0.9, type_c = 'upper_confidence', data=data)
print("Done")

2.2138006925142597  <= σ <=  5.273705093927732
σ <=  4.666838371858391
2.3849436008794878  <= σ
Done


# Confidence Intervals For 2 Normal Distributions

In [None]:
from scipy.stats import norm
from scipy.stats import chi2
from scipy.stats import t
from scipy.stats import f
from IPython.display import display, Latex

## Confidence Interval for the Difference in Means of Two Normal Populations

In [None]:
# Known Variances
class confidence_interval_for_two_mean_with_known_variances:
  """
  Parameters
  ----------
  population_sd1 : known standrad deviation of the population1
  population_sd2 : known standrad deviation of the population2
  n1 : optional, number of sample1 members
  n2 : optional, number of sample2 members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  Sample_mean1 : optional, mean of the sample1
  Sample_mean2 : optional, mean of the sample2
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, population_sd1, population_sd2, c_level, type_c, Sample_mean1 = 0., Sample_mean2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):
    self.Sample_mean1 = Sample_mean1
    self.Sample_mean2 = Sample_mean2
    self.population_sd1 = population_sd1
    self.population_sd2 = population_sd2
    self.type_c = type_c
    self.n1 = n1
    self.n2 = n2
    self.c_level = c_level
    self.data1 = data1
    self.data2 = data2
    if data1 is not None:
      self.Sample_mean1 = np.mean(list(data1))
      self.n1 = len(list(data1))
    if data2 is not None:
      self.Sample_mean2 = np.mean(list(data2))
      self.n2 = len(list(data2))

    confidence_interval_for_two_mean_with_known_variances.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      c_u = self.Sample_mean1 - self.Sample_mean2 + (-norm.ppf((1-self.c_level)/2)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)
      c_l = self.Sample_mean1 - self.Sample_mean2 - (-norm.ppf((1-self.c_level)/2)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)
      display(Latex(f'${c_l} \leq \mu_x - \mu_y \leq {c_u}$'))
    elif self.type_c == 'lower_confidence':
      c_u = self.Sample_mean1 - self.Sample_mean2 + (-norm.ppf(1-self.c_level)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)
      display(Latex(f'$\mu_x - \mu_y \leq {c_u}$'))
    elif self.type_c == 'upper_confidence':
      c_l = self.Sample_mean1 - self.Sample_mean2 - (-norm.ppf(1-self.c_level)) * np.sqrt(self.population_sd1**2/self.n1 + self.population_sd2**2/self.n2)
      display(Latex(f'${c_l} \leq \mu_x - \mu_y$'))

In [None]:
# Unknown but Equal Variances
class confidence_interval_for_two_mean_with_unknown__but_equal_variances:
  """
  Parameters
  ----------
  n1 : optional, number of sample1 members
  n2 : optional, number of sample2 members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  Sample_mean1 : optional, mean of the sample1
  Sample_mean2 : optional, mean of the sample2
  S1 : optional, std of the sample1
  S2 : optional, std of the sample2
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, c_level, type_c, Sample_mean1 = 0., S1 = 0., S2 = 0., Sample_mean2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):
    self.Sample_mean1 = Sample_mean1
    self.Sample_mean2 = Sample_mean2
    self.S1 = S1
    self.S2 = S2
    self.type_c = type_c
    self.n1 = n1
    self.n2 = n2
    self.c_level = c_level
    self.data1 = data1
    self.data2 = data2
    if data1 is not None:
      self.Sample_mean1 = np.mean(list(data1))
      self.n1 = len(list(data1))
      self.S1 = np.std(list(data1), ddof = 1)
    if data2 is not None:
      self.Sample_mean2 = np.mean(list(data2))
      self.n2 = len(list(data2))
      self.S2 = np.std(list(data2), ddof = 1)

    self.SP2 = ((self.n1-1)*(self.S1**2) + (self.n2-1)*(self.S2**2)) / (self.n1+self.n2-2)

    confidence_interval_for_two_mean_with_unknown__but_equal_variances.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      alpha = 1-self.c_level
      c_u = self.Sample_mean1 - self.Sample_mean2 + (t.isf(alpha/2, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))
      c_l = self.Sample_mean1 - self.Sample_mean2 - (t.isf(alpha/2, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))
      display(Latex(f'${c_l} \leq \mu_x - \mu_y \leq {c_u}$'))
    elif self.type_c == 'lower_confidence':
      alpha = 1-self.c_level
      c_u = self.Sample_mean1 - self.Sample_mean2 + (t.isf(alpha, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))
      display(Latex(f'$\mu_x - \mu_y \leq {c_u}$'))
    elif self.type_c == 'upper_confidence':
      alpha = 1-self.c_level
      c_l = self.Sample_mean1 - self.Sample_mean2 - (t.isf(alpha, df = self.n1+self.n2-2)) * (np.sqrt(self.SP2)*np.sqrt(1/self.n1+1/self.n2))
      display(Latex(f'${c_l} \leq \mu_x - \mu_y$'))

## Confidence Interval for the Ratio of Variances of Two Normal Populations

In [None]:
class confidence_interval_for_ratio_variances:
  """
  Parameters
  ----------
  n1 : optional, number of sample1 members
  n2 : optional, number of sample2 members
  c_level : % confidence level
  type_t : 'two_sided_confidence', 'lower_confidence', 'upper_confidence'
  Sample_mean1 : optional, mean of the sample1
  Sample_mean2 : optional, mean of the sample2
  S1 : optional, std of the sample1
  S2 : optional, std of the sample2
  data : optional, if you do not know the Sample_mean and n, just pass the data
  """
  def __init__(self, c_level, type_c, S1 = 0., S2 = 0., n1 = 0., n2 = 0., data1=None, data2=None):
    self.S1 = S1
    self.S2 = S2
    self.type_c = type_c
    self.n1 = n1
    self.n2 = n2
    self.c_level = c_level
    self.data1 = data1
    self.data2 = data2
    if data1 is not None:
      self.n1 = len(list(data1))
      self.S1 = np.std(list(data1), ddof = 1)
    if data2 is not None:
      self.n2 = len(list(data2))
      self.S2 = np.std(list(data2), ddof = 1)

    confidence_interval_for_ratio_variances.__test(self)

  def __test(self):
    if self.type_c == 'two_sided_confidence':
      alpha = 1-self.c_level
      c_u = ((self.S1**2)/(self.S2**2)) * (1/f.isf(1-alpha/2, self.n1-1, self.n1-1))
      c_l = ((self.S1**2)/(self.S2**2)) * (1/f.isf(alpha/2, self.n1-1, self.n1-1))
      c_u_r = np.sqrt(c_u)
      c_l_r = np.sqrt(c_l)
      display(Latex(f'${c_l} \leq \sigma^2_x/ \sigma^2_y \leq {c_u}$'))
      display(Latex(f'${c_l_r} \leq \sigma_x/ \sigma_y \leq {c_u_r}$'))
    elif self.type_c == 'lower_confidence':
      alpha = 1-self.c_level
      c_u = ((self.S1**2)/(self.S2**2)) * (1/f.isf(1-alpha, self.n1-1, self.n1-1))
      c_u_r = np.sqrt(c_u)
      display(Latex(f'$\sigma^2_x/ \sigma^2_y \leq {c_u}$'))
      display(Latex(f'$\sigma_x/ \sigma_y \leq {c_u_r}$'))
    elif self.type_c == 'upper_confidence':
      alpha = 1-self.c_level
      c_l = ((self.S1**2)/(self.S2**2)) * (1/f.isf(alpha, self.n1-1, self.n1-1))
      c_l_r = np.sqrt(c_l)
      display(Latex(f'${c_l} \leq \sigma^2_x/ \sigma^2_y$'))
      display(Latex(f'${c_l} \leq \sigma_x/ \sigma_y$'))

# Others