<a href="https://colab.research.google.com/github/g-r-a-e-m-e/essential-math-for-data-science/blob/main/chapter_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 03

## Descriptive and Inferential Statistics

### Descriptive Statistics

#### Mean and Weighted Mean

In [1]:
# Example 3-1. Calculating mean
# Each entry is the number of pets one person owns
sample = [1, 3, 2, 5, 7, 0, 2, 3]

# Arithmetic mean: add up every observation, then divide by how many there are
total_pets = sum(sample)
mean = total_pets / len(sample)

print(mean)

2.875


In [2]:
# Example 3-2. Calculating a weighted mean
sample = [90, 80, 63, 87]
weights = [.20, .20, .20, .40]

# Scale each value by its weight, then normalize by the total weight so the
# result does not depend on the weights summing to exactly 1.
weighted_total = sum(value * weight for value, weight in zip(sample, weights))
weighted_mean = weighted_total / sum(weights)

print(weighted_mean)

81.4


In [3]:
# Example 3-3. Calculating another weighted mean
sample = [90, 80, 63, 87]
weights = [1.0, 1.0, 1.0, 2.0]

# The weights need not sum to 1: dividing by their total rescales them
# proportionally, so the last value simply counts twice as much as the others.
weighted_total = sum(value * weight for value, weight in zip(sample, weights))
weighted_mean = weighted_total / sum(weights)

print(weighted_mean)

81.4


#### Median

In [4]:
# Example 3-4. Calculating the median
# Each entry is the number of pets one person owns
sample = [0, 1, 5, 7, 9, 10, 14]

def median(values):
  """Return the middle value of `values` once sorted.

  For an odd number of values this is the single central element; for an
  even number it is the average of the two central elements (a float).
  The sorted values are printed before the result is returned.
  """
  ranked = sorted(values)
  print(ranked)
  count = len(ranked)
  upper = count // 2  # index of the central element (odd) or the upper of the two (even)

  if count % 2:
    return ranked[upper]
  return (ranked[upper - 1] + ranked[upper]) / 2.0

print(median(sample))

[0, 1, 5, 7, 9, 10, 14]
7


#### Mode

In [5]:
# Example 3-5. Calculating the mode
from collections import defaultdict

sample = [1, 3, 2, 5, 7, 0, 2, 3]

def mode(values):
  """Return a list of the most frequently occurring value(s) in `values`.

  Every value that ties for the highest count is included, so the result
  can hold more than one element.
  """
  tally = defaultdict(int)  # missing keys start at 0
  for item in values:
    tally[item] += 1

  highest = max(tally.values())

  # Walk the distinct values and keep those that reach the top count
  winners = []
  for candidate in set(values):
    if tally[candidate] == highest:
      winners.append(candidate)
  return winners

print(mode(sample))

[2, 3]


#### Variance and Standard Deviation

In [6]:
# Example 3-6: Calculating variance

data = [0, 1, 5, 7, 9, 10, 14]

def variance(values):
  """Return the population variance of `values`.

  This is the average squared distance of each value from the mean,
  dividing by the number of values.
  """
  center = sum(values) / len(values)
  squared_gaps = [(v - center) ** 2 for v in values]
  return sum(squared_gaps) / len(values)

print(variance(data))

21.387755102040813


In [7]:
# Example 3-7. Calculating standard deviation
from math import sqrt

data = [0, 1, 5, 7, 9, 10, 14]

def variance(values):
  """Return the population variance of `values` (divides by the number of values)."""
  center = sum(values) / len(values)
  squared_gaps = [(v - center) ** 2 for v in values]
  return sum(squared_gaps) / len(values)

def std_dev(values):
  """Return the population standard deviation: the square root of the variance."""
  spread = variance(values)
  return sqrt(spread)

print(std_dev(data))

4.624689730353898


##### Sample Variance and Standard Deviation

In [8]:
# Example 3-8. Calculating standard deviation for a sample
from math import sqrt

data = [0, 1, 5, 7, 9, 10, 14]

def variance(values, is_sample: bool = False):
  """Return the variance of `values`.

  With `is_sample=False` (the default) the sum of squared deviations is
  divided by n; with `is_sample=True` it is divided by n - 1.
  """
  center = sum(values) / len(values)
  squared_total = sum((v - center) ** 2 for v in values)

  if is_sample:
    divisor = len(values) - 1
  else:
    divisor = len(values)

  return squared_total / divisor

def std_dev(values, is_sample: bool = False):
  """Return the square root of `variance(values, is_sample)`."""
  spread = variance(values, is_sample)
  return sqrt(spread)

print(f"Variance = {variance(data, True)}")
print(f"Standard Deviation = {std_dev(data, True)}")

Variance = 24.95238095238095
Standard Deviation = 4.99523582550223


##### Probability Density Function

In [10]:
# Example 3-9 The normal distribution PDF
import math
def normal_pdf(x: float, mean: float, std_dev: float):
  """Return the normal-distribution density at `x` for the given mean and standard deviation.

  Evaluates 1 / sqrt(2 * pi * std_dev^2) * exp(-(x - mean)^2 / (2 * std_dev^2)).
  """
  spread_sq = std_dev ** 2
  scale = 1.0 / (2.0 * math.pi * spread_sq) ** 0.5
  exponent = -1.0 * ((x - mean) ** 2 / (2.0 * spread_sq))
  return scale * math.exp(exponent)

# Density at x = 6 for mean 3 and standard deviation 1, i.e. three standard
# deviations above the mean (approximately 0.00443).
normal_pdf(6.0, 3.0, 1.0)

0.13342551183994403

##### Cumulative Distribution Function

In [11]:
# Example 3-10. The normal distribution CDF
from scipy.stats import norm

mean = 64.43
sdev = 2.99

# Probability of a value at or below 64.43; this point is the mean itself,
# so half of the distribution lies to its left.
x = norm.cdf(64.43, loc=mean, scale=sdev)

print(x)

0.5


In [12]:
# Example 3-11. Getting a middle range probability using the CDF
from scipy.stats import norm

mean = 64.43
sdev = 2.99

# P(62 <= value <= 66): area up to the upper bound minus area up to the lower bound
upper_area = norm.cdf(66, mean, sdev)
lower_area = norm.cdf(62, mean, sdev)
x = upper_area - lower_area

print(x)

0.4920450147062894


#### The Inverse CDF