In [1]:
import random

random.seed(0)
salaries = [round(random.random()*1000000, -3) for _ in range(100)]

In [2]:
# Mean
from statistics import mean
sum(salaries) / len(salaries) == mean(salaries)

True

In [3]:
# Median
import math
def find_median(x):
    x.sort()
    midpoint = (len(x) + 1) / 2 - 1 # subtract 1 bc index starts at 0
    if len(x) % 2:
        # x has odd number of values
        return x[int(midpoint)]
    else:
        return (x[math.floor(midpoint)] + x[math.ceil(midpoint)]) / 2

In [4]:
# Median
from statistics import median
find_median(salaries) == median(salaries)

True

In [5]:
# Mode
from statistics import mode
from collections import Counter
Counter(salaries).most_common(1)[0][0] == mode(salaries)

True

In [6]:
# Sample Variance
from statistics import variance
sum([(x - sum(salaries) / len(salaries))**2 for x in salaries]) / (len(salaries) - 1) == variance(salaries)

True

In [7]:
# Sample Standard Deviation
from statistics import stdev
import math
math.sqrt(sum([(x - sum(salaries) / len(salaries))**2 for x in salaries]) / (len(salaries) - 1)) == stdev(salaries)

True

In [8]:
# Range
max(salaries) - min(salaries)

995000.0

In [9]:
# Coefficient of Variation
from statistics import mean, stdev
stdev(salaries) / mean(salaries)

0.45386998894439035

In [10]:
# Interquartile Range
import math
def quantile(x, pct):
    x.sort()
    index = (len(x) + 1) * pct - 1
    if len(x) % 2:
        # odd, so grab the value at index
        return x[int(index)]
    else:
        return (x[math.floor(index)] + x[math.ceil(index)]) / 2

In [11]:
# 1st Quartile
sum([x < quantile(salaries, 0.25) for x in salaries]) / len(salaries) == 0.25

True

In [13]:
# 3rd Quartile
sum([x < quantile(salaries, 0.75) for x in salaries]) / len(salaries) == 0.75

True

In [14]:
# IQR
q3, q1 = quantile(salaries, 0.75), quantile(salaries, 0.25)
iqr = q3 - q1
iqr

417500.0

In [15]:
# Quartile Coefficient of Dispersion
iqr / (q1 + q3)

0.3417928776094965

In [16]:
# Min-Max Scaling
min_salary, max_salary = min(salaries), max(salaries)
salary_range = max_salary - min_salary

min_max_scaled = [(x - min_salary) / salary_range for x in salaries]
min_max_scaled[:5]

[0.0,
 0.01306532663316583,
 0.07939698492462312,
 0.0814070351758794,
 0.08944723618090453]

In [17]:
# Standardizing
mean_salary, std_salary = mean(salaries), stdev(salaries)
standardized = [(x - mean_salary) / std_salary for x in salaries]
standardized[:5]

[-2.199512275430514,
 -2.150608309943509,
 -1.9023266390094862,
 -1.8948029520114855,
 -1.8647082040194827]

In [18]:
# Covariance
import numpy as np
np.cov(min_max_scaled, standardized)

array([[0.07137603, 0.26716293],
       [0.26716293, 1.        ]])

In [19]:
from statistics import mean
running_total = [
    (x - mean(min_max_scaled)) * (y - mean(standardized))
    for x, y in zip(min_max_scaled, standardized)
]

cov = mean(running_total)
cov

0.26449129918250414

In [20]:
# Pearson Correlation Coefficient
from statistics import stdev
cov / (stdev(min_max_scaled) * stdev(standardized))

0.9900000000000001