In [25]:
import math
import statistics

import numpy as np
import pandas as pd
import scipy.stats

## Measure of Central Tendency

### Mean/Average



In [6]:
x = [8., 1, 2.5, 4, 28.]
x_with_nan = [8.0, 1, 2.5, math.nan, 4, 28.0]

print(x)
print(x_with_nan)

[8.0, 1, 2.5, 4, 28.0]
[8.0, 1, 2.5, nan, 4, 28.0]


In [12]:
np_x, np_x_with_nan = np.array(x), np.array(x_with_nan)
series_x, series_x_with_nan = pd.Series(x), pd.Series(x_with_nan)

print(np_x)
print(np_x_with_nan)

print(series_x)
print(series_x_with_nan)

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


In [17]:
native_mean = sum(x) / len(x)
stats_mean = statistics.mean(x)
stats_mean_with_nan = statistics.mean(x_with_nan)
np_mean = np.mean(np_x)
np_mean_with_nan = np.mean(np_x_with_nan)
np_nanmean_with_nan = np.nanmean(np_x_with_nan)

print("mean", native_mean)
print("mean with statistics:", stats_mean)
print("mean with nan with statistics:", stats_mean_with_nan)
print("mean with numpy:", np_mean)
print("mean with nan with numpy:", np_mean_with_nan)
print("mean with nan with np.nanmean:", np_nanmean_with_nan)

mean 8.7
mean with statistics: 8.7
mean with nan with statistics: nan
mean with numpy: 8.7
mean with nan with numpy: nan
mean with nan with np.nanmean: 8.7


## Weighted mean 

In [21]:
w = [.1, .2, .3, .25, .15]
print("weight:", w)
print("x:", x)


native_weighted_mean = sum([w[i]*x[i] for i in range(len(x))]) / sum(w)
np_weighted_mean = np.average(np_x, weights=w)

print("weighted mean:", native_weighted_mean)
print("weighted mean with numpy:", np_weighted_mean)

weight: [0.1, 0.2, 0.3, 0.25, 0.15]
x: [8.0, 1, 2.5, 4, 28.0]
weighted mean: 6.95
weighted mean with numpy: 6.95


## Harmonic Mean 

In [26]:
print("x:", x)

native_harmonic_mean = len(x) / sum([1/item for item in x])
stats_harmonic_mean = statistics.harmonic_mean(x)
scipy_harmonic_mean = scipy.stats.hmean(np_x)

print("harmonic mean:", native_harmonic_mean)
print("harmonic mean with statistics:", stats_harmonic_mean)
print("harmonic mean with scipy:", scipy_harmonic_mean)

x: [8.0, 1, 2.5, 4, 28.0]
harmonic mean: 2.7613412228796843
harmonic mean with statistics: 2.7613412228796843
harmonic mean with scipy: 2.7613412228796843


## Geometric Mean 

In [27]:
print("x:", x)
native_gmean = 1
for item in x:
    native_gmean *= item
    native_gmean **= (1 / len(x))
    scipy_gmean = scipy.stats.gmean(np_x)
    
    
native_gmean **= (1 / len(x))
print("geometric mean:", native_gmean)
print("geometric mean with scipy:", scipy_gmean)

x: [8.0, 1, 2.5, 4, 28.0]
geometric mean: 1.1571703651884873
geometric mean with scipy: 4.67788567485604


## Median 

In [57]:
print("x:", x)
print("sorted x:", sorted(x))

new_x = x[:-1].copy()
print("new x:", sorted(new_x))
n = len(new_x)
if n % 2:
    # add n 
    native_median = sorted(new_x)[round(5. * (n-1))]
else: 
    # even n 
    sorted_x, med_index = sorted(new_x), round(.5 * n)
    native_median = (sorted_x[med_index - 1] + sorted_x[med_index]) / 2
stats_median = statistics.median(new_x)
stats_median_low = statistics.median_low(new_x)
stats_median_high = statistics.median_high(new_x)
np_median = np.median(new_x)
    
print("median:", native_median)
print("median with statistics:", stats_median)
print("median low with statistics:", stats_median_low)
print("median high with statistics:", stats_median_high)
print("median with numpy:", np_median)

x: [8.0, 1, 2.5, 4, 28.0]
sorted x: [1, 2.5, 4, 8.0, 28.0]
new x: [1, 2.5, 4, 8.0]
median: 3.25
median with statistics: 3.25
median low with statistics: 2.5
median high with statistics: 4
median with numpy: 3.25


## Mode

In [52]:
max([(v.count(item), item) for item in set(u)])

(3, 12)

In [56]:
u = [2, 3, 2, 8, 12]
v = [12, 15, 12, 15, 21, 15, 12]

native_mode = max([(v.count(item), item) for item in set(u)])[1]
stats_mode = statistics.mode(u)
scipy_mode = scipy.stats.mode(u)

print(u)
print(v)
print("mode:", native_mode)
print("mode with statistics:", stats_mode)
print("mode with scipy:", scipy_mode.mode[0], f"({scipy_mode.count[0]})")

[2, 3, 2, 8, 12]
[12, 15, 12, 15, 21, 15, 12]
mode: 12
mode with statistics: 2
mode with scipy: 2 (2)


## Measures of Spread/Variability

### Variance

In [70]:
print("x:", x)
print("mean:", native_mean)

native_var = sum([(item - native_mean)**2 for item in x]) / (len(x) -1)
stats_var = statistics.variance(x)
np_var = np.var(np_x, ddof=1)

print("variance:", native_var)
print("variance with statistics:", stats_var)
print("variance with numpy:", np_var)
print("variance with pandas seris:", series_x.var())

x: [8.0, 1, 2.5, 4, 28.0]
mean: 8.7
variance: 123.19999999999999
variance with statistics: 123.2
variance with numpy: 123.19999999999999
variance with pandas seris: 123.19999999999999


## Standard Deviattion

In [71]:
native_std = native_var ** .5
stats_std = statistics.stdev(x)
np_std = np.std(np_x, ddof=1)

print("standard dev:", native_std)
print("stdev with statistics:", stats_std)
print("stdev with numpy:", np_std)
print("stdev with pandas series:", series_x.std())

standard dev: 11.099549540409285
stdev with statistics: 11.099549540409287
stdev with numpy: 11.099549540409285
stdev with pandas series: 11.099549540409285


## Skewness

In [75]:
print("x:", x)

scipy_skew = scipy.stats.skew(np_x, bias=False)

print("skewness with scipy:", scipy_skew)
print("skewness with pandas series:", series_x.skew())

x: [8.0, 1, 2.5, 4, 28.0]
skewness with scipy: 1.9470432273905927
skewness with pandas series: 1.9470432273905924


## Quantiles,Percentiles, Quartiles

In [82]:
x = [-5., -1.1, .1, 2., 8., 12.8, 21., 25.8, 41.]
print("x:", x)

print(statistics.quantiles(x, n=4, method="inclusive"))
print(statistics.quantiles(x, n=2, method="inclusive"))

print(np.percentile(x, [25, 50, 75])) # quartiles
print(np.quantile(x, [.25, .5, .75])) #quantiles
print(np.median(x))

x: [-5.0, -1.1, 0.1, 2.0, 8.0, 12.8, 21.0, 25.8, 41.0]
[0.1, 8.0, 21.0]
[8.0]
[ 0.1  8.  21. ]
[ 0.1  8.  21. ]
8.0


## Range

In [84]:
np_x = np.array(x)

print("max:", np_x.max())
print("min:", np_x.min())
print(np.ptp(x))
print(np_x.max() - np_x.min())

max: 41.0
min: -5.0
46.0
46.0
