In [1]:
import math
import statistics

import numpy as np
import scipy
import scipy.stats
import pandas as pd

In [2]:
print("numpy:", np.__version__)
print("scipy:", scipy.__version__)
print("pandas:", pd.__version__)

numpy: 1.19.2
scipy: 1.5.2
pandas: 1.1.3


In [3]:
x = [8.0, 1, 2.5, 4, 28]
x_with_nan = [8.,1,2.5,math.nan,4,28.]
print(x,len(x))
print(x_with_nan,len(x_with_nan))

[8.0, 1, 2.5, 4, 28] 5
[8.0, 1, 2.5, nan, 4, 28.0] 6


In [4]:
y, y_with_nan = np.array(x), np.array(x_with_nan)
z, z_with_nan = pd.Series(x), pd.Series(x_with_nan)
print(y)
print(y_with_nan)
print(z)
print(z_with_nan)

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


# Measures of Central Tendency

## Mean

In [5]:
print("x:", x)
mean_native = sum(x) / len(x)
print("mean native:", mean_native)
mean_stats = statistics.mean(x)
print("mean statistics:", mean_stats)

x: [8.0, 1, 2.5, 4, 28]
mean native: 8.7
mean statistics: 8.7


In [8]:
print("x with nan:",x_with_nan)
mean_with_nan_native = sum(x_with_nan) / len(x_with_nan)
print("mean with nan native:",mean_with_nan_native)
mean_with_nan_statistics = statistics.mean(x_with_nan)
print("mean with nan statistics:",mean_with_nan_statistics)
mean_with_nan_np = np.mean(y_with_nan)
print("mean with nan numpy:",mean_with_nan_np)

x with nan: [8.0, 1, 2.5, nan, 4, 28.0]
mean with nan native: nan
mean with nan statistics: nan
mean with nan numpy: nan


In [10]:
np.nanmean(y)

8.7

In [11]:
mean_ignoring_nan_np = np.nanmean(x_with_nan)
print("mean ignoring nan np:", mean_ignoring_nan_np)

mean ignoring nan np: 8.7


In [12]:
z

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64

In [14]:
z_with_nan.mean()

8.7

In [16]:
arr = [2,2,4,4,4,4,4,8,8,8]
print(arr, len(arr))
print("mean:",np.mean(arr))
print("weighted mean:", 0.2*2 + 0.5*4 + 0.3*8)

[2, 2, 4, 4, 4, 4, 4, 8, 8, 8] 10
mean: 4.8
weighted mean: 4.8


In [20]:
arr_x = [8.,1,2.5,4]
arr_w = [.1,.2,.3,.25]

print(
    "weighted mean:", 
    sum(w*x for x, w in zip(arr_x,arr_w))/sum(arr_w)
)

weighted mean: 3.2352941176470584


In [21]:
np_x,np_w = np.array(arr_x),np.array(arr_w)
np.average(np_x, weights=np_w)

3.2352941176470584

## Harmonic Mean 

In [34]:
x += [100]
print(x)
print("arithmetic mean:",sum(x)/len(x))
print("harmonic mean:",len(x)/sum(1/i for i in x))

[8.0, 1, 2.5, 4, 28, 100, 100, 100, 100, 100, 100, -100, -100]
arithmetic mean: 34.11538461538461
harmonic mean: 7.0243149363180235


In [30]:
scipy.stats.hmean(x)

5.880106911034747

In [41]:
gmean = 1

for item in x:
    gmean += item

gmean = gmean**(1/len(x))
print(gmean)
print(scipy.stats.gmean(x))

1.5983888578501444
nan


In [44]:
print(x, len(x), sorted(x))
print("median np:", np.median(x))

if len(x) % 2:
    med = sorted(x)[round(0.5 * (len(x)-1))]
else:
    x_ordered, index = sorted(x), round(0.5 * len(x))
    med = 0.5 * (ordered_x[index-1] + ordered_x[index])
print("median native:",med)

[8.0, 1, 2.5, 4, 28, 100, 100, 100, 100, 100, 100, -100, -100] 13 [-100, -100, 1, 2.5, 4, 8.0, 28, 100, 100, 100, 100, 100, 100]
median np: 28.0
median native: 28


In [45]:
statistics.median(x), statistics.median_high(x), statistics.median_low(x)

(28, 28, 28)

In [48]:
n = len(x)
mean_x = sum(x)/len(x)
print(mean_x)

34.11538461538461


In [55]:
print(x)
statistics.quantiles(x)

Python 3.8.5
[8.0, 1, 2.5, 4, 28, 100, 100, 100, 100, 100, 100, -100, -100]


[1.75, 28.0, 100.0]