In [3]:
import math
import statistics

import numpy as np
import scipy
import scipy.stats
import pandas as pd

In [2]:
x = [8., 1, 2.5, 4, 28.]
x_with_nan = [8., 1, 2.5, math.nan, 4, 28.]
print(x)
print(x_with_nan)

[8.0, 1, 2.5, 4, 28.0]
[8.0, 1, 2.5, nan, 4, 28.0]


In [8]:
y, y_with_nan = np.array(x), np.array(x_with_nan)
print(y)
print(y_with_nan)

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]


In [6]:
z, z_with_nan = pd.Series(x), pd.Series(x_with_nan)
print(z)
print(z_with_nan)

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


In [10]:
mean_ = sum(x) / len(x)
print(mean_)
mean_stats = statistics.mean(x)
print("Mean Statistic : ", mean_stats)

8.7
Mean Statistic :  8.7


In [11]:
mean_with_nan_native = sum(x_with_nan) / len(x_with_nan)
print(mean_with_nan_native)

nan


In [12]:
mean_with_nan_stats = statistics.mean(x_with_nan)
print(mean_with_nan_stats)

nan


In [14]:
mean_with_np = np.mean(y)
print(mean_with_np)

8.7


In [18]:
mean_ignoring_nan_np = np.nanmean(x_with_nan)
print(mean_ignoring_nan_np)

8.7


In [20]:
z.mean()

8.7

In [21]:
0.2 * 2 + 0.5 * 4 + 0.3 * 8

4.8

In [22]:
arr = [2, 2, 4, 4, 4, 4, 4, 8, 8]
print(arr, len(arr))
print(np.mean(arr))

[2, 2, 4, 4, 4, 4, 4, 8, 8] 9
4.444444444444445


In [27]:
arr_x = [8.0, 1, 2.5, 4, 10]
arr_w = [0.1, 0.2, 0.3, 0.25, 7]
print(
    "weightmean : ",
    sum(w*x for x, w in zip(arr_x, arr_w)) / sum(arr_w)
)

weightmean :  9.26751592356688


In [28]:
np_x, np_w = np.array(arr_x), np.array(arr_w)
np.average(np_x, weights=np_w)

9.26751592356688

In [29]:
x += [100]
print(x)

[8.0, 1, 2.5, 4, 28.0, 100]


In [30]:
print(sum(x) / len(x))

23.916666666666668


In [32]:
print(len(x) / sum(1 / i for i in x))

3.2954099646920363


In [33]:
scipy.stats.hmean(x)

3.2954099646920363

In [35]:
gmean = 1

for item in x:
    gmean *= item

gmean = gmean**(1/len(x))
print(gmean)
print(scipy.stats.gmean(x))

7.793059696775923
7.7930596967759245


In [37]:
print(x, len(x), sorted(x))
print(np.median(x))

[8.0, 1, 2.5, 4, 28.0, 100] 6 [1, 2.5, 4, 8.0, 28.0, 100]
6.0


In [40]:
if len(x) %2:
    med = sorted(x)[round(0.5*(len(x)-1))]
else:
    ordered_x, index = sorted(x), round(0.5 *len(x))
    med = 0.5 * (ordered_x[index-1]+ ordered_x[index])
print('median native:', med)

median native: 6.0


In [41]:
statistics.median(x), statistics.median_high(x), statistics.median_low(x)

(6.0, 8.0, 4)

In [45]:
sorted(x_with_nan),
statistics.median(x_with_nan)

6.0

In [46]:
print(z)
print(z.median())

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
4.0


In [51]:
print(x)
print(x.count(100))

[8.0, 1, 2.5, 4, 28.0, 100]
1


In [53]:
mode_ = scipy.stats.mode(x)
print(mode_)

ModeResult(mode=array([1.]), count=array([1]))


In [54]:
mode_.mode, mode_.count

(array([1.]), array([1]))

In [55]:
u = pd.Series(x)
print(u)

0      8.0
1      1.0
2      2.5
3      4.0
4     28.0
5    100.0
dtype: float64


In [58]:
print(z)
print(z.mode())
print(u.mode())

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     1.0
1     2.5
2     4.0
3     8.0
4    28.0
dtype: float64
0      1.0
1      2.5
2      4.0
3      8.0
4     28.0
5    100.0
dtype: float64


In [64]:
print(x)
n = len(x)
mean_x = sum(x)/n
variance_x= sum((point-mean_x)**2 for point in x) / (n-1)
std_x = variance_x ** 0.5

skew_x = (sum((item-mean_x)**3 for item in x) * n / ((n-1) * (n-2) * std_x**3))
print(skew_x)
print(scipy.stats.skew(x, bias=False))

[8.0, 1, 2.5, 4, 28.0, 100]
2.12597755867213
2.1259775586721306


In [65]:
z.skew()

1.9470432273905924

In [66]:
print(x)
print(statistics.quantiles(x))

[8.0, 1, 2.5, 4, 28.0, 100]
[2.125, 6.0, 46.0]


In [69]:
print(np.percentile(x, [90, 25, 75]))

[64.     2.875 23.   ]


In [73]:
print(np.nanpercentile(x_with_nan, 50))
print(np.nanquantile(x_with_nan, 0.5))

4.0
4.0


In [74]:
quantile_1 = np.quantile(x, .25)
quantile_2 = np.quantile(x, .75)
print(quantile_1 - quantile_2)

-20.125
