In [2]:
import math
import statistics

import numpy as np
import scipy.stats
import pandas as pd

In [3]:
# Report the library versions this notebook was executed with.
for lib_name, lib in (('NumPy', np), ('SciPy', scipy), ('Pandas', pd)):
    print(lib_name + ' Version:', lib.__version__)

NumPy Version: 1.19.1
SciPy Version: 1.5.0
Pandas Version: 1.1.2


In [41]:
# Base sample, plus a copy with a missing value (NaN) inserted at index 3.
x = [8.0, 1, 2.5, 4, 28.0]
x_with_nan = x[:3] + [math.nan] + x[3:]
for sample in (x, x_with_nan):
    print(sample, len(sample))

[8.0, 1, 2.5, 4, 28.0] 5
[8.0, 1, 2.5, nan, 4, 28.0] 6


In [9]:
# NumPy array and pandas Series versions of the two samples.
y = np.array(x)
y_with_nan = np.array(x_with_nan)
z = pd.Series(x)
z_with_nan = pd.Series(x_with_nan)
print(y, y_with_nan, z, z_with_nan, sep='\n-----\n')

[ 8.   1.   2.5  4.  28. ]
-----
[ 8.   1.   2.5  nan  4.  28. ]
-----
0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
-----
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


## Measures of Central Tendency

### Mean

In [10]:
# Arithmetic mean in pure Python: the total divided by the number of items.
mean_native = sum(x) / len(x)
print('x:', x)
print('Mean Native:', mean_native)

x: [8.0, 1, 2.5, 4, 28.0]
Mean Native: 8.7


In [11]:
# The same mean computed by hand and with the statistics module.
mean_native = sum(x) / len(x)
mean_stats = statistics.mean(x)
print('x:', x)
print('Mean Native:', mean_native)
print('Mean Statistics:', mean_stats)

x: [8.0, 1, 2.5, 4, 28.0]
Mean Native: 8.7
Mean Statistics: 8.7


In [12]:
# A single NaN propagates through sum(), so the hand-rolled mean is NaN too.
mean_with_nan_native = sum(x_with_nan) / len(x_with_nan)
print('X With Nan:', x_with_nan)
print('Mean With Mean Native:', mean_with_nan_native)

X With Nan: [8.0, 1, 2.5, nan, 4, 28.0]
Mean With Mean Native: nan


In [14]:
# statistics.mean gives NaN as well when the data contains a NaN.
mean_with_nan_native = sum(x_with_nan) / len(x_with_nan)
mean_with_nan_stats = statistics.mean(x_with_nan)
print('X With Nan:', x_with_nan)
print('Mean With Mean Native:', mean_with_nan_native)
print('Mean With Mean Statistics:', mean_with_nan_stats)

X With Nan: [8.0, 1, 2.5, nan, 4, 28.0]
Mean With Mean Native: nan
Mean With Mean Statistics: nan


In [16]:
# np.mean also returns NaN for NaN-containing data, like the two means above it.
mean_with_nan_native = sum(x_with_nan) / len(x_with_nan)
mean_with_nan_stats = statistics.mean(x_with_nan)
mean_with_nan_np = np.mean(y_with_nan)  # can also be written as y_with_nan.mean()
print('X With Nan:', x_with_nan)
print('Mean With Mean Native:', mean_with_nan_native)
print('Mean With Mean Statistics:', mean_with_nan_stats)
print('Mean With Nan Numpy:', mean_with_nan_np)

X With Nan: [8.0, 1, 2.5, nan, 4, 28.0]
Mean With Mean Native: nan
Mean With Mean Statistics: nan
Mean With Nan Numpy: nan


In [21]:
# np.nanmean skips NaN entries instead of propagating them.
mean_ignoring_nan_np = np.nanmean(x_with_nan)
print('Mean Ignoring Nan Numpy:', mean_ignoring_nan_np)

Mean Ignoring Nan Numpy: 8.7


In [23]:
# pandas skips NaN by default; skipna=False makes the NaN propagate instead.
(
    z_with_nan.mean(skipna=True),
    z_with_nan.mean(skipna=False),
)

(8.7, nan)

In [None]:
arr = [2, 2, 4, 4, 4, 4, 4, 8, 8, 8]
print(arr, len(arr))
print('Mean:', np.mean(arr))
# Each distinct value is weighted by its relative frequency in arr:
# 2 appears 2/10 times, 4 appears 5/10 times, 8 appears 3/10 times.
weighted_mean = 0.2*2 + 0.5*4 + 0.3*8
print('Weighted Mean:', weighted_mean)

In [28]:
# Weighted mean: sum(w_i * x_i) / sum(w_i).
# arr_x and arr_w must have the same length: zip() stops at the shorter
# sequence, so a value without a matching weight would be silently left out
# of the result. Every value listed here has a weight.
arr_x = [8., 1, 2.5, 4]
arr_w = [.1, .2, .3, .25]
print(
    'Weighted Mean:',
    sum(w*x for x, w in zip(arr_x, arr_w))/sum(arr_w)
)

Weighted Mean: 3.2352941176470584


In [29]:
# Same weighted mean with the product written as x*w (multiplication commutes).
# arr_x and arr_w must have the same length: zip() stops at the shorter
# sequence, so a value without a matching weight would be silently left out
# of the result. Every value listed here has a weight.
arr_x = [8., 1, 2.5, 4]
arr_w = [.1, .2, .3, .25]
print(
    'Weighted Mean:',
    sum(x*w for x, w in zip(arr_x, arr_w))/sum(arr_w)
)

Weighted Mean: 3.2352941176470584


In [35]:
# Four values with four weights. The weights do not sum to 1, so the
# weighted sum is normalised by the total weight.
arr_x = [8., 1, 2.5, 4]
arr_w = [.1, .2, .3, .25]
weight_total = sum(arr_w)
print('Sum of the Weight:', weight_total)
print('Weighted Mean:', sum(w*x for x, w in zip(arr_x, arr_w)) / weight_total)

Sum of the Weight: 0.8500000000000001
Weighted Mean: 3.2352941176470584


In [36]:
# NumPy's weighted mean of the same values and weights.
np_x = np.array(arr_x)
np_w = np.array(arr_w)
np.average(np_x, weights=np_w)

3.2352941176470584

In [43]:
# Extend the sample with two outliers of 100.
# x is rebuilt from its base values rather than appended to in place, so
# running this cell any number of times always leaves the same seven values.
x = [8.0, 1, 2.5, 4, 28.0] + [100, 100]
print(x)

[8.0, 1, 2.5, 4, 28.0, 100, 100]


In [45]:
# Harmonic mean: the number of items divided by the sum of their reciprocals.
n_items = len(x)
reciprocal_sum = sum(1/i for i in x)
print(x)
print('Arithmetic Mean:', sum(x)/n_items)
print('Harmonic Mean:', n_items/reciprocal_sum)

[8.0, 1, 2.5, 4, 28.0, 100, 100]
Arithmetic Mean: 34.785714285714285
Harmonic Mean: 3.8236441669918064


In [47]:
# Harmonic mean of x via SciPy, for comparison with the hand-computed value.
scipy.stats.hmean(x)

3.8236441669918064

In [48]:
# Geometric mean: the n-th root of the product of all n values.
gmean = math.prod(x) ** (1/len(x))
print(gmean)

11.221099960393236


In [49]:
# Geometric mean by hand (n-th root of the product) next to SciPy's result.
gmean = math.prod(x) ** (1/len(x))
print(gmean)
print(scipy.stats.gmean(x))

11.221099960393236
11.221099960393238


### Median

In [51]:
ordered_x = sorted(x)
print(x, len(x), ordered_x)
print('Median NumPy:', np.median(x))
print('-----')
# Odd length: the middle element. Even length: mean of the two middle elements.
index, is_odd = divmod(len(ordered_x), 2)
if is_odd:
    med = ordered_x[index]
else:
    med = 0.5*(ordered_x[index-1] + ordered_x[index])
print('Median Native:', med)

[8.0, 1, 2.5, 4, 28.0, 100, 100] 7 [1, 2.5, 4, 8.0, 28.0, 100, 100]
Median NumPy: 8.0
-----
Median Native: 8.0


In [52]:
# Append one more outlier so the sample has an even number of items.
# Note: this extends x in place, so re-running the cell appends again.
x += [1000]
ordered_x = sorted(x)
print(x, len(x), ordered_x)
print('Median NumPy:', np.median(x))
print('-----')
# Odd length: the middle element. Even length: mean of the two middle elements.
index, is_odd = divmod(len(ordered_x), 2)
if is_odd:
    med = ordered_x[index]
else:
    med = 0.5*(ordered_x[index-1] + ordered_x[index])
print('Median Native:', med)

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000] 8 [1, 2.5, 4, 8.0, 28.0, 100, 100, 1000]
Median NumPy: 18.0
-----
Median Native: 18.0


In [54]:
# Interpolated median, then the higher and the lower of the two middle values.
(
    statistics.median(x),
    statistics.median_high(x),
    statistics.median_low(x),
)

(18.0, 28.0, 8.0)

In [55]:
# Drop the last item to get an odd-length sample; in the recorded run all
# three variants then return the same value.
x_without_last = x[:-1]
(
    statistics.median(x_without_last),
    statistics.median_high(x_without_last),
    statistics.median_low(x_without_last),
)

(8.0, 8.0, 8.0)

In [59]:
# With a NaN present, sorted() does not produce a properly ordered list
# (see the recorded output), so the statistics medians built on that
# ordering should not be trusted.
(
    sorted(x_with_nan),
    statistics.median(x_with_nan),
    statistics.median_low(x_with_nan),
    statistics.median_high(x_with_nan),
)

([1, 2.5, 4, 8.0, nan, 28.0], 6.0, 4, 8.0)

In [61]:
# pandas median with and without NaN in the data; sort_values() puts NaN last
# and median() skips it in the recorded run.
print(
    z,
    z.median(),
    z_with_nan.sort_values(),
    z_with_nan.median(),
    sep='\n-----\n',
)

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
-----
4.0
-----
1     1.0
2     2.5
4     4.0
0     8.0
5    28.0
3     NaN
dtype: float64
-----
4.0


In [62]:
# Same display as the previous cell, but the last value printed is the MEAN
# of the NaN-containing series rather than its median.
# NOTE(review): this sits in the Median section - confirm mean() is intended.
print(
    z,
    z.median(),
    z_with_nan.sort_values(),
    z_with_nan.mean(),
    sep='\n-----\n',
)

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
-----
4.0
-----
1     1.0
2     2.5
4     4.0
0     8.0
5    28.0
3     NaN
dtype: float64
-----
8.7


### Mode

In [70]:
# Add a second 1000 so that two values tie for the highest count.
# Note: this extends x in place, so re-running the cell appends again.
x.append(1000)

In [71]:
# For every item, how many times that value occurs in x.
print(x)
print(list(map(x.count, x)))

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]
[1, 1, 1, 1, 1, 2, 2, 2, 2]


In [72]:
# Mode via SciPy. In the recorded run only one value (100) is reported even
# though 1000 occurs equally often.
scipy.stats.mode(x)

ModeResult(mode=array([100.]), count=array([2]))

In [66]:
# Keep the ModeResult so its fields can be inspected individually.
mode_ = scipy.stats.mode(x)
mode_

ModeResult(mode=array([100.]), count=array([2]))

In [67]:
# ModeResult is a (mode, count) pair; unpack it and show both parts.
mode_ = scipy.stats.mode(x)
modal_values, modal_counts = mode_
print(modal_values, modal_counts)

[100.] [2]


In [73]:
# Series.mode() lists every value tied for the highest count (see output).
series_x = pd.Series(x)
series_x.mode()

0     100.0
1    1000.0
dtype: float64

In [69]:
# Every value in z occurs once, so mode() returns all of them.
print(z, z.mode(), sep='\n')

0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     1.0
1     2.5
2     4.0
3     8.0
4    28.0
dtype: float64


### Variance

In [75]:
# Display the current contents of x (including the values appended earlier).
x

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]

In [77]:
# Sample variance: sum of squared deviations from the mean, divided by n - 1.
n = len(x)
mean_x = sum(x) / n
squared_deviations = [(point - mean_x)**2 for point in x]
variance_x = sum(squared_deviations) / (n - 1)
print(mean_x)
print(variance_x)

249.27777777777777
182702.06944444444


In [84]:
# Sample variance (n - 1 denominator) by hand, then via statistics, NumPy
# (ddof=1) and pandas; the mean is printed first.
n = len(x)
mean_x = sum(x) / n
variance_x = sum((point - mean_x)**2 for point in x) / (n - 1)
print(
    mean_x,
    variance_x,
    statistics.variance(x),
    np.var(np.array(x), ddof=1),
    series_x.var(),
    sep='\n-----\n',
)

249.27777777777777
-----
182702.06944444444
-----
182702.06944444444
-----
182702.06944444444
-----
182702.06944444444


### Standard Deviation

In [87]:
# Sample standard deviation (ddof=1) computed five ways; all should agree.
print('Std Native', variance_x**0.5)
print('Std NumPy:', np.std(x, ddof=1))
print('Std Statistics:', statistics.stdev(x))
print('Std Pandas Series:', series_x.std())
# scipy.std was only a deprecated alias of numpy.std (it emits a
# DeprecationWarning and is not available in newer SciPy releases).
# scipy.stats.tstd is SciPy's own sample standard deviation; its ddof
# defaults to 1, matching the other lines.
print('Std SciPy:', scipy.stats.tstd(x))

Std Native 427.4366262318245
Std NumPy: 427.4366262318245
Std Statistics: 427.4366262318245
Std Pandas Series: 427.4366262318245
Std SciPy: 427.4366262318245


  print('Std SciPy:', scipy.std(x, ddof=1))


### Skewness

In [96]:
print(x)
n = len(x)
mean_x = sum(x) / n
variance_x = sum((point - mean_x)**2 for point in x) / (n - 1)
std_x = variance_x ** 0.5

# Bias-corrected sample skewness:
#   n * sum((x_i - mean)^3) / ((n - 1) * (n - 2) * s^3)
cubed_deviation_sum = sum((item - mean_x)**3 for item in x)
skew_x = cubed_deviation_sum * n / ((n - 1) * (n - 2) * std_x**3)
print(skew_x, scipy.stats.skew(x, bias=False), series_x.skew(), sep='\n')

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]
1.5837470893089505
1.5837470893089503
1.583747089308951


### Percentiles

In [99]:
# Show the interpreter version; statistics.quantiles (used in the next cell)
# requires Python 3.8 or newer.
!python --version

Python 3.8.5


In [115]:
# Quartile cut points (n=4) using the 'inclusive' method.
quartiles = statistics.quantiles(x, n=4, method='inclusive')
print(x, quartiles, sep='\n')

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]
[4.0, 28.0, 100.0]


In [108]:
# The 50th percentile, the 0.5 quantile and the median are the same statistic.
print(
    x,
    np.percentile(x, 50),
    np.percentile(x, [25, 50, 75]),
    np.quantile(x, .5),
    np.median(x),
    sep='\n',
)

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]
28.0
[  4.  28. 100.]
28.0
28.0


In [112]:
# Percentiles take 0-100 and quantiles take 0-1; both give the same quartiles.
print(
    x,
    np.percentile(x, 50),
    np.percentile(x, [25, 50, 75]),
    np.quantile(x, [.25, .5, .75]),
    np.median(x),
    sep='\n-----\n',
)

[8.0, 1, 2.5, 4, 28.0, 100, 100, 1000, 1000]
-----
28.0
-----
[  4.  28. 100.]
-----
[  4.  28. 100.]
-----
28.0


In [109]:
# NaN-aware variants ignore the missing value instead of returning NaN.
nan_median_variants = (np.nanpercentile(x_with_nan, 50), np.nanmedian(x_with_nan))
print(x_with_nan)
print(*nan_median_variants)
print(np.nanquantile(x_with_nan, .5))

[8.0, 1, 2.5, nan, 4, 28.0]
4.0 4.0
4.0


In [114]:
# Interquartile range: distance between the third and the first quartile.
q1 = np.percentile(x, 25)
q3 = np.percentile(x, 75)
interquantile = q3 - q1
print(q1, q3, interquantile)

4.0 100.0 96.0
