# 7 Statistics with NumPy
## 7_4 Averages and Variance in NumPy

#### numpy.median(a, axis=None, out=None, overwrite_input=False, keepdims=False)
- Compute the median along the specified axis.
- Returns the median of the array elements.

#### numpy.average(a, axis=None, weights=None, returned=False, *, keepdims=<no value>)
- Compute the weighted average along the specified axis.

#### numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)
- Compute the arithmetic mean along the specified axis.
- Returns the average of the array elements. The average is taken over the flattened array by default, otherwise over the specified axis. float64 intermediate and return values are used for integer inputs.

#### numpy.std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<no value>, *, where=<no value>, mean=<no value>, correction=<no value>)
- Compute the standard deviation along the specified axis.
- Returns the standard deviation, a measure of the spread of a distribution, of the array elements. The standard deviation is computed for the flattened array by default, otherwise over the specified axis.
- https://www.mathsisfun.com/data/standard-deviation.html

#### numpy.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<no value>, *, where=<no value>, mean=<no value>, correction=<no value>)
- Compute the variance along the specified axis.
- Returns the variance of the array elements, a measure of the spread of a distribution. The variance is computed for the flattened array by default, otherwise over the specified axis.

In [1]:
import numpy as np
np.__version__

'2.1.1'

In [2]:
# Functions

def show_attr(arrnm: str) -> str:
    """Build a one-line summary of an array's shape, ndim, size and dtype.

    `arrnm` is the array's *name* given as a string (e.g. ``'A'``), not the
    array itself; the name is also used as the label of the summary.

    NOTE: the name is resolved with ``eval``, which executes arbitrary
    code -- only call this with trusted, hard-coded names.
    """
    pieces = [f' {arrnm}: ']

    for attr in ('shape', 'ndim', 'size', 'dtype'):
        expr = arrnm + '.' + attr           # e.g. "A.shape"
        pieces.append(f'| {attr}: {eval(expr)} ')

    return ''.join(pieces)

In [3]:
# Sample 3x5 integer matrix reused by the examples that follow.
A = np.array([[1,0,0,3,1],
              [3,6,6,2,9],
              [4,5,3,8,0]])

print(show_attr('A'))
A

 A: | shape: (3, 5) | ndim: 2 | size: 15 | dtype: int64 


array([[1, 0, 0, 3, 1],
       [3, 6, 6, 2, 9],
       [4, 5, 3, 8, 0]])

In [4]:
# np.median() - the value in the middle position of the sorted data.
# With an even number of elements it is the average of the two middle values.
# A has 15 elements (odd), so the median is the single middle value.

np.median(A)

np.float64(3.0)

In [5]:
# np.median(B) - even number of elements (20): the median is the average
# of the two middle values of the sorted data.
B = np.arange(20, 0, -1).reshape(4,5)
print(show_attr('B'))
display(B)
display(np.sort(B, axis=None))      # flattened and sorted, to see the middle values

np.median(B)

 B: | shape: (4, 5) | ndim: 2 | size: 20 | dtype: int64 


array([[20, 19, 18, 17, 16],
       [15, 14, 13, 12, 11],
       [10,  9,  8,  7,  6],
       [ 5,  4,  3,  2,  1]])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

np.float64(10.5)

In [6]:
# np.median(C) - odd number of elements (21): the median is the single
# middle value of the sorted data.
C = np.arange(21, 0, -1).reshape(3,7)
print(show_attr('C'))
display(C)
display(np.sort(C, axis=None))      # flattened and sorted, to see the middle value

np.median(C)

 C: | shape: (3, 7) | ndim: 2 | size: 21 | dtype: int64 


array([[21, 20, 19, 18, 17, 16, 15],
       [14, 13, 12, 11, 10,  9,  8],
       [ 7,  6,  5,  4,  3,  2,  1]])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21])

np.float64(11.0)

In [7]:
# np.average() is similar to np.mean() but also permits a weighted average.
# Without weights, both return the same value.
display(np.mean(A))
display(np.average(A))

np.float64(3.4)

np.float64(3.4)

In [8]:
# Create a matrix of random weights with the same shape as A (3x5).
from numpy.random import Generator as gen 
from numpy.random import PCG64 as pcg 

# No seed is passed to PCG64, so the weights differ on every run.
array_RG = gen(pcg())
weights = array_RG.random(size=(3,5))   # uniform floats in [0, 1)

weights

array([[0.76073611, 0.22743686, 0.49834448, 0.38963295, 0.64092284],
       [0.97456786, 0.44243549, 0.40978337, 0.85839328, 0.837499  ],
       [0.22673113, 0.93719233, 0.13043709, 0.24767807, 0.79057226]])

In [9]:
# Calculate the weighted average of A: sum(A * weights) / sum(weights).
np.average(A, weights=weights)

np.float64(3.323729951755427)

In [10]:
# np.var() -> variance; its square root is the standard deviation.
display(np.var(A))
np.sqrt(np.var(A))


np.float64(7.84)

np.float64(2.8)

In [11]:
# np.std() -> standard deviation; (standard deviation) ** 2 = variance.
# The squared value can differ from np.var(A) by floating-point rounding.
display(np.std(A))
np.std(A) ** 2

np.float64(2.8)

np.float64(7.839999999999999)

Statistics - Averages and variances

median(a[, axis, out, overwrite_input, keepdims])
Compute the median along the specified axis.

average(a[, axis, weights, returned, keepdims])
Compute the weighted average along the specified axis.

mean(a[, axis, dtype, out, keepdims, where])
Compute the arithmetic mean along the specified axis.

std(a[, axis, dtype, out, ddof, keepdims, ...])
Compute the standard deviation along the specified axis.

var(a[, axis, dtype, out, ddof, keepdims, ...])
Compute the variance along the specified axis.

nanmedian(a[, axis, out, overwrite_input, ...])
Compute the median along the specified axis, while ignoring NaNs.

nanmean(a[, axis, dtype, out, keepdims, where])
Compute the arithmetic mean along the specified axis, ignoring NaNs.

nanstd(a[, axis, dtype, out, ddof, ...])
Compute the standard deviation along the specified axis, while ignoring NaNs.

nanvar(a[, axis, dtype, out, ddof, ...])
Compute the variance along the specified axis, while ignoring NaNs.

In [12]:
# Functions

def pos_median(arr):
    """Find the position(s) of the median in the flattened, sorted array.

    Parameters
    ----------
    arr : array_like
        Input data. Only its total number of elements matters, so the
        data does not need to be sorted (or even flattened) beforehand.

    Returns
    -------
    tuple of (int, int)
        ``(low, high)`` indices into ``np.sort(arr, axis=None)``.
        With an odd number of elements both are the single middle index;
        with an even number they are the two middle indices, whose values
        are averaged to obtain the median.

    Raises
    ------
    ValueError
        If `arr` has no elements (there is no middle position).
    """
    n_els = int(np.size(arr))
    if n_els == 0:
        raise ValueError('pos_median() requires a non-empty array')

    ix_med_hig = n_els // 2

    # Odd number of elements: one middle element, low == high.
    # Even number of elements: the lower middle sits just before the upper.
    ix_med_low = ix_med_hig if n_els % 2 == 1 else ix_med_hig - 1

    return ix_med_low, ix_med_hig

In [13]:
# np.median(A) reproduced by hand: sort the flattened array, locate the
# middle position(s) and compare the values found there with NumPy's result.

A_sorted = np.sort(A, axis=None)    # Sorted copy of A, flattened to 1-D
print(show_attr('A_sorted'))
display(A_sorted)

pos_med_low, pos_med_hig = pos_median(A)
print((pos_med_low, pos_med_hig))   # Position (index) of the median

jm_median_low = A_sorted[pos_med_low]
jm_median_high = A_sorted[pos_med_hig]  # upper middle (same as lower for odd sizes)

npmedian = np.median(A)

display(jm_median_low, jm_median_high, npmedian)

 A_sorted: | shape: (15,) | ndim: 1 | size: 15 | dtype: int64 


array([0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 8, 9])

(7, 7)


np.int64(3)

np.int64(3)

np.float64(3.0)

In [14]:
# np.median(B) reproduced by hand for an even number of elements (20):
# the median is the average of the two middle values of the sorted data.
B = np.arange(20, 0, -1).reshape(4,5)
display(B)

B_sorted = np.sort(B, axis=None)    # Sorted copy of B, flattened to 1-D
display(B_sorted)

pos_med_low, pos_med_hig = pos_median(B)
print((pos_med_low, pos_med_hig))   # Position (index) of the median

jm_median_low = B_sorted[pos_med_low]
jm_median_hig = B_sorted[pos_med_hig]

npmedian = np.median(B)

display(jm_median_low, jm_median_hig, npmedian)

array([[20, 19, 18, 17, 16],
       [15, 14, 13, 12, 11],
       [10,  9,  8,  7,  6],
       [ 5,  4,  3,  2,  1]])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

(9, 10)


np.int64(10)

np.int64(11)

np.float64(10.5)

In [15]:
# np.median(B) reproduced by hand for an odd number of elements (21):
# the median is the single middle value of the sorted data.
B = np.arange(21, 0, -1)
display(B)

B_sorted = np.sort(B, axis=None)    # Sorted copy of B
display(B_sorted)

pos_med_low, pos_med_hig = pos_median(B)
print((pos_med_low, pos_med_hig))   # Position (index) of the median

jm_median_low = B_sorted[pos_med_low]
jm_median_hig = B_sorted[pos_med_hig]

npmedian = np.median(B)

display(jm_median_low, jm_median_hig, npmedian)


array([21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,
        4,  3,  2,  1])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21])

(10, 10)


np.int64(11)

np.int64(11)

np.float64(11.0)