# Statistics

In [1]:
import numpy as np 

In [2]:
# 1D array 
A1 = np.arange(20)
print(A1)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [4]:
A1.ndim

1

In [5]:
# 2D array 
A2 = np.array([[11, 12, 13], [21, 22, 23]])
print(A2)

[[11 12 13]
 [21 22 23]]


In [6]:
np.sum(A2, axis=0)

array([32, 34, 36])

In [7]:
np.sum(A2)

102

In [None]:
A2.ndim

## Sum 
- Sum of array elements over a given axis.
    - **Syntax:** `np.sum(array); array-wise sum`
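    - **Syntax:** `np.sum(array, axis=0); row-wise sum` (adds down the rows, giving one total per column)
    - **Syntax:** `np.sum(array, axis=1); column-wise sum` (adds across the columns, giving one total per row)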

![](../img/axis.jpg)
Axis 0 is thus the first dimension (the "rows"), and axis 1 is the second dimension (the "columns")

In [8]:
# sum of 1D array 
np.sum(A1)

190

In [9]:
# array-wise sum of 2D array 
np.sum(A2)

102

In [None]:
A2 

In [10]:
# sum of 2D array(axis=0, row-wise sum)
np.sum(A2, axis=0)

array([32, 34, 36])

In [11]:
# sum of 2D array(axis=1, column-wise sum)
np.sum(A2, axis=1)

array([36, 66])

## Mean 
- Compute the arithmetic mean along the specified axis.
- Returns the average of the array elements. The average is taken over the flattened array by default, otherwise over the specified axis. `float64` intermediate and return values are used for integer inputs.

    - **Syntax:** `np.mean(array); array-wise mean`
    - **Syntax:** `np.mean(array, axis=0); row-wise mean`
    - **Syntax:** `np.mean(array, axis=1); column-wise mean`

In [15]:
A1 

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [13]:
A2 

array([[11, 12, 13],
       [21, 22, 23]])

In [16]:
# compute the average of array `A1`
np.mean(A1)

9.5

In [17]:
# mean of 2D array(axis=0, row-wise)
np.mean(A2, axis=0)

array([16., 17., 18.])

In [18]:
# mean of 2D array(axis=1, column-wise)
np.mean(A2, axis=1)

array([12., 22.])

## Median
- Compute the median along the specified axis.
- Returns the median of the array elements.
    
    - **Syntax:** `np.median(array); array-wise median`
    - **Syntax:** `np.median(array, axis=0); row-wise median`
    - **Syntax:** `np.median(array, axis=1); column-wise median`

In [19]:
# compute the median of `A1`
np.median(A1)

9.5

In [20]:
# median of 2D array(axis=0, row-wise)
np.median(A2, axis=0)

array([16., 17., 18.])

In [21]:
# median of 2D array(axis=1, column-wise)
np.median(A2, axis=1)

array([12., 22.])

## Minimum 
- Return the minimum of an array or minimum along an axis.
     
    - **Syntax:** `np.min(array); array-wise min`
    - **Syntax:** `np.min(array, axis=0); row-wise min`
    - **Syntax:** `np.min(array, axis=1); column-wise min`

In [23]:
# minimum value of `A1`
np.min(A1)

0

In [24]:
# minimum value of A2(axis=0, row-wise)
np.min(A2, axis=0)

array([11, 12, 13])

In [25]:
# minimum value of A2(axis=1, column-wise)
np.min(A2, axis=1)

array([11, 21])

## Maximum
- Return the maximum of an array or maximum along an axis.
     
    - **Syntax:** `np.max(array); array-wise max`
    - **Syntax:** `np.max(array, axis=0); row-wise max`
    - **Syntax:** `np.max(array, axis=1); column-wise max`

In [26]:
# maximum value of `A1`
np.max(A1)

19

In [27]:
# maximum value of A2(axis=0, row-wise)
np.max(A2, axis=0)

array([21, 22, 23])

In [28]:
# maximum value of A2(axis=1, column-wise)
np.max(A2, axis=1)

array([13, 23])

## Range 
- **Syntax:** `np.max(array) - np.min(array)`

In [29]:
A1.max() 

19

In [30]:
A1.min() 

0

In [31]:
r = np.max(A1) - np.min(A1)
print(r)

19


## Standard Deviation
- Compute the standard deviation along the specified axis.
- Returns the standard deviation, a measure of the spread of a distribution, of the array elements. The standard deviation is computed for the
flattened array by default, otherwise over the specified axis.
    - **Syntax:** `np.std(array); array-wise std`
    - **Syntax:** `np.std(array, axis=0); row-wise std`
    - **Syntax:** `np.std(array, axis=1); column-wise std`

In [32]:
# compute the standard deviation of `A1`
np.std(A1)

5.766281297335398

In [33]:
# standard deviation of 2D array(axis=0, row-wise)
np.std(A2, axis=0)

array([5., 5., 5.])

In [34]:
# standard deviation of 2D array(axis=1, column-wise)
np.std(A2, axis=1)

array([0.81649658, 0.81649658])

## Variance
- Compute the variance along the specified axis.
- Returns the variance of the array elements, a measure of the spread of a
  distribution.  The variance is computed for the flattened array by
  default, otherwise over the specified axis.
    - **Syntax:** `np.var(array); array-wise var`
    - **Syntax:** `np.var(array, axis=0); row-wise var`
    - **Syntax:** `np.var(array, axis=1); column-wise var`

In [35]:
# compute the variance of `A1`
np.var(A1)

33.25

In [36]:
# variance of 2D array(axis=0, row-wise): one variance per column
np.var(A2, axis=0)

array([25., 25., 25.])

In [37]:
# variance of 2D array(axis=1, column-wise): one variance per row
np.var(A2, axis=1)

array([0.66666667, 0.66666667])

## Quantile
- Compute the q-th quantile of the data along the specified axis.
    - **Syntax:** `np.quantile(array, q); array-wise quantile`
    - **Syntax:** `np.quantile(array, q, axis=0); row-wise quantile`
    - **Syntax:** `np.quantile(array, q, axis=1); column-wise quantile`

In [38]:
# 25th percentile of `A1`
np.quantile(A1, 0.25)

4.75

In [39]:
np.quantile(A1, 0.50)

9.5

In [40]:
np.quantile(A1, 0.75)

14.25

In [41]:
# 50th percentile of `A2`(axis=0)
np.quantile(A2, 0.5, axis=0)

array([16., 17., 18.])

In [42]:
# 75th percentile of `A2`(axis=1)
np.quantile(A2, 0.75, axis=1)

array([12.5, 22.5])

## Correlation Coefficient

In [None]:
# documentation 
np.info(np.corrcoef)

In [43]:
# compute Correlation Coefficient
np.corrcoef(A2)

array([[1., 1.],
       [1., 1.]])