# Statistics

In [1]:
import numpy as np 

In [36]:
# 1D array: the 20 consecutive integers 0..19
A = np.arange(0, 20)
print(A)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [39]:
A.ndim

1

In [37]:
# 2D array with shape (2, 3): two rows of three integers each
A2 = np.array(
    [
        [11, 12, 13],
        [21, 22, 23],
    ]
)
print(A2)

[[11 12 13]
 [21 22 23]]


In [40]:
A2.ndim

2

## Sum 
- Sum of array elements over a given axis.
    - **Syntax:** `np.sum(array); array-wise sum`
    - **Syntax:** `np.sum(array, axis=0); row-wise sum`
    - **Syntax:** `np.sum(array, axis=1); column-wise sum`

![](../img/axis.jpg)
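Axis 0 is thus the first dimension (the "rows"), and axis 1 is the second dimension (the "columns"). Reducing over `axis=0` collapses the rows and returns one value per column; reducing over `axis=1` collapses the columns and returns one value per row.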

In [41]:
# sum of 1D array `A` (the np.arange(20) array defined above): 0 + 1 + ... + 19
np.sum(A)

190

In [42]:
# array-wise sum of 2D array 
np.sum(A2)

102

In [43]:
# sum of 2D array(axis=0, row-wise sum)
np.sum(A2, axis=0)

array([32, 34, 36])

In [44]:
# sum of 2D array(axis=1, column-wise sum)
np.sum(A2, axis=1)

array([36, 66])

## Mean 
- Compute the arithmetic mean along the specified axis.
- Returns the average of the array elements. The average is taken over the flattened array by default, otherwise over the specified axis. `float64` intermediate and return values are used for integer inputs.

    - **Syntax:** `np.mean(array); array-wise mean`
    - **Syntax:** `np.mean(array, axis=0); row-wise mean`
    - **Syntax:** `np.mean(array, axis=1); column-wise mean`

In [45]:
# compute the average of array `A`
np.mean(A)

9.5

In [46]:
# mean of 2D array(axis=0, row-wise)
np.mean(A2, axis=0)

array([16., 17., 18.])

In [47]:
# mean of 2D array(axis=1, column-wise)
np.mean(A2, axis=1)

array([12., 22.])

## Median
- Compute the median along the specified axis.
- Returns the median of the array elements.
    
    - **Syntax:** `np.median(array); array-wise median`
    - **Syntax:** `np.median(array, axis=0); row-wise median`
    - **Syntax:** `np.median(array, axis=1); column-wise median`

In [48]:
# compute the median of `A`
np.median(A)

9.5

In [21]:
# median of 2D array(axis=0, row-wise)
np.median(A2, axis=0)

array([16., 17., 18.])

In [22]:
# median of 2D array(axis=1, column-wise)
np.median(A2, axis=1)

array([12., 22.])

## Minimum 
- Return the minimum of an array or minimum along an axis.
     
    - **Syntax:** `np.min(array); array-wise min`
    - **Syntax:** `np.min(array, axis=0); row-wise min`
    - **Syntax:** `np.min(array, axis=1); column-wise min`

In [59]:
# minimum value of `A`
np.min(A)

0

In [61]:
# minimum value of A2(axis=0, row-wise)
np.min(A2, axis=0)

array([11, 12, 13])

In [62]:
# minimum value of A2(axis=1, column-wise)
np.min(A2, axis=1)

array([11, 21])

## Maximum
- Return the maximum of an array or maximum along an axis.
     
    - **Syntax:** `np.max(array); array-wise max`
    - **Syntax:** `np.max(array, axis=0); row-wise max`
    - **Syntax:** `np.max(array, axis=1); column-wise max`

In [63]:
# maximum value of `A`
np.max(A)

19

In [64]:
# maximum value of A2(axis=0, row-wise)
np.max(A2, axis=0)

array([21, 22, 23])

In [65]:
# maximum value of A2(axis=1, column-wise)
np.max(A2, axis=1)

array([13, 23])

## Range 
- **Syntax:** `np.max(array) - np.min(array)`

In [66]:
r = np.max(A) - np.min(A)
print(r)

19


## Standard Deviation
- Compute the standard deviation along the specified axis.
- Returns the standard deviation, a measure of the spread of a distribution, of the array elements. The standard deviation is computed for the
flattened array by default, otherwise over the specified axis.
    - **Syntax:** `np.std(array); array-wise std`
    - **Syntax:** `np.std(array, axis=0); row-wise std`
    - **Syntax:** `np.std(array, axis=1); column-wise std`

In [49]:
# compute the standard deviation of `A`
np.std(A)

5.766281297335398

In [25]:
# standard deviation of 2D array(axis=0, row-wise)
np.std(A2, axis=0)

array([5., 5., 5.])

In [27]:
# standard deviation of 2D array(axis=1, column-wise)
np.std(A2, axis=1)

array([0.81649658, 0.81649658])

## Variance
- Compute the variance along the specified axis.
- Returns the variance of the array elements, a measure of the spread of a
  distribution.  The variance is computed for the flattened array by
  default, otherwise over the specified axis.
    - **Syntax:** `np.var(array); array-wise var`
    - **Syntax:** `np.var(array, axis=0); row-wise var`
    - **Syntax:** `np.var(array, axis=1); column-wise var`

In [50]:
# compute the variance of `A`
np.var(A)

33.25

In [32]:
# variance of 2D array(axis=0, row-wise): np.var, i.e. the square of np.std
np.var(A2, axis=0)

array([25., 25., 25.])

In [33]:
# variance of 2D array(axis=1, column-wise): np.var, i.e. the square of np.std
np.var(A2, axis=1)

array([0.66666667, 0.66666667])

## Quantile
- Compute the q-th quantile of the data along the specified axis (`q` is a probability between 0 and 1, inclusive).
    - **Syntax:** `np.quantile(array, q); array-wise quantile`
    - **Syntax:** `np.quantile(array, q, axis=0); row-wise quantile`
    - **Syntax:** `np.quantile(array, q, axis=1); column-wise quantile`

In [55]:
# 25th percentile of `A`
np.quantile(A, 0.25)

4.75

In [56]:
# 50th percentile of `A2`(axis=0)
np.quantile(A2, 0.5, axis=0)

array([16., 17., 18.])

In [57]:
# 75th percentile of `A2`(axis=1)
np.quantile(A2, 0.75, axis=1)

array([12.5, 22.5])

## Correlation Coefficient

In [68]:
# documentation 
np.info(np.corrcoef)

 corrcoef(*args, **kwargs)

Return Pearson product-moment correlation coefficients.

Please refer to the documentation for `cov` for more detail.  The
relationship between the correlation coefficient matrix, `R`, and the
covariance matrix, `C`, is

.. math:: R_{ij} = \frac{ C_{ij} } { \sqrt{ C_{ii} * C_{jj} } }

The values of `R` are between -1 and 1, inclusive.

Parameters
----------
x : array_like
    A 1-D or 2-D array containing multiple variables and observations.
    Each row of `x` represents a variable, and each column a single
    observation of all those variables. Also see `rowvar` below.
y : array_like, optional
    An additional set of variables and observations. `y` has the same
    shape as `x`.
rowvar : bool, optional
    If `rowvar` is True (default), then each row represents a
    variable, with observations in the columns. Otherwise, the relationship
    is transposed: each column represents a variable, while the rows
    contain observations.
bias : _NoValue, option

In [70]:
# compute Correlation Coefficient
np.corrcoef(A2)

array([[1., 1.],
       [1., 1.]])