# 1. Centrality measures
## 1.1. Mean
$$\mu = \dfrac{1}{n}\sum_{i = 1}^{n} x_i$$

In [1]:
# Sample of 14 age values used for the arithmetic mean.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)
# Arithmetic mean: sum of the values divided by their count.
mean(x = age)

## 1.2. Median

In [2]:
# Sample of 14 age values used for the median.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)
# With an even number of values the median is the mean of the two middle ones.
median(x = age)

## 1.3. Mode

In [3]:
#' Most frequent value (statistical mode) of a vector
#'
#' Counts how often each distinct value occurs and returns the value with the
#' highest count. When several values tie for the highest count, the one that
#' appears first in `values` is returned, because `which.max()` reports the
#' first maximum.
#'
#' @param values An atomic vector (numeric, character, logical, ...).
#' @param na.rm If `TRUE`, missing values are dropped before counting. The
#'   default `FALSE` keeps the original behaviour, in which `NA` is counted
#'   like any other value and can therefore be returned as the mode.
#' @return A length-one vector of the same type as `values`. A zero-length
#'   atomic vector (e.g. `numeric(0)`) yields `NA`.
getMode <- function(values, na.rm = FALSE) {
    if (na.rm) {
        values <- values[!is.na(values)]
    }
    distinct <- unique(values)
    # match() maps every element to the position of its distinct value, so
    # tabulate() produces one occurrence count per entry of `distinct`.
    counts <- tabulate(match(values, distinct))
    distinct[which.max(counts)]
}

In [6]:
# Sample of 14 age values used for the mode.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# Most frequent age in the sample.
getMode(values = age)

# 2. Measures of dispersion
## 2.1. Range

In [8]:
# Observations for the range example.
a <- c(7, 9.5, 10, 11, 14, 15, 19)

# range() returns c(minimum, maximum); both endpoints are kept for later cells.
ptp <- range(a)

In [9]:
# Show both endpoints of the range: ptp[1] is the minimum, ptp[2] the maximum.
ptp

In [10]:
# Width of the range: maximum minus minimum.
ptp[[2]] - ptp[[1]]

# 3. Percentile, IQR
## 3.1. Percentile

In [14]:
# Sample of 14 age values used for the percentile example.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# 35th, 50th and 98th percentiles of the sample.
print(quantile(age, probs = c(0.35, 0.5, 0.98)))

  35%   50%   98% 
15.55 16.50 24.74 


## 3.2. Quartile - IQR

In [13]:
# Sample of 14 age values used for the quartile example.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# With no `probs` argument, quantile() reports 0%, 25%, 50%, 75% and 100%.
print(quantile(x = age))

  0%  25%  50%  75% 100% 
15.0 15.0 16.5 18.0 25.0 


# 4. Variance, Standard Deviation, Skewness, Kurtosis
## 4.1. Variance

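$$Var(X) = \dfrac{1}{n - 1}\sum_{i = 1}^n (x_i - \bar{x})^2$$

Note: R's `var()` computes the *sample* variance, so the sum is divided by $n - 1$ rather than $n$.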

In [15]:
# Sample of 14 age values used for the variance.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# Sample variance of the ages.
var(x = age)

## 4.2. Standard Deviation
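$$\sigma = \sqrt{\dfrac{\sum_{i = 1}^n (x_i - \bar{x})^2}{n - 1}}$$

Note: R's `sd()` computes the *sample* standard deviation, so the sum is divided by $n - 1$ rather than $n$.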

In [16]:
# Sample of 14 age values used for the standard deviation.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# Sample standard deviation of the ages.
sd(x = age)

## 4.3. Skewness
$$skew = \dfrac{1}{n} \dfrac{\sum_{i = 1}^n (X_i - \bar{X})^3}{\sigma^3}, \qquad \sigma^2 = \dfrac{1}{n}\sum_{i = 1}^n (X_i - \bar{X})^2$$

In [25]:
# Install 'moments' only when it is missing, so re-running the notebook does
# not download and reinstall the package every time; then attach it so that
# skewness() and kurtosis() are available.
if (!requireNamespace("moments", quietly = TRUE)) {
    install.packages("moments")
}
library(moments)

Installing package into ‘/home/manhcuong/R/x86_64-pc-linux-gnu-library/4.1’
(as ‘lib’ is unspecified)



In [29]:
# Sample of 14 age values used for the skewness.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# skewness() is provided by the 'moments' package.
skewness(age)

## 4.4. Kurtosis
$$kurtosis = \dfrac{1}{n}\dfrac{\sum_{i = 1}^{n} (X_i - \bar{X})^4}{\sigma^4}, \qquad \sigma^2 = \dfrac{1}{n}\sum_{i = 1}^n (X_i - \bar{X})^2$$

In [30]:
# Sample of 14 age values used for the kurtosis.
age <- c(
    15, 16, 15, 15, 16, 25, 19,
    18, 15, 24, 17, 18, 18, 15
)

# kurtosis() is provided by the 'moments' package.
kurtosis(age)

# 5. Covariance, Correlation
## 5.1. Covariance
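$$cov(X, Y) = \dfrac{1}{n - 1}\sum_{i = 1}^n (X_i - \bar{X}) (Y_i - \bar{Y})$$

Note: R's `cov()` computes the *sample* covariance, so the sum is divided by $n - 1$ rather than $n$.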

In [31]:
# Paired observations: `years` and `salary` hold 15 values each.
years <- c(7, 28, 23, 18, 19, 15, 24, 13, 2, 8, 20, 21, 18, 7, 2)
salary <- c(
    26075, 79370, 65726, 41983, 62308, 41154, 53610, 33697,
    22444, 32562, 43076, 56000, 58667, 22210, 20521
)

# Sample covariance between the two variables.
cov(x = years, y = salary)

## 5.2. Correlation
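$$corr = \dfrac{1}{n - 1} \sum_{i = 1}^n \left(\dfrac{X_i - \bar{X}}{\sigma_X} \right) \left(\dfrac{Y_i - \bar{Y}}{\sigma_Y} \right)$$

Here $\sigma_X$ and $\sigma_Y$ are the *sample* standard deviations (computed with $n - 1$ in the denominator).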

In [32]:
# Paired observations: `years` and `salary` hold 15 values each.
years <- c(7, 28, 23, 18, 19, 15, 24, 13, 2, 8, 20, 21, 18, 7, 2)
salary <- c(
    26075, 79370, 65726, 41983, 62308, 41154, 53610, 33697,
    22444, 32562, 43076, 56000, 58667, 22210, 20521
)

# Pearson correlation coefficient (the default method of cor()).
cor(x = years, y = salary)