# Medidas de tendência central
## Média
$$\mu = \sum_{i=1}^{N}\frac{y_i}{N}$$

In [6]:
# Arithmetic mean of a small sample of grades; the bare call auto-prints.
grades <- c(6.5, 10, 8, 9.4, 8, 6.4, 7)
mean(grades)

### Média por frequência
$$\mu = \frac{\sum_{i=1}^{n}y_iF_i}{\sum_{i=1}^{n}F_i}$$

In [11]:
# Mean from a frequency table: each distinct value is weighted by the number
# of times it occurs. With raw observations, get the counts via table(data).

data <- c(15, 16, 17, 18, 22, 26)
frequency <- c(2, 9, 7, 6, 5, 6)

# Weighted total divided by the number of observations.
m <- sum(frequency * data) / sum(frequency)
round(m, digits = 2)

### Média por frequência com intervalos
- Mesma fórmula
- Usa o valor central do intervalo como o $y_i$

In [13]:
# Rebuild the frequency distribution table from the class frequencies and
# the start/end of the whole range.
table <- make.fdt(
    f = c(70, 20, 15, 15, 12, 18, 50),
    start = 0,
    end = 70
)
table
# `table` is an fdt object, so mean() uses mean.fdt
mean(table)

Class limits,f,rf,rf(%),cf,cf(%)
"[0,10)",70,0.35,35.0,70,35.0
"[10,20)",20,0.1,10.0,90,45.0
"[20,30)",15,0.075,7.5,105,52.5
"[30,40)",15,0.075,7.5,120,60.0
"[40,50)",12,0.06,6.0,132,66.0
"[50,60)",18,0.09,9.0,150,75.0
"[60,70)",50,0.25,25.0,200,100.0


### Média geral
$$\mu = \frac{\sum_{i=1}^{N_y}n_{yi}\bar{y}_i}{\sum_{i=1}^{N_y}n_{yi}}$$

In [33]:
# Three groups of observations with different sizes.
data <- list(
    4:8,
    1:3,
    9:13
)


# Grand (pooled) mean of several groups: every group mean is weighted by
# the size of its group.
#
# arr: a list with one numeric vector per group.
# Returns a single number; NaN when there are no observations at all.
g.mean <- function(arr) {
    sizes <- lengths(arr)
    # An empty group has zero weight, but its mean is NaN and would turn
    # the whole sum into NaN, so it is left out.
    non_empty <- sizes > 0
    group_means <- vapply(arr[non_empty], mean, numeric(1))

    sum(sizes[non_empty] * group_means) / sum(sizes[non_empty])
}

g.mean(data)

### Média geométrica
1. Pegue dois números
2. Faça um retângulo com lados medindo esses dois números
3. Faça um quadrado de mesma área
4. Pegue o tamanho do lado do quadrado
$$\mu = \sqrt[N]{\prod_{i=1}^{N}y_i}$$

In [36]:
# Geometric mean of two values, shown with two decimals.
data <- c(1.10, 1.20)
gm <- geometric.mean(data)
round(gm, digits = 2)

### Média harmônica
- Para grandezas inversamente proporcionais
$$\mu = \frac{N}{\sum_{i=1}^{N}\frac{1}{y_i}}$$

In [37]:
# Harmonic mean of two values, shown with two decimals.
data <- c(30, 20)
hm <- harmonic.mean(data)
round(hm, digits = 2)

## Moda
- É o que mais aparece

In [39]:
# Expand the value/count pairs into the raw observations, then ask for the
# most frequent value (the mode).
data <- rep(
    c(15, 16, 17, 18, 22, 26),
    times = c(2, 9, 7, 6, 5, 6)
)

mfv(data)

### Moda com intervalos
- Fórmula de Czuber
    - $D_1 = F_i - F_{i - 1}$
    - $D_2 = F_i - F_{i + 1}$
    - $l \leq F_i < L$
$$mo = l + \frac{D_1}{D_1 + D_2}(L - l)$$

In [43]:
# Grouped data: five class frequencies over the range 4 to 24.
table <- make.fdt(f = c(5, 7, 4, 3, 1), start = 4, end = 24)
table
# Mode of the grouped data
mfv(table)

Class limits,f,rf,rf(%),cf,cf(%)
"[4,8)",5,0.25,25,5,25
"[8,12)",7,0.35,35,12,60
"[12,16)",4,0.2,20,16,80
"[16,20)",3,0.15,15,19,95
"[20,24)",1,0.05,5,20,100


## Mediana
- Número que divide os dados em partes iguais
- Se $N$ for ímpar:
$$i_{\tilde{y}} = \frac{N + 1}{2}$$
- Se $N$ for par:
    - Tira a média dos dois centrais
$$i = \frac{N}{2}$$
$$\tilde{y} = \frac{y_i + y_{i + 1}}{2}$$

In [47]:
# Odd number of observations (35): the median is the single central value.
data <- rep(
    c(15, 16, 17, 18, 22, 26),
    times = c(2, 9, 7, 6, 5, 6)
)

length(data)
median(data)

# Even number of observations (36): the same sample plus one extra value,
# so the median is the average of the two central values.
data <- c(data, 27)

length(data)
median(data)

### Mediana com variáveis contínuas
- A classe que contém a mediana é identificada pela frequência acumulada
- Fórmula de interpolação linear
    - $l \leq F_{\tilde{y}} < L$
$$\tilde{y} = l + \frac{\frac{N}{2} - \sum_{i=1}^{N_{F_{\tilde{y}}}}F_i}{F_{\tilde{y}}}(L - l)$$

In [49]:
# Grouped data: seven class frequencies over the range 0 to 70.
table <- make.fdt(
    f = c(70, 20, 15, 15, 12, 18, 50),
    start = 0,
    end = 70
)
table

# Median of the grouped data, shown with two decimals.
md <- median(table)
round(md, digits = 2)

Class limits,f,rf,rf(%),cf,cf(%)
"[0,10)",70,0.35,35.0,70,35.0
"[10,20)",20,0.1,10.0,90,45.0
"[20,30)",15,0.075,7.5,105,52.5
"[30,40)",15,0.075,7.5,120,60.0
"[40,50)",12,0.06,6.0,132,66.0
"[50,60)",18,0.09,9.0,150,75.0
"[60,70)",50,0.25,25.0,200,100.0


## Quantis
- Valores divisores de dados em partes iguais
- Sendo $n$ o número de divisões:
    - Quartis: $n = 4$
    - Decis: $n = 10$
    - Percentis: $n = 100$
- O número de quantis é $n - 1$

### Para variáveis discretas
- Sempre arredonda $I_{Q_i}$ (índice do $i$-ésimo quantil) para cima
- Se $N$ for ímpar:
$$I_{Q_i} = \frac{i\cdot N}{n}$$
$$Q_i = y_{I_{Q_i}}$$
- Se $N$ for par:
$$Q_i = \frac{y_{I_{Q_i}} + y_{I_{Q_i} + 1}}{2}$$

In [51]:
# Quartiles (discrete variable)
data <- c(1, 1, 2, 3, 5, 5, 6, 7, 9, 9, 10, 13)

# Ask for the three quartiles directly rather than dropping the minimum and
# maximum from the default output.
quantile(data, probs = c(0.25, 0.5, 0.75))

In [59]:
# Quartiles (continuous variable)
table <- make.fdt(
    f = c(4, 9, 11, 8, 5, 3),
    start = 50,
    end = 74
)
table

# One call per quartile: `i` selects which of the three is computed.
quartiles <- vapply(
    1:3,
    function(idx) quantile(table, i = idx),
    numeric(1)
)
names(quartiles) <- paste0(c(25, 50, 75), '%')

quartiles

Class limits,f,rf,rf(%),cf,cf(%)
"[50,54)",4,0.1,10.0,4,10.0
"[54,58)",9,0.225,22.5,13,32.5
"[58,62)",11,0.275,27.5,24,60.0
"[62,66)",8,0.2,20.0,32,80.0
"[66,70)",5,0.125,12.5,37,92.5
"[70,74)",3,0.075,7.5,40,100.0


In [60]:
# Deciles (discrete variable)
data <- c(
    1, 3, 5, 6, 6, 9, 10, 13, 15, 20, 22,
    24, 25, 29, 31, 31, 39, 40, 55, 90, 91
)

# Cut points every 10%; entries 2 to 10 are the nine deciles (the first and
# last entries are the 0% and 100% points).
quantile(data, probs = seq(0, 1, by = 0.1))[2:10]

In [64]:
# Deciles (continuous variable)
table <- make.fdt(
    f = c(4, 9, 11, 8, 5, 3),
    start = 50,
    end = 74
)
table

# `probs` asks for a split into ten parts; `i` picks which of the nine
# deciles is computed on each pass.
deciles <- numeric(9)
for (idx in seq_along(deciles)) {
    deciles[idx] <- quantile(table, i = idx, probs = seq(0, 1, 0.1))
}

names(deciles) <- paste0(1:9 * 10, '%')

deciles

Class limits,f,rf,rf(%),cf,cf(%)
"[50,54)",4,0.1,10.0,4,10.0
"[54,58)",9,0.225,22.5,13,32.5
"[58,62)",11,0.275,27.5,24,60.0
"[62,66)",8,0.2,20.0,32,80.0
"[66,70)",5,0.125,12.5,37,92.5
"[70,74)",3,0.075,7.5,40,100.0


In [65]:
# Percentiles (discrete variable)
data <- c(
    31, 31, 37, 40, 48, 50, 51, 51, 60, 62,
    64, 65, 65, 65, 66, 74, 74, 88, 91, 92
)

# Cut points every 1%; entries 2 to 100 are the 99 percentiles.
quantile(data, probs = seq(0, 1, by = 0.01))[2:100]

In [68]:
# Percentiles (continuous variable)
table <- make.fdt(
    f = c(5, 7, 4, 2, 2),
    start = 4,
    end = 24
)
table

# Kept under its own name so the values are not mislabelled as deciles and
# an existing `deciles` vector is not overwritten.
# `probs` asks for a split into 100 parts; `i` picks the percentile.
percentiles <- numeric(99)
for (idx in seq_along(percentiles)) {
    percentiles[idx] <- quantile(table, i = idx, probs = seq(0, 1, 0.01))
}

names(percentiles) <- paste0(1:99, '%')

percentiles

Class limits,f,rf,rf(%),cf,cf(%)
"[4,8)",5,0.25,25,5,25
"[8,12)",7,0.35,35,12,60
"[12,16)",4,0.2,20,16,80
"[16,20)",2,0.1,10,18,90
"[20,24)",2,0.1,10,20,100
