In [1]:
# ===============================================================================
# Título : Posição (ou separatrizes)
# Autores: Italo Seara, Lucas Luige e Gabriel Prado
# Objetivo: Apresentar os conceitos de posição ou separatrizes
# ===============================================================================

# Posição ou separatrizes
#  - Quartis
#  - Decis
#  - Percentis

# Carregando os pacotes necessários
import numpy as np  # Funções numéricas
import pandas as pd  # Funções para manipulação de dados

In [47]:
# Funções para mostrar os resultados de forma organizada
def quartiles(arr: np.array) -> pd.DataFrame:
    
    q = np.quantile(arr, [0.25, 0.5, 0.75], 
                    method='averaged_inverted_cdf')

    return pd.DataFrame(
        q, 
        columns=['Quartis'], 
        index=['25%', '50%', '75%']
    ).T

def deciles(arr: np.array) -> pd.DataFrame:
    d = np.quantile(arr, np.arange(0.1, 1, 0.1),
                    method='averaged_inverted_cdf')
    
    return pd.DataFrame(
        d,
        columns=['Decis'], 
        index=[f'{i}%' for i in range(10, 100, 10)]
    ).T

def percentiles(arr: np.array) -> pd.DataFrame:
    p = np.quantile(arr, np.arange(0.01, 1, 0.01),
                    method='averaged_inverted_cdf')
    
    return pd.DataFrame(
        p,
        columns=['Percentis'], 
        index=[f'{i}%' for i in range(1, 100)]
    ).T

In [42]:
# - Quartis

# A partir de um vetor
k = np.array([6, 7, 15, 36, 39, 40, 41, 42, 43, 47, 49])

# Todos os quartis
quartiles(k)

Unnamed: 0,25%,50%,75%
Quartis,15.0,40.0,43.0


In [4]:
# Observar:
q1 = np.quantile(k, 0.25)
q3 = np.quantile(k, 0.75)
aiq = q3 - q1 # Amplitude interquartil
q1, q3, aiq

(2.75, 9.0, 6.25)

In [9]:
# A partir de um DataFrame
dt = pd.read_csv('data/iris.csv')
dt.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [55]:
# Todos os quartis
quartiles(dt['petal.length'])

Unnamed: 0,25%,50%,75%
Quartis,1.6,4.35,5.1


In [44]:
# - Decis

# A partir de um vetor
k = np.array([1, 3, 5, 6, 6, 9, 10, 13, 15, 20, 22, 24, 25, 29, 31, 31, 39, 40, 55, 90, 91])

# Todos os decis
deciles(k)

Unnamed: 0,10%,20%,30%,40%,50%,60%,70%,80%,90%
Decis,5.0,6.0,10.0,15.0,22.0,25.0,31.0,39.0,55.0


In [45]:
# Decis específicos
np.quantile(k, 0.3)

10.0

In [46]:
# A partir de um DataFrame
dt = pd.read_csv('data/iris.csv')

# Todos os decis
deciles(dt['petal.length'])

Unnamed: 0,10%,20%,30%,40%,50%,60%,70%,80%,90%
Decis,1.4,1.5,1.7,3.9,4.35,4.65,5.0,5.35,5.8


In [10]:
# Decis específicos
np.quantile(dt['petal.length'], 0.3)

1.7

In [51]:
# - Percentis

# A partir de um vetor
k = np.array([31, 31, 37, 40, 48, 50, 51, 51, 60, 62, 64, 65, 65, 65, 66, 74, 74, 88, 91, 92])

# Todos os percentis
percentiles(k)

Unnamed: 0,1%,2%,3%,4%,5%,6%,7%,8%,9%,10%,...,90%,91%,92%,93%,94%,95%,96%,97%,98%,99%
Percentis,31.0,31.0,31.0,31.0,31.0,31.0,31.0,31.0,31.0,31.0,...,89.5,91.0,91.0,91.0,91.0,91.5,92.0,92.0,92.0,92.0


In [12]:
# Percentis específicos
np.quantile(k, 0.92)

89.44

In [52]:
# A partir de um DataFrame
dt = pd.read_csv('data/iris.csv')

# Todos os percentis
percentiles(dt['petal.length'])

Unnamed: 0,1%,2%,3%,4%,5%,6%,7%,8%,9%,10%,...,90%,91%,92%,93%,94%,95%,96%,97%,98%,99%
Percentis,1.1,1.2,1.3,1.3,1.3,1.3,1.3,1.4,1.4,1.4,...,5.8,5.8,5.9,6.0,6.05,6.1,6.2,6.4,6.65,6.7


In [53]:
# Percentis específicos
np.quantile(dt['petal.length'], 0.07)

1.3430000000000002