In [4]:
# carregar biblioteca 
import pandas as pd
import seaborn as sns
import numpy as np


## Leitura do conjunto de dados

### pinguins

In [5]:
df_penguins = sns.load_dataset('penguins')
df_penguins

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female
...,...,...,...,...,...,...,...
339,Gentoo,Biscoe,,,,,
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,Female
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,Male
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,Female


- Dicionário de Dados

In [50]:
dicionario_penguins = pd.DataFrame([
    {
        "Nome": "species",
        "Descricao": "Especie de pinguins",
        "Tipo": "Qualitativo",
        "Subtipo": "Nominal"
    },
    {
        "Nome": "island",
        "Descricao": "Ilha onde é localizado essa espécie de pinguim",
        "Tipo": "Qualitativo",
        "Subtipo": "Nominal"
    },
    {
        "Nome": "bill_length_mm",
        "Descricao":"Tamanho do bico do pinguim",
        "Tipo": "Quantitativo",
        "Subtipo": "Contínuo",
    },
     {
        "Nome": "bill_depth_mm",
        "Descricao":"Profundidade do bico do pinguim",
        "Tipo": "Quantitativo",
        "Subtipo": "Contínuo",
    },
     {
         "Nome": "flipper_length_mm",
        "Descricao":"Tamanho da nadadeira do pinguim",
        "Tipo": "Quantitativo",
        "Subtipo": "Contínuo",
    },
    {
        "Nome": "body_mass_g",
        "Descricao":"Massa corporal do pinguim",
        "Tipo": "Quantitativo",
        "Subtipo": "Contínuo",
    },
    {
        "Nome": "sex",
        "Descricao":"Sexo do pinguim",
        "Tipo": "Qualitativo",
        "Subtipo": "Nominal",
    },
])

#Imprimindo o DataFrame
dicionario_penguins


Unnamed: 0,Nome,Descricao,Tipo,Subtipo
0,species,Especie de pinguins,Qualitativo,Nominal
1,island,Ilha onde é localizado essa espécie de pinguim,Qualitativo,Nominal
2,bill_length_mm,Tamanho do bico do pinguim,Quantitativo,Contínuo
3,bill_depth_mm,Profundidade do bico do pinguim,Quantitativo,Contínuo
4,flipper_length_mm,Tamanho da nadadeira do pinguim,Quantitativo,Contínuo
5,body_mass_g,Massa corporal do pinguim,Quantitativo,Contínuo
6,sex,Sexo do pinguim,Qualitativo,Nominal


- Estatística de tendência central e dispersão

In [42]:
#Funções que calculam a moda, o intervalo e o intervalo interquartil

#Função para calcular a moda
def mode(column):
    mode = column.mode()
    if not mode.empty: 
        return mode.iloc[0]
    else:
        return None
    
#Função para calcular o intervalo
def intervalo(column):
    inter = column.max() - column.min()
    return inter

#Função para calcular o intervalo interquartil
def intervalo_interquartil(column):
    q1 = column.quantile(0.25)
    q3 = column.quantile(0.75)
    iqr = q3 - q1
    return iqr

In [45]:
# Lista para renomear o  titulo das colunas para melhor compreensão
novos_nomes = {
    'mean': 'Média',
    'median': 'Mediana',
    'std': 'Desvio Padrão',
    'var': 'Variância',
    'mode': 'Moda',
    'intervalo': 'Intervalo',
    'intervalo_interquartil': 'Intervalo Interquartil'
}

In [51]:
# Filtrar apenas as colunas numéricas
numeric_columns  = df_penguins.select_dtypes('float64')

# Adicionado o calculo das medidas de tendência central e dispersão
tendencia_central_e_dispersao_values =  numeric_columns .agg(['mean', 'median','std', 'var', mode, intervalo, intervalo_interquartil])

#Renomeando as colunas
tendencia_central_e_dispersao_values = tendencia_central_e_dispersao_values.rename(columns=novos_nomes)

#Imprimindo o DataFrame
tendencia_central_e_dispersao_values



Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
mean,43.92193,17.15117,200.915205,4201.754386
median,44.45,17.3,197.0,4050.0
std,5.459584,1.974793,14.061714,801.954536
var,29.807054,3.899808,197.731792,643131.077327
mode,41.1,17.0,190.0,3800.0
intervalo,27.5,8.4,59.0,3600.0
intervalo_interquartil,9.275,3.1,23.0,1200.0


- Calculo estastítica de forma separada para cada variável categórica


In [52]:
# Criando uma lista com as colunas numericas
colunas_numericas = df_penguins.select_dtypes('float64').columns

      - Por Espécie

In [53]:
# Adicionado o calculo das medidas de tendência central e dispersão
values_by_species = (
    df_penguins
    .groupby('species')[colunas_numericas]
    .agg(['mean', 'median','std', 'var', mode, intervalo, intervalo_interquartil])
)

#Renomeando as colunas
values_by_species = values_by_species.rename(columns=novos_nomes)

#Imprimindo o DataFrame
values_by_species

Unnamed: 0_level_0,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_depth_mm,bill_depth_mm,bill_depth_mm,...,flipper_length_mm,flipper_length_mm,flipper_length_mm,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g
Unnamed: 0_level_1,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,...,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil
species,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Adelie,38.791391,38.8,2.663405,7.093725,41.1,13.9,4.0,18.346358,18.4,1.21665,...,190.0,38.0,9.0,3700.662252,3700.0,458.566126,210282.891832,3550.0,1925.0,650.0
Chinstrap,48.833824,49.55,3.339256,11.15063,51.3,17.1,4.725,18.420588,18.45,1.135395,...,187.0,34.0,10.0,3733.088235,3700.0,384.335081,147713.454785,3400.0,2100.0,462.5
Gentoo,47.504878,47.3,3.081857,9.497845,45.2,18.7,4.25,14.982114,15.0,0.98122,...,215.0,28.0,9.0,5076.01626,5000.0,504.116237,254133.180061,5000.0,2350.0,800.0


    - Por Ilha

In [55]:
# Adicionado o calculo das medidas de tendência central e dispersão
values_by_island = (
    df_penguins
    .groupby('island')[colunas_numericas]
    .agg(['mean', 'median','std', 'var', mode, intervalo, intervalo_interquartil])
)

#Renomeando as colunas
values_by_island = values_by_island.rename(columns=novos_nomes)

#Imprimindo o DataFrame
values_by_island

Unnamed: 0_level_0,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_depth_mm,bill_depth_mm,bill_depth_mm,...,flipper_length_mm,flipper_length_mm,flipper_length_mm,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g
Unnamed: 0_level_1,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,...,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil
island,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Biscoe,45.257485,45.8,4.772731,22.778965,45.2,25.1,6.7,15.87485,15.5,1.820721,...,215.0,59.0,20.5,4716.017964,4775.0,782.855743,612863.114133,5000.0,3450.0,1125.0
Dream,44.167742,44.65,5.953527,35.444479,36.0,25.9,10.7,18.344355,18.4,1.133116,...,190.0,34.0,10.25,3712.903226,3687.5,416.644112,173592.315762,3400.0,2100.0,556.25
Torgersen,38.95098,38.9,3.025318,9.152549,34.6,12.5,4.45,18.429412,18.4,1.339447,...,190.0,34.0,8.0,3706.372549,3700.0,445.10794,198121.078431,3700.0,1800.0,662.5


    - Por Sexo

In [56]:
# Adicionado o calculo das medidas de tendência central e dispersão
values_by_sex = (
    df_penguins
    .groupby('sex')[colunas_numericas]
    .agg(['mean', 'median','std', 'var', mode, intervalo, intervalo_interquartil])
)

#Renomeando as colunas
values_by_sex = values_by_sex.rename(columns=novos_nomes)

#Imprimindo o DataFrame
values_by_sex

Unnamed: 0_level_0,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_length_mm,bill_depth_mm,bill_depth_mm,bill_depth_mm,...,flipper_length_mm,flipper_length_mm,flipper_length_mm,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g,body_mass_g
Unnamed: 0_level_1,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,...,Moda,Intervalo,Intervalo Interquartil,Média,Mediana,Desvio Padrão,Variância,Moda,Intervalo,Intervalo Interquartil
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Female,42.09697,42.8,4.903476,24.044076,46.5,25.9,8.6,16.425455,17.0,1.795681,...,187.0,50.0,23.0,3862.272727,3650.0,666.17205,443785.199557,3700.0,2500.0,1200.0
Male,45.854762,46.8,5.366896,28.80357,41.1,25.0,9.35,17.891071,18.45,1.863351,...,190.0,53.0,26.0,4545.684524,4300.0,787.628884,620359.25916,3900.0,3050.0,1412.5
