In [6]:
import pandas as pd
import numpy as np
import seaborn as sns

def add_z_scores(df: pd.DataFrame, ddof: int = 1) -> pd.DataFrame:
    """
    Append z-score columns for every numeric column of a DataFrame.

    For each numeric column ``col`` a new column named ``z_<col>`` is added,
    holding ``(col - col.mean()) / col.std(ddof=ddof)``. Non-numeric columns
    (e.g. strings or categories) are kept as-is and get no z-score column.
    The input DataFrame is not modified; a new one is returned.

    Parameters:
    df (DataFrame): DataFrame containing the data.
    ddof (int): Delta degrees of freedom forwarded to ``DataFrame.std``.
        The default of 1 (sample standard deviation) is the pandas default;
        pass 0 for the population standard deviation.

    Returns:
    DataFrame: The original columns followed by the ``z_``-prefixed
    z-score columns.

    Notes:
    Missing values are skipped when computing the mean and standard
    deviation (pandas default) and remain NaN in the z-score columns.
    The division is not guarded: a column whose standard deviation is 0
    yields NaN (or inf) z-scores.
    """
    # Keep only the numeric columns.
    numeric = df.select_dtypes(include=[np.number])

    # Standardize column-wise; the per-column mean/std Series align on column labels.
    z_scores = (numeric - numeric.mean()) / numeric.std(ddof=ddof)

    # add_prefix formats every label into a string, so non-string column
    # labels (e.g. default integer labels) work too, unlike `'z_' + col`.
    z_scores = z_scores.add_prefix("z_")

    # Append the z-score columns to the right of the original columns.
    return pd.concat([df, z_scores], axis=1)



In [3]:

# Usage example: build a small all-numeric DataFrame and append the
# z-score columns to it.
df = pd.DataFrame(
    {
        "A": [1, 2, 3, 4, 5],
        "B": [5, 6, 7, 8, 9],
        "C": [10, 11, 12, 13, 14],
    }
)
df_com_z_scores = add_z_scores(df)

In [4]:
# Show the example frame: original columns A, B, C followed by z_A, z_B, z_C.
df_com_z_scores

Unnamed: 0,A,B,C,z_A,z_B,z_C
0,1,5,10,-1.264911,-1.264911,-1.264911
1,2,6,11,-0.632456,-0.632456,-0.632456
2,3,7,12,0.0,0.0,0.0
3,4,8,13,0.632456,0.632456,0.632456
4,5,9,14,1.264911,1.264911,1.264911


In [7]:
# Load seaborn's "tips" example dataset.
# NOTE(review): presumably fetched/cached by seaborn, so the first run may need network access — confirm.
tips = sns.load_dataset("tips")

In [8]:
# Append z_-prefixed z-score columns for the numeric columns of tips.
tips_z = add_z_scores(tips)

In [9]:
# Display the result: only the numeric columns (total_bill, tip, size) get a z_ counterpart;
# sex, smoker, day and time are carried over unchanged.
tips_z

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,z_total_bill,z_tip,z_size
0,16.99,1.01,Female,No,Sun,Dinner,2,-0.314066,-1.436993,-0.598961
1,10.34,1.66,Male,No,Sun,Dinner,3,-1.061054,-0.967217,0.452453
2,21.01,3.50,Male,No,Sun,Dinner,3,0.137497,0.362610,0.452453
3,23.68,3.31,Male,No,Sun,Dinner,2,0.437416,0.225291,-0.598961
4,24.59,3.61,Female,No,Sun,Dinner,4,0.539635,0.442111,1.503867
...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,1.038377,2.111622,0.452453
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0.830568,-0.721488,-0.598961
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.323964,-0.721488,-0.598961
242,17.82,1.75,Male,No,Sat,Dinner,2,-0.220833,-0.902171,-0.598961


In [10]:
# Load seaborn's "penguins" example dataset.
# NOTE(review): presumably fetched/cached by seaborn, so the first run may need network access — confirm.
penguins = sns.load_dataset("penguins")

In [11]:
# Append z_-prefixed z-score columns for the numeric columns of penguins.
penguins_z = add_z_scores(penguins)

In [12]:
# Display the result: rows with missing measurements (e.g. rows 3 and 339) keep NaN in their z-score columns.
penguins_z

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,z_bill_length_mm,z_bill_depth_mm,z_flipper_length_mm,z_body_mass_g
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male,-0.883205,0.784300,-1.416272,-0.563317
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female,-0.809939,0.126003,-1.060696,-0.500969
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female,-0.663408,0.429833,-0.420660,-1.186793
3,Adelie,Torgersen,,,,,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female,-1.322799,1.088129,-0.562890,-0.937403
...,...,...,...,...,...,...,...,...,...,...,...
339,Gentoo,Biscoe,,,,,,,,,
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,Female,0.527159,-1.443781,1.001641,0.808332
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,Male,1.186550,-0.734846,1.499447,1.930590
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,Female,0.234097,-1.190590,0.788296,1.244766


In [15]:
# Keep only the z-score columns. Anchoring on the full "z_" prefix avoids also
# picking up unrelated columns whose names merely start with the letter "z".
z_columns = penguins_z.filter(regex=r'^z_')

In [16]:
# Display the selected z-score columns of the penguins frame.
z_columns

Unnamed: 0,z_bill_length_mm,z_bill_depth_mm,z_flipper_length_mm,z_body_mass_g
0,-0.883205,0.784300,-1.416272,-0.563317
1,-0.809939,0.126003,-1.060696,-0.500969
2,-0.663408,0.429833,-0.420660,-1.186793
3,,,,
4,-1.322799,1.088129,-0.562890,-0.937403
...,...,...,...,...
339,,,,
340,0.527159,-1.443781,1.001641,0.808332
341,1.186550,-0.734846,1.499447,1.930590
342,0.234097,-1.190590,0.788296,1.244766
