In [5]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import PowerTransformer, MinMaxScaler, StandardScaler

# Toy dataset: one column per family of transformations demonstrated below
data = {
    'Feature1': [1, 5, 10, 50, 100],      # strictly positive, right-skewed
    'Feature2': [0, 1, 4, 9, 16],         # non-negative, suits sqrt / cbrt
    'Feature3': [-5, 0, 5, 10, 20]        # includes a negative value, for Yeo-Johnson
}

df = pd.DataFrame(data)
print("Original Data:\n", df, "\n")

# Element-wise transformations, applied in order.
# Each entry: (printed heading, source column, new column, NumPy function).
elementwise_steps = [
    # 1. Log transformation -- log1p(x) = log(1 + x), so a zero input stays finite
    ("Log Transformation:\n", 'Feature1', 'Feature1_log', np.log1p),
    # 2. Square root transformation
    ("Square Root Transformation:\n", 'Feature2', 'Feature2_sqrt', np.sqrt),
    # 3. Cube root transformation
    ("Cube Root Transformation:\n", 'Feature2', 'Feature2_cbrt', np.cbrt),
]

for heading, source_col, target_col, transform in elementwise_steps:
    # Add the transformed column, then show it next to its source column
    df[target_col] = transform(df[source_col])
    print(heading, df[[source_col, target_col]], "\n")


Original Data:
    Feature1  Feature2  Feature3
0         1         0        -5
1         5         1         0
2        10         4         5
3        50         9        10
4       100        16        20 

Log Transformation:
    Feature1  Feature1_log
0         1      0.693147
1         5      1.791759
2        10      2.397895
3        50      3.931826
4       100      4.615121 

Square Root Transformation:
    Feature2  Feature2_sqrt
0         0            0.0
1         1            1.0
2         4            2.0
3         9            3.0
4        16            4.0 

Cube Root Transformation:
    Feature2  Feature2_cbrt
0         0       0.000000
1         1       1.000000
2         4       1.587401
3         9       2.080084
4        16       2.519842 



In [2]:
# 4. Box-Cox Transformation (only positive data)
# Box-Cox is defined only for strictly positive inputs, so it is applied to
# Feature1 alone. The transformer expects a 2-D (n_samples, n_features) array,
# hence the one-column frame selection rather than a 1-D Series.
df_positive = df[['Feature1']].to_numpy()
pt_boxcox = PowerTransformer(method='box-cox')
feature1_boxcox = pt_boxcox.fit_transform(df_positive)
# The result has shape (n_samples, 1); flatten it before storing as a column
df['Feature1_boxcox'] = feature1_boxcox.ravel()
print("Box-Cox Transformation:\n", df[['Feature1', 'Feature1_boxcox']], "\n")

Box-Cox Transformation:
    Feature1  Feature1_boxcox
0         1        -1.454217
1         5        -0.575530
2        10        -0.166723
3        50         0.859509
4       100         1.336961 



In [3]:
# 5. Power Transformation (Yeo-Johnson, works with zero or negative)
# All three raw features are transformed together, including Feature3,
# which contains a negative value.
df_all = df[['Feature1', 'Feature2', 'Feature3']]
pt_yeojohnson = PowerTransformer(method='yeo-johnson')
df_yeojohnson = pd.DataFrame(
    pt_yeojohnson.fit_transform(df_all),
    columns=[f"{col}_yeojohnson" for col in df_all.columns],
    index=df_all.index,  # keep rows aligned with df even if its index is not 0..n-1
)
# Drop any *_yeojohnson columns left over from an earlier run of this cell
# before concatenating, so re-running it never appends duplicate columns.
df = pd.concat(
    [df.drop(columns=df_yeojohnson.columns, errors='ignore'), df_yeojohnson],
    axis=1,
)
print("Yeo-Johnson Transformation:\n", df, "\n")

Yeo-Johnson Transformation:
    Feature1  Feature2  Feature3  Feature1_log  Feature2_sqrt  Feature2_cbrt  \
0         1         0        -5      0.693147            0.0       0.000000   
1         5         1         0      1.791759            1.0       1.000000   
2        10         4         5      2.397895            2.0       1.587401   
3        50         9        10      3.931826            3.0       2.080084   
4       100        16        20      4.615121            4.0       2.519842   

   Feature1_boxcox  Feature1_yeojohnson  Feature2_yeojohnson  \
0        -1.454217            -1.412019            -1.391864   
1        -0.575530            -0.621319            -0.796039   
2        -0.166723            -0.191167             0.063326   
3         0.859509             0.878365             0.772364   
4         1.336961             1.346141             1.352214   

   Feature3_yeojohnson  Feature1_scaled  Feature2_scaled  Feature3_scaled  \
0            -1.503256         0.0

In [4]:
# 6. Normalization / Scaling (Min-Max Scaling 0-1)
# NOTE: relies on `df_all` (the three raw feature columns) created in the
# Yeo-Johnson cell, so that cell must have been run first.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(df_all),
    columns=[f"{col}_scaled" for col in df_all.columns],
    index=df_all.index,  # keep rows aligned with df even if its index is not 0..n-1
)
# Drop any *_scaled columns left over from an earlier run of this cell
# before concatenating, so re-running it never appends duplicate columns.
df = pd.concat(
    [df.drop(columns=df_scaled.columns, errors='ignore'), df_scaled],
    axis=1,
)
print("Min-Max Scaling:\n", df, "\n")

Min-Max Scaling:
    Feature1  Feature2  Feature3  Feature1_log  Feature2_sqrt  Feature2_cbrt  \
0         1         0        -5      0.693147            0.0       0.000000   
1         5         1         0      1.791759            1.0       1.000000   
2        10         4         5      2.397895            2.0       1.587401   
3        50         9        10      3.931826            3.0       2.080084   
4       100        16        20      4.615121            4.0       2.519842   

   Feature1_boxcox  Feature1_yeojohnson  Feature2_yeojohnson  \
0        -1.454217            -1.412019            -1.391864   
1        -0.575530            -0.621319            -0.796039   
2        -0.166723            -0.191167             0.063326   
3         0.859509             0.878365             0.772364   
4         1.336961             1.346141             1.352214   

   Feature3_yeojohnson  Feature1_scaled  Feature2_scaled  Feature3_scaled  \
0            -1.503256         0.000000      