In [1]:
import numpy as np
import pandas as pd
from scipy.stats import skew
from sklearn.preprocessing import PowerTransformer

`segmentationOriginal` is a dataset that ships with the R package `AppliedPredictiveModeling` (it is not part of base R). To export it to a CSV file that this notebook can read, run the following code in R:

library(AppliedPredictiveModeling)  
data("segmentationOriginal")  
write.csv(segmentationOriginal, file = "segmentationOriginal.csv", row.names = FALSE)

In [2]:
# Read the CSV exported from R and, in the same expression, keep only the
# rows whose 'Case' column equals 'Train' (all columns are retained).
segData = (
    pd.read_csv('segmentationOriginal.csv')
    .loc[lambda frame: frame['Case'] == 'Train', :]
)

In [3]:
# Skewness of a single column (AngleCh1), computed with scipy.stats.skew
angle_skewness = skew(segData['AngleCh1'])
print('skewness for AngleCh1 variable:', angle_skewness)

# Collect the names of every numeric-dtype column, then compute the same
# statistic column by column; the resulting Series is the cell's output
numeric_cols = segData.select_dtypes(include='number').columns
segData.loc[:, numeric_cols].apply(skew, axis=0)

skewness for AngleCh1 variable: -0.024298630435426752


Cell                 7.922722
AngleCh1            -0.024299
AngleStatusCh1       0.941238
AreaCh1              3.530354
AreaStatusCh1        3.064855
                       ...   
VarIntenStatusCh4    1.388237
WidthCh1             1.882883
WidthStatusCh1       2.099100
XCentroid           -0.105747
YCentroid            0.496392
Length: 117, dtype: float64

In [4]:
# Fit a Box-Cox power transform on the AvgIntenCh1 column and transform it.
# The double brackets select a one-column DataFrame rather than a Series.
# NOTE(review): the result name mentions "Area" although the column being
# transformed is AvgIntenCh1; the name is kept in case later cells use it.
box_cox = PowerTransformer(method='box-cox')
chiAreaTrans = box_cox.fit_transform(segData[['AvgIntenCh1']])

# Compare skewness of the raw column with that of the transformed values;
# axis=0 computes the statistic down the single transformed column
print('Skewness before transformation:', skew(segData['AvgIntenCh1']))
print('Skewness after transformation:', skew(chiAreaTrans, axis=0))

Skewness before transformation: 2.9635898861967185
Skewness after transformation: [0.10063278]
