In [16]:
# Feature scaling
# Min-Max scaling, Z-score scaling, Robust scaling, Normalization, Binarization

### MinMaxScaling

In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, Normalizer, Binarizer

In [18]:
# Random sample data: three integer features drawn from [0, 100), five rows each.
# The global NumPy RNG is seeded first so that Restart & Run All rebuilds the
# exact same frame (and therefore the same scaler results) on every run.
# NOTE: outputs recorded before the seed was added will change on the next run.
np.random.seed(42)
data = {
    'x1': np.random.randint(0, 100, 5),
    'x2': np.random.randint(0, 100, 5),
    'x3': np.random.randint(0, 100, 5)
}

df = pd.DataFrame(data)
print(df)

   x1  x2  x3
0  78  68  99
1  97   0  12
2  62  83  11
3  32  10  89
4  84  23  58


In [19]:
# Min-Max scaling: linearly rescale every column so that its minimum maps to 0
# and its maximum maps to 1 (MinMaxScaler's default feature_range).
scaler = MinMaxScaler()
# Every column of df is a feature, so the whole frame is passed as-is.
df_scaled = scaler.fit_transform(df)
print(df_scaled)

[[0.70769231 0.81927711 1.        ]
 [1.         0.         0.01136364]
 [0.46153846 1.         0.        ]
 [0.         0.12048193 0.88636364]
 [0.8        0.27710843 0.53409091]]


In [20]:
# Z-score scaling (standardization): centre each column on its mean and divide
# by its standard deviation, giving columns with mean 0 and unit variance.
scaler = StandardScaler()
# Every column of df is a feature, so the whole frame is passed as-is.
df_scaled = scaler.fit_transform(df)
print(df_scaled)

[[ 0.33125622  0.95196308  1.21861662]
 [ 1.18177894 -1.12282825 -1.12695076]
 [-0.38497344  1.40963764 -1.15391131]
 [-1.72790406 -0.81771188  0.94901117]
 [ 0.59984234 -0.42106059  0.11323429]]


In [21]:
# Robust scaling: centre each column on its median and scale by its
# interquartile range, so outliers influence the result less than with Z-scores.
scaler = RobustScaler()
# Every column of df is a feature, so the whole frame is passed as-is.
df_scaled = scaler.fit_transform(df)
print(df_scaled)

[[ 0.          0.77586207  0.53246753]
 [ 0.86363636 -0.39655172 -0.5974026 ]
 [-0.72727273  1.03448276 -0.61038961]
 [-2.09090909 -0.22413793  0.4025974 ]
 [ 0.27272727  0.          0.        ]]


In [22]:
# Normalization: rescale each ROW (sample), not each column, to unit norm.
# Normalizer() defaults to norm='l2', so every row ends up with Euclidean
# length 1; pass norm='l1' explicitly if L1 scaling is wanted.
# This is not a log transform.
scaler = Normalizer()
# Every column of df is a feature, so the whole frame is passed as-is
# (equivalent to selecting all columns by name).
df_scaled = scaler.fit_transform(df)
print(df_scaled)

[[0.54465609 0.47482838 0.69129426]
 [0.99243448 0.         0.1227754 ]
 [0.59510936 0.79667866 0.10558392]
 [0.33646949 0.10514672 0.93580578]
 [0.80277171 0.21980654 0.55429475]]


In [23]:
# Log transformation: compress a wide-ranging positive feature by taking its
# natural, base-2 and base-10 logarithms.
data = {'rand_data': [10000, 20000, 30000, 40000, 50000]}

# Build the frame and derive one column per logarithm base in a single chain;
# keyword order fixes the resulting column order.
df = pd.DataFrame(data).assign(
    Log_data=lambda frame: np.log(frame['rand_data']),
    Log2_data=lambda frame: np.log2(frame['rand_data']),
    Log10_data=lambda frame: np.log10(frame['rand_data']),
)

print(df)

   rand_data   Log_data  Log2_data  Log10_data
0      10000   9.210340  13.287712    4.000000
1      20000   9.903488  14.287712    4.301030
2      30000  10.308953  14.872675    4.477121
3      40000  10.596635  15.287712    4.602060
4      50000  10.819778  15.609640    4.698970
