In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.datasets import load_iris

In [2]:
# Load the iris sample dataset and expose its measurements as a DataFrame
# whose columns carry the dataset's own feature names.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [3]:
# One unfitted scaler per strategy; each one is fitted on X further down.
standard_scaler, minmax_scaler, robust_scaler = (
    StandardScaler(),
    MinMaxScaler(),
    RobustScaler(),
)

In [4]:
# Fit each scaler on the full feature table and keep its rescaled output.
# The scalers are visited in the order standard -> min-max -> robust, and the
# fitted scaler objects are reused later to transform new samples.
X_standardized, X_normalized, X_robust = (
    fitted.fit_transform(X)
    for fitted in (standard_scaler, minmax_scaler, robust_scaler)
)

In [5]:
# Wrap the scaled results in DataFrames so they carry the original feature
# names and can be summarised with describe().
# index=X.index keeps every scaled row aligned with its source row in X even
# when X does not use the default 0..n-1 index (e.g. after filtering or when a
# different dataset is swapped in); for the default index the result is the
# same as before.
X_standardized = pd.DataFrame(X_standardized, columns=X.columns, index=X.index)
X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
X_robust = pd.DataFrame(X_robust, columns=X.columns, index=X.index)

In [6]:
# Summary statistics of the unscaled features, followed by a separator rule.
# sep="\n" places each argument on its own line, exactly as one print() per
# item would.
print(
    "Original Data Statistics:",
    X.describe().round(2),
    "\n" + "="*50 + "\n",
    sep="\n",
)

Original Data Statistics:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count             150.00            150.00             150.00   
mean                5.84              3.06               3.76   
std                 0.83              0.44               1.77   
min                 4.30              2.00               1.00   
25%                 5.10              2.80               1.60   
50%                 5.80              3.00               4.35   
75%                 6.40              3.30               5.10   
max                 7.90              4.40               6.90   

       petal width (cm)  
count            150.00  
mean               1.20  
std                0.76  
min                0.10  
25%                0.30  
50%                1.30  
75%                1.80  
max                2.50  




In [7]:
# Summary statistics after StandardScaler, followed by a separator rule.
# sep="\n" places each argument on its own line, exactly as one print() per
# item would.
print(
    "Standardized Data Statistics (StandardScaler):",
    X_standardized.describe().round(2),
    "\n" + "="*50 + "\n",
    sep="\n",
)

Standardized Data Statistics (StandardScaler):
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count             150.00            150.00             150.00   
mean               -0.00             -0.00              -0.00   
std                 1.00              1.00               1.00   
min                -1.87             -2.43              -1.57   
25%                -0.90             -0.59              -1.23   
50%                -0.05             -0.13               0.34   
75%                 0.67              0.56               0.76   
max                 2.49              3.09               1.79   

       petal width (cm)  
count            150.00  
mean              -0.00  
std                1.00  
min               -1.45  
25%               -1.18  
50%                0.13  
75%                0.79  
max                1.71  




In [8]:
# Summary statistics after MinMaxScaler, followed by a separator rule.
# sep="\n" places each argument on its own line, exactly as one print() per
# item would.
print(
    "Normalized Data Statistics (MinMaxScaler):",
    X_normalized.describe().round(2),
    "\n" + "="*50 + "\n",
    sep="\n",
)

Normalized Data Statistics (MinMaxScaler):
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count             150.00            150.00             150.00   
mean                0.43              0.44               0.47   
std                 0.23              0.18               0.30   
min                 0.00              0.00               0.00   
25%                 0.22              0.33               0.10   
50%                 0.42              0.42               0.57   
75%                 0.58              0.54               0.69   
max                 1.00              1.00               1.00   

       petal width (cm)  
count            150.00  
mean               0.46  
std                0.32  
min                0.00  
25%                0.08  
50%                0.50  
75%                0.71  
max                1.00  




In [9]:
# Summary statistics after RobustScaler.
# sep="\n" places each argument on its own line, exactly as one print() per
# item would.
print(
    "Robust Scaled Data Statistics (RobustScaler):",
    X_robust.describe().round(2),
    sep="\n",
)

Robust Scaled Data Statistics (RobustScaler):
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count             150.00            150.00             150.00   
mean                0.03              0.11              -0.17   
std                 0.64              0.87               0.50   
min                -1.15             -2.00              -0.96   
25%                -0.54             -0.40              -0.79   
50%                 0.00              0.00               0.00   
75%                 0.46              0.60               0.21   
max                 1.62              2.80               0.73   

       petal width (cm)  
count            150.00  
mean              -0.07  
std                0.51  
min               -0.80  
25%               -0.67  
50%                0.00  
75%                0.33  
max                0.80  


In [10]:
# Two hand-written samples standing in for unseen data (such as a test set).
# Each row lists its values in the same column order as X, so the frame can
# reuse X's column labels directly.
new_data = np.array([[5.0, 3.2, 1.5, 0.4], [6.3, 2.8, 5.1, 1.9]])
new_df = pd.DataFrame(data=new_data, columns=X.columns)

In [11]:
# Rescale the new samples with the scalers that were already fitted on X.
# Only transform() is called here, so nothing is re-fitted on the new rows.
new_standardized, new_normalized, new_robust = (
    fitted.transform(new_df)
    for fitted in (standard_scaler, minmax_scaler, robust_scaler)
)

In [12]:
# Show the new samples before and after each scaling method.
# A single print() with sep="\n" emits every heading and table on its own
# line, matching one print() call per item. Scaled values are relabelled with
# X's column names and rounded to three decimals for display.
print(
    "\n" + "="*50,
    "\nScaling New Data Example:",
    "\nOriginal New Data:",
    new_df,
    "\nStandardized New Data:",
    pd.DataFrame(new_standardized, columns=X.columns).round(3),
    "\nNormalized New Data:",
    pd.DataFrame(new_normalized, columns=X.columns).round(3),
    "\nRobust Scaled New Data:",
    pd.DataFrame(new_robust, columns=X.columns).round(3),
    sep="\n",
)



Scaling New Data Example:

Original New Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.0               3.2                1.5               0.4
1                6.3               2.8                5.1               1.9

Standardized New Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0             -1.022             0.328             -1.283            -1.052
1              0.553            -0.592              0.763             0.922

Normalized New Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0              0.194             0.500              0.085             0.125
1              0.556             0.333              0.695             0.750

Robust Scaled New Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0             -0.615               0.4             -0.814              -0.6
1              0.385              -0.4       