In [1]:
# Import StandardScaler for standardization
# Import pandas for handling tabular data
from sklearn.preprocessing import StandardScaler
import pandas as pd

### 📊 Sample Data
We create a small dataset with two numeric features on very different scales: Age (tens) and Income (tens of thousands).
This makes it easy to see how StandardScaler brings both features onto a common scale.

In [2]:
# Toy dataset: two numeric features whose raw values live on very different scales
ages = [22, 25, 47, 52, 46]                    # small-scale feature
incomes = [18000, 24000, 52000, 58000, 60000]  # large-scale feature
data = dict(Age=ages, Income=incomes)

# Build the DataFrame: one column per feature, one row per sample
df = pd.DataFrame(data)

# Keep the frame as the last expression so the notebook renders it
df

Unnamed: 0,Age,Income
0,22,18000
1,25,24000
2,47,52000
3,52,58000
4,46,60000


### ⚙️ Apply StandardScaler
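We now use StandardScaler to rescale each feature (column) independently so that it has:
- Mean = 0
- Standard deviation = 1

Note: StandardScaler divides by the population standard deviation (`ddof=0`), so pandas' default `.std()` (`ddof=1`) will report a value slightly above 1 on the scaled data.

This puts all features on a comparable scale, so that no feature dominates training merely because its raw values are larger.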

In [3]:
# Create the scaler and let it learn each column's mean and standard deviation
scaler = StandardScaler()
scaler.fit(df)

# Apply the learned statistics column by column: z = (x - mean) / std
# The result is a NumPy array (not a DataFrame) of standardized values
standardized = scaler.transform(df)

In [4]:
# Wrap the standardized NumPy array back into a DataFrame.
# Reuse the original column names for readability, and the original index so
# that every standardized row stays aligned with its source row in `df`
# (without `index=`, pandas would assign a fresh 0..n-1 RangeIndex).
standardized_df = pd.DataFrame(standardized, columns=df.columns, index=df.index)

# Display the standardized data
standardized_df

Unnamed: 0,Age,Income
0,-1.325688,-1.372955
1,-1.083184,-1.035343
2,0.695178,0.540179
3,1.099351,0.877791
4,0.614343,0.990328


In [5]:
# Print the original and the standardized frames back-to-back for comparison.
# Each heading is printed on its own line, followed by the frame's text repr.
for heading, frame in (
    ("Original Data:", df),
    ("\nStandardized Data:", standardized_df),
):
    print(heading)
    print(frame)

Original Data:
   Age  Income
0   22   18000
1   25   24000
2   47   52000
3   52   58000
4   46   60000

Standardized Data:
        Age    Income
0 -1.325688 -1.372955
1 -1.083184 -1.035343
2  0.695178  0.540179
3  1.099351  0.877791
4  0.614343  0.990328
