feature_engineering_scaling

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler

In [3]:
# Sample data: five toy products with two categorical features ('Color',
# 'Size') and one numeric feature ('Price'). Each key becomes a column.
data = dict(
    Color=['Red', 'Green', 'Blue', 'Green', 'Red'],
    Size=['S', 'M', 'L', 'M', 'S'],
    Price=[10, 20, 15, 22, 18],
)
df = pd.DataFrame.from_dict(data)

In [4]:
# 1. Label Encoding
# Learn the distinct 'Size' values, then map each row to its integer code.
# NOTE: codes follow the sorted order of the class labels, so here
# L -> 0, M -> 1, S -> 2; they do not reflect the S < M < L size ordering.
label_encoder = LabelEncoder().fit(df['Size'])
df['Size_Encoded'] = label_encoder.transform(df['Size'])

In [5]:
# Show each original size next to its integer code; the trailing empty
# argument yields the blank separator line after the table.
print("Label Encoded 'Size':", df[['Size', 'Size_Encoded']], "", sep="\n")

Label Encoded 'Size':
  Size  Size_Encoded
0    S             2
1    M             1
2    L             0
3    M             1
4    S             2



In [6]:
# 2. One-Hot Encoding
# Expand 'Color' into one indicator column per distinct value, named
# 'Color_<value>'. Despite its name, `onehot_encoder` holds the resulting
# DataFrame of indicator columns, not an encoder object.
onehot_encoder = pd.get_dummies(df['Color'], prefix='Color')
# Remove any indicator columns left over from a previous run of this cell
# before appending the fresh ones, so re-running the cell does not pile up
# duplicate 'Color_*' columns. On a first run nothing matches and the drop
# is a no-op, giving the same frame as a plain concat.
df = pd.concat(
    [df.drop(columns=onehot_encoder.columns, errors='ignore'), onehot_encoder],
    axis=1,
)

In [7]:
# Display the full frame including the new indicator columns; the trailing
# empty argument yields the blank separator line after the table.
print("One-Hot Encoded 'Color':", df, "", sep="\n")

One-Hot Encoded 'Color':
   Color Size  Price  Size_Encoded  Color_Blue  Color_Green  Color_Red
0    Red    S     10             2       False        False       True
1  Green    M     20             1       False         True      False
2   Blue    L     15             0        True        False      False
3  Green    M     22             1       False         True      False
4    Red    S     18             2       False        False       True



In [8]:
# 3. Standardization
# Fit the scaler on 'Price' (passed as a one-column frame, since scalers
# expect 2-D input), then rescale the same column to zero mean and unit
# variance.
scaler = StandardScaler().fit(df[['Price']])
df['Price_Standardized'] = scaler.transform(df[['Price']])

In [9]:
# Show raw prices beside their standardized values; the trailing empty
# argument yields the blank separator line after the table.
print("Standardized 'Price':", df[['Price', 'Price_Standardized']], "", sep="\n")

Standardized 'Price':
   Price  Price_Standardized
0     10           -1.668560
1     20            0.715097
2     15           -0.476731
3     22            1.191828
4     18            0.238366



In [10]:
# 4. Normalization
# Fit on 'Price' (one-column frame), then map it linearly onto [0, 1]:
# the smallest price becomes 0 and the largest becomes 1.
minmax_scaler = MinMaxScaler().fit(df[['Price']])
df['Price_Normalized'] = minmax_scaler.transform(df[['Price']])

In [11]:
# Show raw prices beside their min-max normalized values.
print("Normalized 'Price':", df[['Price', 'Price_Normalized']], sep="\n")

Normalized 'Price':
   Price  Price_Normalized
0     10          0.000000
1     20          0.833333
2     15          0.416667
3     22          1.000000
4     18          0.666667
