In [3]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#     Dataset: customer_data.csv (get it by your own it includes the columns of Age , Annual_Income)
#     Columns to scale: Age , Annual_Income
#     Column to encode: Region
#     Steps:
#         1. Load customer_data.csv .
#         2. Use MinMaxScaler on Age and Annual_Income .
#         3. Perform One-Hot Encoding on Region .
#         4. Verify by assessing the transformed dataset.
csv_content = """CustomerID,Age,Annual_Income,Region
1,25,50000,North
2,35,60000,South
3,45,80000,East
4,28,52000,West
5,40,62000,North
6,50,90000,South
7,22,48000,East
8,38,70000,West
"""

with open("customer_data.csv", "w") as file:
    file.write(csv_content)

print("customer_data.csv created successfully.")



    
    
    

customer_data.csv created successfully.


In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
df = pd.read_csv("customer_data.csv")
print("Original DataFrame:")
print(df)

# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Apply MinMaxScaler to 'Age' and 'Annual_Income'
df[['Age', 'Annual_Income']] = scaler.fit_transform(df[['Age', 'Annual_Income']])

print("\nDataFrame after Min-Max scaling:")
print(df)

# One-Hot Encode 'Region'
df_encoded = pd.get_dummies(df, columns=['Region'])

print("\nDataFrame after One-Hot Encoding 'Region':")
print(df_encoded)


Original DataFrame:
   CustomerID  Age  Annual_Income Region
0           1   25          50000  North
1           2   35          60000  South
2           3   45          80000   East
3           4   28          52000   West
4           5   40          62000  North
5           6   50          90000  South
6           7   22          48000   East
7           8   38          70000   West

DataFrame after Min-Max scaling:
   CustomerID       Age  Annual_Income Region
0           1  0.107143       0.047619  North
1           2  0.464286       0.285714  South
2           3  0.821429       0.761905   East
3           4  0.214286       0.095238   West
4           5  0.642857       0.333333  North
5           6  1.000000       1.000000  South
6           7  0.000000       0.000000   East
7           8  0.571429       0.523810   West

DataFrame after One-Hot Encoding 'Region':
   CustomerID       Age  Annual_Income  Region_East  Region_North  \
0           1  0.107143       0.047619        Fals