In [3]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#   Dataset: car_features.csv (obtain or create it yourself; it must include the columns Mileage, Horsepower, and Fuel_Type)
    # Columns to scale: Mileage, Horsepower
    # Column to encode: Fuel_Type
    # Steps:
    #     1. Load car_features.csv.
    #     2. Scale Mileage and Horsepower using StandardScaler.
    #     3. Encode Fuel_Type using Label Encoding.
    #     4. Confirm scaling and encoding by checking these columns.



    
    
    

In [4]:
from io import StringIO

import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Step 1 - Load. The inline CSV below stands in for car_features.csv so the
# cell runs without any external file.
sample_data = """
Mileage,Horsepower,Fuel_Type
45000,180,Gasoline
32000,220,Diesel
78000,150,Hybrid
55000,190,Gasoline
23000,250,Diesel
"""
# Use the standard-library StringIO: pd.io.common.StringIO is only an
# incidental re-export inside a pandas internal module, not public API.
car_data = pd.read_csv(StringIO(sample_data))

# Step 2 - Scale the numerical columns with StandardScaler.
numerical_cols = ['Mileage', 'Horsepower']
scaler = StandardScaler()
car_data[numerical_cols] = scaler.fit_transform(car_data[numerical_cols])

# Step 3 - Label-encode Fuel_Type (each distinct string becomes an integer code).
encoder = LabelEncoder()
car_data['Fuel_Type'] = encoder.fit_transform(car_data['Fuel_Type'])

# Step 4 - Verify the transformed columns.
print("Transformed Data:")
print(car_data)

# The position of a label in encoder.classes_ is the integer code it received.
print("\nFuel Type Mapping:")
for i, label in enumerate(encoder.classes_):
    print(f"{label}: {i}")

# describe() reports the sample standard deviation (ddof=1), while
# StandardScaler divides by the population standard deviation (ddof=0).
# With 5 rows the reported std is therefore sqrt(5/4) ~= 1.118 rather than 1.
print("\nScaled Statistics:")
print(car_data[numerical_cols].describe())

Transformed Data:
    Mileage  Horsepower  Fuel_Type
0 -0.083652   -0.524891          1
1 -0.763321    0.641533          0
2  1.641663   -1.399708          2
3  0.439171   -0.233285          1
4 -1.233861    1.516351          0

Fuel Type Mapping:
Diesel: 0
Gasoline: 1
Hybrid: 2

Scaled Statistics:
        Mileage  Horsepower
count  5.000000    5.000000
mean   0.000000    0.000000
std    1.118034    1.118034
min   -1.233861   -1.399708
25%   -0.763321   -0.524891
50%   -0.083652   -0.233285
75%    0.439171    0.641533
max    1.641663    1.516351
