In [1]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 


# Task:
#   Dataset: car_features.csv (create it yourself; it must include the columns Mileage, Horsepower, and Fuel_Type)
    # Columns to scale: Mileage, Horsepower
    # Column to encode: Fuel_Type
    # Steps:
    #     1. Load car_features.csv.
    #     2. Scale Mileage and Horsepower using StandardScaler.
    #     3. Encode Fuel_Type using Label Encoding.
    #     4. Confirm scaling and encoding by checking these columns.

import pandas as pd

# Sample car data, expressed as a column-name -> values mapping.
data = dict(
    Mileage=[25, 30, 22, 27, 24, 29, 35, 28, 26, 31],
    Horsepower=[120, 140, 110, 160, 130, 150, 170, 125, 135, 155],
    # Fuel type alternates Petrol/Diesel across the ten rows.
    Fuel_Type=['Petrol', 'Diesel'] * 5,
)

# Build the table from the mapping (one row per list position).
df = pd.DataFrame(data)

# Persist the table; index=False keeps the row index out of the file so it
# contains only the three feature columns.
df.to_csv('car_features.csv', index=False)

print("CSV file 'car_features.csv' created successfully.")

# Task:
#   Dataset: car_features.csv (create it yourself; it must include the columns Mileage, Horsepower, and Fuel_Type)
    # Columns to scale: Mileage, Horsepower
    # Column to encode: Fuel_Type
    # Steps:
    #     1. Load car_features.csv.
    #     2. Scale Mileage and Horsepower using StandardScaler.
    #     3. Encode Fuel_Type using Label Encoding.
    #     4. Confirm scaling and encoding by checking these columns.



import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Step 1: Load the dataset
df = pd.read_csv('car_features.csv')

# Name the columns each transformation touches once, so the transform and
# the verification below always operate on the same columns.
numeric_cols = ['Mileage', 'Horsepower']
categorical_col = 'Fuel_Type'

# Display the first few rows to inspect the data before any transformation
print("Original DataFrame:")
print(df.head())

# Step 2: Standardise the numeric columns (zero mean, unit variance)
scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Step 3: Replace each Fuel_Type category with an integer code
label_encoder = LabelEncoder()
df[categorical_col] = label_encoder.fit_transform(df[categorical_col])

# Step 4: Verify the changes
print("\nTransformed DataFrame:")
print(df.head())

# Summary statistics of the scaled columns. Note that describe() reports the
# *sample* standard deviation (ddof=1), so its "std" row is sqrt(n / (n - 1))
# rather than exactly 1 even when the scaling is correct.
print("\nSummary Statistics after Scaling:")
print(df[numeric_cols].describe())

# StandardScaler normalises by the *population* standard deviation (ddof=0);
# computing the std the same way is what actually confirms unit variance.
print("\nPopulation std (ddof=0) after Scaling:")
print(df[numeric_cols].std(ddof=0))

# Show which integer each category received: a label's code is its position
# in label_encoder.classes_.
print("\nFuel_Type label mapping:")
print({str(label): code for code, label in enumerate(label_encoder.classes_)})

    
    
    

CSV file 'car_features.csv' created successfully.
Original DataFrame:
   Mileage  Horsepower Fuel_Type
0       25         120    Petrol
1       30         140    Diesel
2       22         110    Petrol
3       27         160    Diesel
4       24         130    Petrol

Transformed DataFrame:
    Mileage  Horsepower  Fuel_Type
0 -0.754378   -1.077940          1
1  0.642619    0.027639          0
2 -1.592576   -1.630730          1
3 -0.195580    1.133219          0
4 -1.033778   -0.525150          1

Summary Statistics after Scaling:
            Mileage  Horsepower
count  1.000000e+01   10.000000
mean   1.998401e-16    0.000000
std    1.054093e+00    1.054093
min   -1.592576e+00   -1.630730
25%   -6.845285e-01   -0.732447
50%   -5.587988e-02   -0.110558
75%    5.727687e-01    0.787726
max    2.039615e+00    1.686009
