In [13]:
import pandas as pd

# Read dataset
df = pd.read_csv("data/Toyota.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,Price,Age,KM,FuelType,HP,MetColor,Automatic,CC,Doors,Weight
0,0,13500,23.0,46986,Diesel,90,1.0,0,2000,three,1165
1,1,13750,23.0,72937,Diesel,90,1.0,0,2000,3,1165
2,2,13950,24.0,41711,Diesel,90,,0,2000,3,1165
3,3,14950,26.0,48000,Diesel,90,0.0,0,2000,3,1165
4,4,13750,30.0,38500,Diesel,90,0.0,0,2000,3,1170


In [14]:
# a. Get unique values of categorical 'Doors'
unique_doors = df['Doors'].unique()  # Get unique values of the 'Doors' column
unique_doors  # 🔹 Display unique values of 'Doors'

array(['three', '3', '5', '4', 'four', 'five', '2'], dtype=object)

In [15]:
# b. Transform all categorical columns to the same format (e.g., lowercase)
categorical_cols = df.select_dtypes(include=['object']).columns  # Get all categorical columns (strings)
for col in categorical_cols:
    df[col] = df[col].str.lower()  # Convert each categorical column to lowercase
df[categorical_cols]  # Display transformed categorical columns

Unnamed: 0,KM,FuelType,HP,Doors
0,46986,diesel,90,three
1,72937,diesel,90,3
2,41711,diesel,90,3
3,48000,diesel,90,3
4,38500,diesel,90,3
...,...,...,...,...
1431,20544,petrol,86,3
1432,??,petrol,86,3
1433,17016,petrol,86,3
1434,??,,86,3


In [16]:
# c. Apply Decimal scaling normalization on 'HP' column
# First, replace non-numeric entries with NaN
df['HP'] = pd.to_numeric(df['HP'], errors='coerce')

In [17]:
# Now apply decimal scaling normalization, ignoring NaN values
max_hp = df['HP'].max()
if max_hp > 0:
    df['HP_normalized'] = df['HP'] / 10**(len(str(int(max_hp)))-1)

In [18]:
df[['HP', 'HP_normalized']]  # Display original 'HP' and normalized 'HP'

Unnamed: 0,HP,HP_normalized
0,90.0,0.90
1,90.0,0.90
2,90.0,0.90
3,90.0,0.90
4,90.0,0.90
...,...,...
1431,86.0,0.86
1432,86.0,0.86
1433,86.0,0.86
1434,86.0,0.86
