In [None]:
import pandas as pd

# Toy example: a single categorical column of color labels, used below to
# demonstrate one-hot encoding.
data = {
    "Color": ["Red", "Blue", "Green", "Blue", "Red"],
}
dataframe = pd.DataFrame(data=data)
print(dataframe)

   Color
0    Red
1   Blue
2  Green
3   Blue
4    Red


In [None]:
# Replace "Color" with one indicator column per distinct category value.
categorical_columns = ["Color"]
one_hot_encoded_df = pd.get_dummies(data=dataframe, columns=categorical_columns)
print(one_hot_encoded_df)

   Color_Blue  Color_Green  Color_Red
0       False        False       True
1        True        False      False
2       False         True      False
3        True        False      False
4       False        False       True


Dummy Variable Trap

In [None]:
# Drop the first indicator level: it is implied whenever every remaining
# indicator is False, so keeping it only adds redundant, perfectly collinear
# information (the "dummy variable trap").
one_hot_encoded_df = pd.get_dummies(
    data=dataframe,
    columns=["Color"],
    drop_first=True,
)
print(one_hot_encoded_df)

   Color_Green  Color_Red
0        False       True
1        False      False
2         True      False
3        False      False
4        False       True


In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Read the car price listing from the hard-coded CSV location.
file_path = "/content/carprices.csv"
car_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview up to the first 14 rows to see which columns the file provides.
preview_rows = car_data.head(n=14)
print("Original Data:\n", preview_rows)

Original Data:
                 Car Model  Mileage  Sell Price($)  Age(yrs)
0                  BMW X5    69000          18000         6
1                  BMW X5    35000          34000         3
2                  BMW X5    57000          26100         5
3                  BMW X5    22500          40000         2
4                  BMW X5    46000          31500         4
5                 Audi A5    59000          29400         5
6                 Audi A5    52000          32000         5
7                 Audi A5    72000          19300         6
8                 Audi A5    91000          12000         8
9   Mercedez Benz C class    67000          22000         6
10  Mercedez Benz C class    83000          20000         7
11  Mercedez Benz C class    79000          21000         7
12  Mercedez Benz C class    59000          33000         5


In [None]:
# Show the dtype of every column in car_data.
car_data.dtypes

Unnamed: 0,0
Car Model,object
Mileage,int64
Sell Price($),int64
Age(yrs),int64


In [None]:
# Show the (rows, columns) dimensions of car_data.
car_data.shape

(13, 4)

In [None]:
# Select the categorical column as a one-column DataFrame (double brackets),
# since the encoder is fitted on 2-D input.
car_models = car_data[["Car Model"]]

# One-hot encode the column. The encoder is left at its default (sparse)
# output and densified with .toarray(): the `sparse=` keyword was deprecated
# in scikit-learn 1.2 and removed in 1.4 (renamed to `sparse_output=`), so
# passing either keyword fails on some installed version, while this form
# works on all of them.
one_hot_encoder = OneHotEncoder()
one_hot_encoded = one_hot_encoder.fit_transform(car_models).toarray()

# Wrap the encoded array in a DataFrame with readable column names. Reusing
# car_data's index guarantees that the column-wise concat below lines the
# rows up one-to-one even if car_data does not have a default RangeIndex.
one_hot_encoded_df = pd.DataFrame(
    one_hot_encoded,
    columns=one_hot_encoder.get_feature_names_out(["Car Model"]),
    index=car_data.index,
)

# Append the indicator columns to the right of the original columns.
car_data_one_hot_encoded = pd.concat([car_data, one_hot_encoded_df], axis=1)

# Display the one-hot encoded data
print("\nOne-Hot Encoded Data:\n", car_data_one_hot_encoded.head(14))


One-Hot Encoded Data:
                 Car Model  Mileage  Sell Price($)  Age(yrs)  \
0                  BMW X5    69000          18000         6   
1                  BMW X5    35000          34000         3   
2                  BMW X5    57000          26100         5   
3                  BMW X5    22500          40000         2   
4                  BMW X5    46000          31500         4   
5                 Audi A5    59000          29400         5   
6                 Audi A5    52000          32000         5   
7                 Audi A5    72000          19300         6   
8                 Audi A5    91000          12000         8   
9   Mercedez Benz C class    67000          22000         6   
10  Mercedez Benz C class    83000          20000         7   
11  Mercedez Benz C class    79000          21000         7   
12  Mercedez Benz C class    59000          33000         5   

    Car Model_Audi A5  Car Model_BMW X5  Car Model_Mercedez Benz C class  
0                 



In [None]:
# Fit a label encoder on the car model names and obtain one integer code per row.
label_encoder = LabelEncoder()
model_names = car_models["Car Model"]
label_encoded = label_encoder.fit_transform(model_names)

# Build a new frame carrying the integer codes as an extra column;
# car_data itself is left unmodified.
car_data_label_encoded = car_data.assign(
    **{"Car Model (Label Encoded)": label_encoded}
)

# Show up to the first 14 rows of the label-encoded frame.
print("\nLabel Encoded Data:\n", car_data_label_encoded.head(14))


Label Encoded Data:
                 Car Model  Mileage  Sell Price($)  Age(yrs)  \
0                  BMW X5    69000          18000         6   
1                  BMW X5    35000          34000         3   
2                  BMW X5    57000          26100         5   
3                  BMW X5    22500          40000         2   
4                  BMW X5    46000          31500         4   
5                 Audi A5    59000          29400         5   
6                 Audi A5    52000          32000         5   
7                 Audi A5    72000          19300         6   
8                 Audi A5    91000          12000         8   
9   Mercedez Benz C class    67000          22000         6   
10  Mercedez Benz C class    83000          20000         7   
11  Mercedez Benz C class    79000          21000         7   
12  Mercedez Benz C class    59000          33000         5   

    Car Model (Label Encoded)  
0                           1  
1                           1  