In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder

# Sample DataFrame: four categorical columns observed over five rows.
# Every later cell derives its encoded frame from this `df`.
data = dict(
    color=['red', 'blue', 'green', 'red', 'blue'],
    size=['small', 'medium', 'large', 'medium', 'small'],
    grade=['A', 'B', 'C', 'B', 'A'],
    city=['Bengaluru', 'Mysuru', 'Hubli', 'Bengaluru', 'Mysuru'],
)
df = pd.DataFrame.from_dict(data)

df

Unnamed: 0,color,size,grade,city
0,red,small,A,Bengaluru
1,blue,medium,B,Mysuru
2,green,large,C,Hubli
3,red,medium,B,Bengaluru
4,blue,small,A,Mysuru


In [4]:
# Work on an independent copy: plain assignment (`encoded_df = df`) would
# only create a second name for the same object, so any column added to
# encoded_df afterwards would also appear in the original df.
encoded_df = df.copy()
encoded_df

Unnamed: 0,color,size,grade,city
0,red,small,A,Bengaluru
1,blue,medium,B,Mysuru
2,green,large,C,Hubli
3,red,medium,B,Bengaluru
4,blue,small,A,Mysuru


In [7]:
# 1. Label Encoding
# Each distinct colour is replaced by an integer code; for this data the
# codes come out as blue -> 0, green -> 1, red -> 2.
label_encoder = LabelEncoder()
encoded_df = df.assign(color_label=label_encoder.fit_transform(df['color']))
encoded_df

Unnamed: 0,color,size,grade,city,color_label
0,red,small,A,Bengaluru,2
1,blue,medium,B,Mysuru,0
2,green,large,C,Hubli,1
3,red,medium,B,Bengaluru,2
4,blue,small,A,Mysuru,0


In [9]:
# 2. Ordinal Encoding
# The category order is given explicitly, so the codes follow the natural
# ranking of the sizes (small -> 0.0, medium -> 1.0, large -> 2.0).
size_order = ['small', 'medium', 'large']
ordinal_encoder = OrdinalEncoder(categories=[size_order])

# The encoder expects a 2-D input, hence the double brackets around 'size'.
size_codes = ordinal_encoder.fit_transform(df[['size']])

encoded_df = df.copy()
encoded_df['size_ordinal'] = size_codes
encoded_df

Unnamed: 0,color,size,grade,city,size_ordinal
0,red,small,A,Bengaluru,0.0
1,blue,medium,B,Mysuru,1.0
2,green,large,C,Hubli,2.0
3,red,medium,B,Bengaluru,1.0
4,blue,small,A,Mysuru,0.0


In [10]:
# One-Hot Encoding
# 'color' is replaced by one boolean indicator column per distinct value;
# no level is dropped, so every row has exactly one True among them.
encoded_df = pd.get_dummies(
    data=df,
    columns=['color'],
    drop_first=False,
)
encoded_df

Unnamed: 0,size,grade,city,color_blue,color_green,color_red
0,small,A,Bengaluru,False,False,True
1,medium,B,Mysuru,True,False,False
2,large,C,Hubli,False,True,False
3,medium,B,Bengaluru,False,False,True
4,small,A,Mysuru,True,False,False


In [11]:
# Dummy Encoding
# Same indicator columns as one-hot encoding, except the first level
# ('blue') is dropped; a row with every indicator False therefore means blue.
encoded_df = pd.get_dummies(
    data=df,
    columns=['color'],
    drop_first=True,
)
encoded_df

Unnamed: 0,size,grade,city,color_green,color_red
0,small,A,Bengaluru,False,True
1,medium,B,Mysuru,False,False
2,large,C,Hubli,True,False
3,medium,B,Bengaluru,False,True
4,small,A,Mysuru,False,False


In [12]:
from category_encoders import BinaryEncoder

# Binary Encoding
# Only 'grade' is encoded; it is replaced by the bit columns grade_0 and
# grade_1, while the remaining columns pass through unchanged.
binary_encoder = BinaryEncoder(cols=['grade'])

# Fit on df and transform it in a single step.
encoded_df = binary_encoder.fit_transform(df)
encoded_df

Unnamed: 0,color,size,grade_0,grade_1,city
0,red,small,0,1,Bengaluru
1,blue,medium,1,0,Mysuru
2,green,large,1,1,Hubli
3,red,medium,1,0,Bengaluru
4,blue,small,0,1,Mysuru


In [13]:
# 5. Frequency Encoding
# Replace each city by the number of rows in which that city occurs.
city_counts = df['city'].value_counts()
encoded_df = df.assign(city_encoded=df['city'].map(city_counts))
encoded_df

Unnamed: 0,color,size,grade,city,city_encoded
0,red,small,A,Bengaluru,2
1,blue,medium,B,Mysuru,2
2,green,large,C,Hubli,1
3,red,medium,B,Bengaluru,2
4,blue,small,A,Mysuru,2
