One-Hot Encoding creates a binary column for each category.

Label Encoding assigns a unique integer to each category.

Ordinal Encoding assigns integers to categories while preserving order.

Count Encoding replaces categories with their counts.

Hashing Encoding uses hashing to transform categories into a fixed-size feature space.

Importing all the required libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder,OrdinalEncoder
from sklearn.feature_extraction import FeatureHasher

Reading the CSV file

In [3]:
# Load the student records from 'student_mark.csv' (resolved relative to the
# current working directory) into the DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer='student_mark.csv')

**1. One-Hot Encoding**

In [4]:
# Expand 'Marks' into one indicator column per distinct value (Marks_<value>);
# every other column is carried over unchanged. The result is a new frame, so
# `df` itself is not modified by this cell.
# NOTE(review): 'Marks' holds numeric scores; one-hot encoding is normally
# applied to a categorical column (e.g. 'Gender') - confirm this is intended.
one_hot_encoding = pd.get_dummies(data=df, columns=['Marks'])
one_hot_encoding

Unnamed: 0,Name,Gender,DOB,Marks_25,Marks_45,Marks_54,Marks_55,Marks_58,Marks_75,Marks_78,Marks_96
0,John,M,05-04-1988,False,False,False,True,False,False,False,False
1,Suresh,M,04-05-1987,False,False,False,False,False,True,False,False
2,Ramesh,M,25-05-1989,True,False,False,False,False,False,False,False
3,Jessica,F,12-08-1990,False,False,False,False,False,False,True,False
4,Jennifer,F,02-09-1989,False,False,False,False,True,False,False,False
5,Annu,F,05-04-1988,False,True,False,False,False,False,False,False
6,pooja,F,04-05-1987,False,False,False,True,False,False,False,False
7,Ritesh,M,25-05-1989,False,False,True,False,False,False,False,False
8,Farha,F,12-08-1990,False,False,False,True,False,False,False,False
9,Mukesh,M,02-09-1989,False,False,False,False,False,False,False,True


Label Encoding

In [5]:
# Give every distinct name an integer code and store it in a new
# 'encoded_name' column. This adds the column to `df` in place, so any later
# cell that reads `df` will see it as well.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# labels (y) rather than input features - fine for a demo, confirm otherwise.
label_encoder = LabelEncoder()
label_encoder.fit(df['Name'])  # learn the set of distinct names
df['encoded_name'] = label_encoder.transform(df['Name'])
df

Unnamed: 0,Name,Gender,DOB,Marks,encoded_name
0,John,M,05-04-1988,55,4
1,Suresh,M,04-05-1987,75,8
2,Ramesh,M,25-05-1989,25,6
3,Jessica,F,12-08-1990,78,3
4,Jennifer,F,02-09-1989,58,2
5,Annu,F,05-04-1988,45,0
6,pooja,F,04-05-1987,55,9
7,Ritesh,M,25-05-1989,54,7
8,Farha,F,12-08-1990,55,1
9,Mukesh,M,02-09-1989,96,5


Ordinal Encoding

In [6]:
# Small demo frame with a single ordered categorical column.
sizes = pd.DataFrame({'Size': ['Small', 'Large', 'Medium', 'Large', 'Small']})

# Explicit rank order, lowest first: Small -> 0, Medium -> 1, Large -> 2.
# The outer list holds one category list per encoded column.
categories = [['Small', 'Medium', 'Large']]

ordinal_encoder = OrdinalEncoder(categories=categories)
# Double brackets select a one-column DataFrame (2-D input for the encoder).
ordinal_encoder.fit(sizes[['Size']])
sizes['Size_encoded'] = ordinal_encoder.transform(sizes[['Size']])
sizes

Unnamed: 0,Size,Size_encoded
0,Small,0.0
1,Large,2.0
2,Medium,1.0
3,Large,2.0
4,Small,0.0


Count Encoding

In [7]:
# Frequency table: how many rows carry each 'Size' value
# (most frequent first, which is the value_counts default).
count_encoding = sizes['Size'].value_counts(sort=True)

# Replace every category by its frequency, stored in a new 'size_encode'
# column next to the existing columns of `sizes`.
sizes['size_encode'] = sizes['Size'].map(count_encoding)
sizes

Unnamed: 0,Size,Size_encoded,size_encode
0,Small,0.0,2
1,Large,2.0,2
2,Medium,1.0,1
3,Large,2.0,2
4,Small,0.0,2


Hashing Encoding

In [8]:
# One {column: value} dict per row - the input shape FeatureHasher is
# configured for below via input_type='dict'.
df_dict = df.to_dict(orient='records')

# Hash every row into a fixed-width vector of 3 features.
# NOTE(review): with only 3 output features, distinct inputs will share
# buckets, and `df` here includes all columns (plus 'encoded_name' if the
# label-encoding cell has already run) - confirm that is the intended input.
hasher = FeatureHasher(n_features=3, input_type='dict')

# The hasher returns a sparse matrix; densify it so the notebook can show it.
df_hashing = hasher.fit(df_dict).transform(df_dict).toarray()
df_hashing

array([[ -2.,   5., -55.],
       [ -1.,   8., -75.],
       [ -1.,   6., -25.],
       [  1.,   2., -79.],
       [  0.,   3., -58.],
       [ -1.,   0., -45.],
       [  0.,   8., -55.],
       [  0.,   8., -54.],
       [  0.,   0., -57.],
       [  0.,   6., -96.]])