In [2]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Load the sample dataset from its relative path into a pandas DataFrame.
dataFrame = pd.read_csv(filepath_or_buffer='../../Sample_Data.csv')

# Preview the first five rows to confirm the columns were parsed as expected.
dataFrame.head(n=5)

Unnamed: 0,Name,Gender,City,Passed
0,Aman,Male,Delhi,Yes
1,Priya,Female,Mumbai,Yes
2,Rahul,Male,Bangalore,No
3,Anjali,Female,Mumbai,Yes
4,Ravi,Male,Delhi,Yes


## Text Encoding

### Label Encoding

In [3]:
# Label encoding maps each distinct category of a text column to an integer code
# so that machine learning algorithms can consume the column as numeric data.
# LabelEncoder numbers the classes in sorted (alphabetical) order, so for the
# rows shown in the output:
#   Gender: Female -> 0, Male -> 1
#   Passed: No -> 0, Yes -> 1
# The fitted encoders are kept in variables (rather than discarded) so the codes
# can be mapped back with inverse_transform() or re-applied to new data with
# transform(); the learned categories are available via the classes_ attribute.
# NOTE: scikit-learn documents LabelEncoder as intended for target labels (y);
# for input features such as Gender, OrdinalEncoder is the documented equivalent.
genderEncoder = LabelEncoder()
passedEncoder = LabelEncoder()

# Work on a copy so the originally loaded DataFrame stays unmodified.
newDataFrame = dataFrame.copy()
newDataFrame['Gender Encoded'] = genderEncoder.fit_transform(newDataFrame['Gender'])
newDataFrame['Passed Encoded'] = passedEncoder.fit_transform(newDataFrame['Passed'])
newDataFrame.head()


Unnamed: 0,Name,Gender,City,Passed,Gender Encoded,Passed Encoded
0,Aman,Male,Delhi,Yes,1,1
1,Priya,Female,Mumbai,Yes,0,1
2,Rahul,Male,Bangalore,No,1,0
3,Anjali,Female,Mumbai,Yes,0,1
4,Ravi,Male,Delhi,Yes,1,1


### One Hot Encoding

In [4]:
# One-hot encoding replaces a categorical column with one indicator column per
# category; each indicator marks whether the row belongs to that category.
# It is useful for algorithms that need numeric input and should not infer an
# ordering between categories.
# pd.get_dummies names every new column <original column>_<category> and drops
# the original column. Example from this dataset: City becomes City_Bangalore,
# City_Chennai, City_Delhi and City_Mumbai.
# The indicator columns hold True/False in the output shown for this cell; pass
# dtype=int to pd.get_dummies if literal 0/1 values are required.
#
# The cell overwrites newDataFrame, so once it has run the 'City' column is gone
# and a second run would raise KeyError. The guard makes a re-run a no-op.
if 'City' in newDataFrame.columns:
    newDataFrame = pd.get_dummies(newDataFrame, columns=['City'])
newDataFrame.head()

Unnamed: 0,Name,Gender,Passed,Gender Encoded,Passed Encoded,City_Bangalore,City_Chennai,City_Delhi,City_Mumbai
0,Aman,Male,Yes,1,1,False,False,True,False
1,Priya,Female,Yes,0,1,False,False,False,True
2,Rahul,Male,No,1,0,True,False,False,False
3,Anjali,Female,Yes,0,1,False,False,False,True
4,Ravi,Male,Yes,1,1,False,False,True,False
