# Data Encoding
    1. Nominal/One-Hot Encoding
    2. Label and Ordinal Encoding
    3. Target Guided Ordinal Encoding

## 1. Nominal/One-Hot Encoding
*e.g.:*
        1. Red: [1,0,0]
        2. Green: [0,1,0]
        3. Blue: [0,0,1]

In [1]:
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder

In [4]:
# Build a one-column DataFrame of colour labels to be encoded.
color_values = [
    'red', 'blue', 'green', 'green', 'red', 'blue',
    'green', 'red', 'blue', 'green', 'red', 'blue',
    'green', 'red', 'blue', 'green', 'red', 'blue',
]
df = pd.DataFrame({'color': color_values})

In [5]:
# Display the DataFrame to confirm the single 'color' column.
df

Unnamed: 0,color
0,red
1,blue
2,green
3,green
4,red
5,blue
6,green
7,red
8,blue
9,green


In [6]:
# Instantiate a OneHotEncoder using its default settings.
encoder = OneHotEncoder()

In [9]:
# Fit the encoder on the 'color' column and encode it in one step.
# Double brackets select a one-column DataFrame (2-D input) rather than a Series.
color_matrix = encoder.fit_transform(df[['color']])
# Convert the encoder's result into a dense NumPy array.
encoded = color_matrix.toarray()

In [10]:
# Inspect the encoded array: one row per sample, one column per colour category.
encoded

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [13]:
# Wrap the encoded array in a DataFrame, labelling each column with the
# feature name generated by the fitted encoder (e.g. 'color_blue').
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out())

In [14]:
# Display the one-hot encoded colours with their named columns.
encoded_df

Unnamed: 0,color_blue,color_green,color_red
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,0.0,1.0
5,1.0,0.0,0.0
6,0.0,1.0,0.0
7,0.0,0.0,1.0
8,1.0,0.0,0.0
9,0.0,1.0,0.0


In [16]:
# Encode a single new observation with the already-fitted encoder.
# The input is passed as a DataFrame carrying the same 'color' column name the
# encoder was fitted on; a bare nested list has no feature names, which makes
# scikit-learn emit an "X does not have valid feature names" UserWarning.
encoder.transform(pd.DataFrame({'color': ['red']})).toarray()



array([[0., 0., 1.]])

In [18]:
# Place the one-hot columns alongside the original 'color' column.
pd.concat(objs=[df, encoded_df], axis='columns')

Unnamed: 0,color,color_blue,color_green,color_red
0,red,0.0,0.0,1.0
1,blue,1.0,0.0,0.0
2,green,0.0,1.0,0.0
3,green,0.0,1.0,0.0
4,red,0.0,0.0,1.0
5,blue,1.0,0.0,0.0
6,green,0.0,1.0,0.0
7,red,0.0,0.0,1.0
8,blue,1.0,0.0,0.0
9,green,0.0,1.0,0.0


In [19]:
# Load seaborn's 'tips' sample dataset and preview the first five rows.
# Note: this rebinds `df`; the colour DataFrame from the earlier cells is no
# longer reachable under that name.
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [23]:
# Re-fit the same encoder on the 'sex' column of the tips data.
# Fitting again replaces what the encoder learned from the previous column.
sex_matrix = encoder.fit_transform(df[['sex']])
# Convert the result into a dense array.
sex_encoded = sex_matrix.toarray()

In [25]:
# Label the encoded 'sex' columns with the encoder's generated feature names.
sex_feature_names = encoder.get_feature_names_out()
sex_encoded_df1 = pd.DataFrame(sex_encoded, columns=sex_feature_names)

In [26]:
# Display the one-hot encoded 'sex' column (one indicator column per category).
sex_encoded_df1

Unnamed: 0,sex_Female,sex_Male
0,1.0,0.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,1.0,0.0
...,...,...
239,0.0,1.0
240,1.0,0.0
241,0.0,1.0
242,0.0,1.0


In [27]:
# One-hot encode several categorical columns in a single call: the encoder is
# re-fitted on all three columns and the result is converted to a dense array.
categorical_columns = ['smoker', 'day', 'time']
smoker_day_time = encoder.fit_transform(df[categorical_columns]).toarray()

In [28]:
# Inspect the dense array holding the encoded 'smoker', 'day' and 'time' columns.
smoker_day_time

array([[1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 1., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 1., 1., 0.]])

In [29]:
# Give the encoded array readable column names taken from the fitted encoder.
multi_feature_names = encoder.get_feature_names_out()
smoker_day_time_df2 = pd.DataFrame(smoker_day_time, columns=multi_feature_names)

In [30]:
# Display the encoded 'smoker', 'day' and 'time' indicator columns.
smoker_day_time_df2

Unnamed: 0,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur,time_Dinner,time_Lunch
0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...
239,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
240,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
241,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
242,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [32]:
# Append both sets of one-hot columns to the tips data, column-wise.
# The original categorical columns are kept alongside their encoded versions.
tips_frames = [df, sex_encoded_df1, smoker_day_time_df2]
pd.concat(tips_frames, axis='columns')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur,time_Dinner,time_Lunch
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,21.01,3.50,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
240,27.18,2.00,Female,Yes,Sat,Dinner,2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
242,17.82,1.75,Male,No,Sat,Dinner,2,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
