In [1]:
import pandas as pd , numpy as np
import category_encoders as ce

import warnings
warnings.filterwarnings('ignore')

In [2]:
data = {'Temperature':['Hot','Cold','Very Hot','Warm','Hot','Warm','Warm','Hot','Hot','Cold'],
        'Color':['Red','Yellow','Blue','Blue','Red','Yellow','Red','Yellow','Yellow','Yellow'],
        'Target':[1,1,1,0,1,0,1,0,1,1]}
df = pd.DataFrame(data)
df

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


# 1. One Hot Encoding

In [3]:
# Using get_dummies method in pandas
df_ohe = df.copy()
one_hot_1 = pd.get_dummies(df_ohe,prefix = 'Temp' ,columns=['Temperature'],drop_first=False)
one_hot_1.insert(loc=2, column='Temperature', value=df.Temperature.values)
one_hot_1

Unnamed: 0,Color,Target,Temperature,Temp_Cold,Temp_Hot,Temp_Very Hot,Temp_Warm
0,Red,1,Hot,False,True,False,False
1,Yellow,1,Cold,True,False,False,False
2,Blue,1,Very Hot,False,False,True,False
3,Blue,0,Warm,False,False,False,True
4,Red,1,Hot,False,True,False,False
5,Yellow,0,Warm,False,False,False,True
6,Red,1,Warm,False,False,False,True
7,Yellow,0,Hot,False,True,False,False
8,Yellow,1,Hot,False,True,False,False
9,Yellow,1,Cold,True,False,False,False


In [4]:
from sklearn.preprocessing import OneHotEncoder
# ohe = OneHotEncoder(drop='first')
ohe = OneHotEncoder()
oh_array = ohe.fit_transform(df['Temperature'].values.reshape(-1, 1)).toarray()
oh_df = pd.DataFrame(oh_array,columns=['Temp_Cold','Temp_Hot','Temp_Very_Hot','Temp_Warm'])
pd.concat([df,oh_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_Cold,Temp_Hot,Temp_Very_Hot,Temp_Warm
0,Hot,Red,1,0.0,1.0,0.0,0.0
1,Cold,Yellow,1,1.0,0.0,0.0,0.0
2,Very Hot,Blue,1,0.0,0.0,1.0,0.0
3,Warm,Blue,0,0.0,0.0,0.0,1.0
4,Hot,Red,1,0.0,1.0,0.0,0.0
5,Warm,Yellow,0,0.0,0.0,0.0,1.0
6,Warm,Red,1,0.0,0.0,0.0,1.0
7,Hot,Yellow,0,0.0,1.0,0.0,0.0
8,Hot,Yellow,1,0.0,1.0,0.0,0.0
9,Cold,Yellow,1,1.0,0.0,0.0,0.0


In [5]:
import category_encoders as ce
ohe = ce.OneHotEncoder(cols=['Temperature'])
ce_ohe = ohe.fit_transform(df.iloc[:,0], df.iloc[:,-1])
ce_ohe.columns = ['Temp_Hot','Temp_Cold','Temp_Very_Hot','Temp_Warm']
pd.concat([df,ce_ohe],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_Hot,Temp_Cold,Temp_Very_Hot,Temp_Warm
0,Hot,Red,1,1,0,0,0
1,Cold,Yellow,1,0,1,0,0
2,Very Hot,Blue,1,0,0,1,0
3,Warm,Blue,0,0,0,0,1
4,Hot,Red,1,1,0,0,0
5,Warm,Yellow,0,0,0,0,1
6,Warm,Red,1,0,0,0,1
7,Hot,Yellow,0,1,0,0,0
8,Hot,Yellow,1,1,0,0,0
9,Cold,Yellow,1,0,1,0,0


# 2. Label Encoding

In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df_ohe['Temperature_encoded'] = le.fit_transform(df.Temperature)
df_ohe

Unnamed: 0,Temperature,Color,Target,Temperature_encoded
0,Hot,Red,1,1
1,Cold,Yellow,1,0
2,Very Hot,Blue,1,2
3,Warm,Blue,0,3
4,Hot,Red,1,1
5,Warm,Yellow,0,3
6,Warm,Red,1,3
7,Hot,Yellow,0,1
8,Hot,Yellow,1,1
9,Cold,Yellow,1,0


In [7]:
fact = df.copy()
fact['Temperature_factor'] = pd.factorize(df.Temperature)[0]
fact

Unnamed: 0,Temperature,Color,Target,Temperature_factor
0,Hot,Red,1,0
1,Cold,Yellow,1,1
2,Very Hot,Blue,1,2
3,Warm,Blue,0,3
4,Hot,Red,1,0
5,Warm,Yellow,0,3
6,Warm,Red,1,3
7,Hot,Yellow,0,0
8,Hot,Yellow,1,0
9,Cold,Yellow,1,1


# 3. Ordinal Encoding

In [8]:
from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder()
oe_val = oe.fit_transform(df['Temperature'].values.reshape(-1, 1))
pd.concat([df,pd.DataFrame(oe_val,columns=['Temperature_Oe'])],axis=1)

Unnamed: 0,Temperature,Color,Target,Temperature_Oe
0,Hot,Red,1,1.0
1,Cold,Yellow,1,0.0
2,Very Hot,Blue,1,2.0
3,Warm,Blue,0,3.0
4,Hot,Red,1,1.0
5,Warm,Yellow,0,3.0
6,Warm,Red,1,3.0
7,Hot,Yellow,0,1.0
8,Hot,Yellow,1,1.0
9,Cold,Yellow,1,0.0


In [9]:
import category_encoders as ce
ohe = ce.OrdinalEncoder(cols=['Temperature'])
df['Temp_ce_oe'] = ohe.fit_transform(df.iloc[:,0], df.iloc[:,-1])
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe
0,Hot,Red,1,1
1,Cold,Yellow,1,2
2,Very Hot,Blue,1,3
3,Warm,Blue,0,4
4,Hot,Red,1,1
5,Warm,Yellow,0,4
6,Warm,Red,1,4
7,Hot,Yellow,0,1
8,Hot,Yellow,1,1
9,Cold,Yellow,1,2


In [10]:
# Best way is mapping based on their actual label order
# Ex : Cold < Warm <Hot < Very Hot = 1 < 2 < 3 < 4
Temp_order = {'Cold' : 1 , 'Warm' : 2 , 'Hot' : 3 , 'Very Hot' : 4}
df['Temperature_Order'] = df.Temperature.map(Temp_order)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order
0,Hot,Red,1,1,3
1,Cold,Yellow,1,2,1
2,Very Hot,Blue,1,3,4
3,Warm,Blue,0,4,2
4,Hot,Red,1,1,3
5,Warm,Yellow,0,4,2
6,Warm,Red,1,4,2
7,Hot,Yellow,0,1,3
8,Hot,Yellow,1,1,3
9,Cold,Yellow,1,2,1


# 4. Frequency or Count Encoder

In [11]:
# Using Pandas groupby()
cat_freq = df.groupby('Temperature').size() / len(df)
df['Temp_Freq_Enc'] = df.Temperature.map(cat_freq)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc
0,Hot,Red,1,1,3,0.4
1,Cold,Yellow,1,2,1,0.2
2,Very Hot,Blue,1,3,4,0.1
3,Warm,Blue,0,4,2,0.3
4,Hot,Red,1,1,3,0.4
5,Warm,Yellow,0,4,2,0.3
6,Warm,Red,1,4,2,0.3
7,Hot,Yellow,0,1,3,0.4
8,Hot,Yellow,1,1,3,0.4
9,Cold,Yellow,1,2,1,0.2


In [12]:
# Using category_encoders CountEncoder
import category_encoders as ce
ce = ce.CountEncoder(cols=['Temperature'])
df['Temp_Count_Enc'] = ce.fit_transform(df.iloc[:,0], df.iloc[:,-1])
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc
0,Hot,Red,1,1,3,0.4,4
1,Cold,Yellow,1,2,1,0.2,2
2,Very Hot,Blue,1,3,4,0.1,1
3,Warm,Blue,0,4,2,0.3,3
4,Hot,Red,1,1,3,0.4,4
5,Warm,Yellow,0,4,2,0.3,3
6,Warm,Red,1,4,2,0.3,3
7,Hot,Yellow,0,1,3,0.4,4
8,Hot,Yellow,1,1,3,0.4,4
9,Cold,Yellow,1,2,1,0.2,2


# 5. Binary Encoding

In [13]:
import category_encoders as ce
be = ce.BinaryEncoder(cols=['Temperature'])
be_df = be.fit_transform(df['Temperature'])
pd.concat([df,be_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,0,0,1
1,Cold,Yellow,1,2,1,0.2,2,0,1,0
2,Very Hot,Blue,1,3,4,0.1,1,0,1,1
3,Warm,Blue,0,4,2,0.3,3,1,0,0
4,Hot,Red,1,1,3,0.4,4,0,0,1
5,Warm,Yellow,0,4,2,0.3,3,1,0,0
6,Warm,Red,1,4,2,0.3,3,1,0,0
7,Hot,Yellow,0,1,3,0.4,4,0,0,1
8,Hot,Yellow,1,1,3,0.4,4,0,0,1
9,Cold,Yellow,1,2,1,0.2,2,0,1,0


# 6. Base-N encoder

In [14]:
bne = ce.BaseNEncoder(cols=['Temperature'],base=2)
bne_df = bne.fit_transform(df['Temperature'],df.Target)
pd.concat([df,bne_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,0,0,1
1,Cold,Yellow,1,2,1,0.2,2,0,1,0
2,Very Hot,Blue,1,3,4,0.1,1,0,1,1
3,Warm,Blue,0,4,2,0.3,3,1,0,0
4,Hot,Red,1,1,3,0.4,4,0,0,1
5,Warm,Yellow,0,4,2,0.3,3,1,0,0
6,Warm,Red,1,4,2,0.3,3,1,0,0
7,Hot,Yellow,0,1,3,0.4,4,0,0,1
8,Hot,Yellow,1,1,3,0.4,4,0,0,1
9,Cold,Yellow,1,2,1,0.2,2,0,1,0


# 7. Helmert Encoding

In [15]:
import category_encoders as ce
he = ce.HelmertEncoder(cols=['Temperature'])
he_df = he.fit_transform(df['Temperature'])
pd.concat([df,he_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,intercept,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,1,-1.0,-1.0,-1.0
1,Cold,Yellow,1,2,1,0.2,2,1,1.0,-1.0,-1.0
2,Very Hot,Blue,1,3,4,0.1,1,1,0.0,2.0,-1.0
3,Warm,Blue,0,4,2,0.3,3,1,0.0,0.0,3.0
4,Hot,Red,1,1,3,0.4,4,1,-1.0,-1.0,-1.0
5,Warm,Yellow,0,4,2,0.3,3,1,0.0,0.0,3.0
6,Warm,Red,1,4,2,0.3,3,1,0.0,0.0,3.0
7,Hot,Yellow,0,1,3,0.4,4,1,-1.0,-1.0,-1.0
8,Hot,Yellow,1,1,3,0.4,4,1,-1.0,-1.0,-1.0
9,Cold,Yellow,1,2,1,0.2,2,1,1.0,-1.0,-1.0


# 8. Mean Encoding or Target Encoding

In [16]:
# Using category_encoders TargetEncoder
import category_encoders as ce
te = ce.TargetEncoder(cols=['Temperature'])
df['Temperature_ce_TarEnc'] = te.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc
0,Hot,Red,1,1,3,0.4,4,0.708399
1,Cold,Yellow,1,2,1,0.2,2,0.742555
2,Very Hot,Blue,1,3,4,0.1,1,0.739033
3,Warm,Blue,0,4,2,0.3,3,0.643363
4,Hot,Red,1,1,3,0.4,4,0.708399
5,Warm,Yellow,0,4,2,0.3,3,0.643363
6,Warm,Red,1,4,2,0.3,3,0.643363
7,Hot,Yellow,0,1,3,0.4,4,0.708399
8,Hot,Yellow,1,1,3,0.4,4,0.708399
9,Cold,Yellow,1,2,1,0.2,2,0.742555


In [17]:
# Using Pandas groupby()
tar_enc = df.groupby('Temperature')['Target'].mean()
# print(tar_enc)
df['Temperature_tar_enc'] = df['Temperature'].map(tar_enc)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0


# 9. Weight of Evidence Encoding

In [18]:
woe = ce.WOEEncoder(cols=['Temperature'])
df['Temperature_ce_WOE'] = woe.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826


# 10. Sum Encoder (Deviation Encoding or Effect Encoding)

In [19]:
se = ce.SumEncoder(cols=['Temperature'])
se_df = se.fit_transform(df['Temperature'],df.Target)
pd.concat([df,se_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,intercept,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,1,1.0,0.0,0.0
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,1,0.0,1.0,0.0
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,1,0.0,0.0,1.0
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,1,-1.0,-1.0,-1.0
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,1,1.0,0.0,0.0
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,1,-1.0,-1.0,-1.0
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,1,-1.0,-1.0,-1.0
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,1,1.0,0.0,0.0
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,1,1.0,0.0,0.0
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,1,0.0,1.0,0.0


# 11. Leave-one-out Encoder (LOO or LOOE)

In [20]:
loue = ce.LeaveOneOutEncoder(cols=['Temperature'])
df['Temperature_ce_CBE'] = loue.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.666667
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,1.0
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.5
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.666667
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.5
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.0
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,1.0
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.666667
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,1.0


# 12. CatBoost Encoder

In [21]:
cbe = ce.CatBoostEncoder(cols=['Temperature'])
df['Temperature_ce_CBE'] = cbe.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85


# 13. James-Stein Encoding

In [22]:
jse = ce.JamesSteinEncoder(cols=['Temperature'])
df['Temperature_ce_JSE'] = jse.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE,Temperature_ce_JSE
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7,0.741379
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7,1.0
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7,1.0
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7,0.405229
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85,0.741379
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35,0.405229
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333,0.405229
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9,0.741379
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675,0.741379
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85,1.0


# 14. M-estimator Encoding

In [23]:
mee = ce.MEstimateEncoder(cols=['Temperature'],m=1.0)
df['Temperature_ce_JSE'] = mee.fit_transform(df['Temperature'],df.Target)
df

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE,Temperature_ce_JSE
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7,0.74
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7,0.9
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7,0.85
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7,0.425
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85,0.74
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35,0.425
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333,0.425
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9,0.74
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675,0.74
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85,0.9


# 15. Hashing Encoding

In [24]:
hash_df = ce.HashingEncoder(cols=['Temperature'],n_components=8)
hash_df = hash_df.fit_transform(df['Temperature'],df.Target)
pd.concat([df,hash_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE,Temperature_ce_JSE,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7,0.74,1,0,0,0,0,0,0,0
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7,0.9,0,0,1,0,0,0,0,0
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7,0.85,0,1,0,0,0,0,0,0
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7,0.425,0,1,0,0,0,0,0,0
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85,0.74,1,0,0,0,0,0,0,0
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35,0.425,0,1,0,0,0,0,0,0
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333,0.425,0,1,0,0,0,0,0,0
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9,0.74,1,0,0,0,0,0,0,0
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675,0.74,1,0,0,0,0,0,0,0
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85,0.9,0,0,1,0,0,0,0,0


# 16. Backward Difference Encoding

In [25]:
bde = ce.BackwardDifferenceEncoder(cols=['Temperature'])
bde_df = bde.fit_transform(df['Temperature'],df.Target)
pd.concat([df,bde_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE,Temperature_ce_JSE,intercept,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7,0.74,1,-0.75,-0.5,-0.25
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7,0.9,1,0.25,-0.5,-0.25
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7,0.85,1,0.25,0.5,-0.25
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7,0.425,1,0.25,0.5,0.75
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85,0.74,1,-0.75,-0.5,-0.25
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35,0.425,1,0.25,0.5,0.75
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333,0.425,1,0.25,0.5,0.75
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9,0.74,1,-0.75,-0.5,-0.25
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675,0.74,1,-0.75,-0.5,-0.25
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85,0.9,1,0.25,-0.5,-0.25


# 17. Polynomial Encoding

In [26]:
pe = ce.PolynomialEncoder(cols=['Temperature'])
pe_df = pe.fit_transform(df['Temperature'],df.Target)
pd.concat([df,pe_df],axis=1)

Unnamed: 0,Temperature,Color,Target,Temp_ce_oe,Temperature_Order,Temp_Freq_Enc,Temp_Count_Enc,Temperature_ce_TarEnc,Temperature_tar_enc,Temperature_ce_WOE,Temperature_ce_CBE,Temperature_ce_JSE,intercept,Temperature_0,Temperature_1,Temperature_2
0,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.7,0.74,1,-0.67082,0.5,-0.223607
1,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.7,0.9,1,-0.223607,-0.5,0.67082
2,Very Hot,Blue,1,3,4,0.1,1,0.739033,1.0,0.0,0.7,0.85,1,0.223607,-0.5,-0.67082
3,Warm,Blue,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.7,0.425,1,0.67082,0.5,0.223607
4,Hot,Red,1,1,3,0.4,4,0.708399,0.75,0.105361,0.85,0.74,1,-0.67082,0.5,-0.223607
5,Warm,Yellow,0,4,2,0.3,3,0.643363,0.333333,-0.993252,0.35,0.425,1,0.67082,0.5,0.223607
6,Warm,Red,1,4,2,0.3,3,0.643363,0.333333,-0.993252,0.233333,0.425,1,0.67082,0.5,0.223607
7,Hot,Yellow,0,1,3,0.4,4,0.708399,0.75,0.105361,0.9,0.74,1,-0.67082,0.5,-0.223607
8,Hot,Yellow,1,1,3,0.4,4,0.708399,0.75,0.105361,0.675,0.74,1,-0.67082,0.5,-0.223607
9,Cold,Yellow,1,2,1,0.2,2,0.742555,1.0,0.510826,0.85,0.9,1,-0.223607,-0.5,0.67082


# 18. MultiLabelBinarizer

In [27]:
data = {'Type':[['fruits','vegitables'],['animals','vegitables'],['animals','fruits'],['vehicals','fruits']]}
df = pd.DataFrame(data)
df

Unnamed: 0,Type
0,"[fruits, vegitables]"
1,"[animals, vegitables]"
2,"[animals, fruits]"
3,"[vehicals, fruits]"


In [28]:
# importing MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

# instantiating MultiLabelBinarizer
mlb = MultiLabelBinarizer()
types_encoded = pd.DataFrame(mlb.fit_transform(df['Type']),columns=mlb.classes_)
types_encoded.head()

Unnamed: 0,animals,fruits,vegitables,vehicals
0,0,1,1,0
1,1,0,1,0
2,1,1,0,0
3,0,1,0,1
