# Label Encoding

In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [1]:
lis = ['ikan', 'ikan', 'anjing', 'singa', 'gajah', 'anjing', 'beruang', 'burung', 'burung']

In [3]:
df_hewan = pd.DataFrame(data=pd.Series(lis),
                        columns = ['Jenis'])

In [4]:
df_hewan

Unnamed: 0,Jenis
0,ikan
1,ikan
2,anjing
3,singa
4,gajah
5,anjing
6,beruang
7,burung
8,burung


In [6]:
# feature engineering

label_enc = preprocessing.LabelEncoder()
label_enc.fit(df_hewan['Jenis'].values)
df_hewan['Jenis_Encoded'] = label_enc.transform(df_hewan['Jenis'].values)

df_hewan

Unnamed: 0,Jenis,Jenis_Encoded
0,ikan,4
1,ikan,4
2,anjing,0
3,singa,5
4,gajah,3
5,anjing,0
6,beruang,1
7,burung,2
8,burung,2


# One Hot Encoding

In [7]:
lis = ['ikan', 'ikan', 'anjing', 'singa', 'gajah', 'anjing', 'beruang', 'burung', 'burung']
df_hewan = pd.DataFrame(data=pd.Series(lis),
                        columns = ['Jenis'])

oh_enc = preprocessing.OneHotEncoder()
jenis_ohe = pd.DataFrame(oh_enc.fit_transform(df_hewan[['Jenis']]).toarray())
df_hewan = df_hewan.join(jenis_ohe)

df_hewan

Unnamed: 0,Jenis,0,1,2,3,4,5
0,ikan,0.0,0.0,0.0,0.0,1.0,0.0
1,ikan,0.0,0.0,0.0,0.0,1.0,0.0
2,anjing,1.0,0.0,0.0,0.0,0.0,0.0
3,singa,0.0,0.0,0.0,0.0,0.0,1.0
4,gajah,0.0,0.0,0.0,1.0,0.0,0.0
5,anjing,1.0,0.0,0.0,0.0,0.0,0.0
6,beruang,0.0,1.0,0.0,0.0,0.0,0.0
7,burung,0.0,0.0,1.0,0.0,0.0,0.0
8,burung,0.0,0.0,1.0,0.0,0.0,0.0


# Get Dummies

In [9]:
lis = ['ikan', 'ikan', 'anjing', 'singa', 'gajah', 'anjing', 'beruang', 'burung', 'burung']
df_hewan = pd.DataFrame(data=pd.Series(lis),
                        columns = ['Jenis'])

dum_df = pd.get_dummies(df_hewan, columns=['Jenis'], prefix=['hewan'])

df_hewan_with_dummy = df_hewan.join(dum_df)
df_hewan_with_dummy

Unnamed: 0,Jenis,hewan_anjing,hewan_beruang,hewan_burung,hewan_gajah,hewan_ikan,hewan_singa
0,ikan,0,0,0,0,1,0
1,ikan,0,0,0,0,1,0
2,anjing,1,0,0,0,0,0
3,singa,0,0,0,0,0,1
4,gajah,0,0,0,1,0,0
5,anjing,1,0,0,0,0,0
6,beruang,0,1,0,0,0,0
7,burung,0,0,1,0,0,0
8,burung,0,0,1,0,0,0


In [10]:
df_hewan_with_dummy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Jenis          9 non-null      object
 1   hewan_anjing   9 non-null      uint8 
 2   hewan_beruang  9 non-null      uint8 
 3   hewan_burung   9 non-null      uint8 
 4   hewan_gajah    9 non-null      uint8 
 5   hewan_ikan     9 non-null      uint8 
 6   hewan_singa    9 non-null      uint8 
dtypes: object(1), uint8(6)
memory usage: 254.0+ bytes
