In [10]:
import pandas as pd



In [11]:
# Toy dataset: each row holds a color, a size, a price and a class label.
# No column names are given here, so pandas labels the columns 0..3.
df = pd.DataFrame(
    data=[
        ['green', 'M', 10.1, 'class2'],
        ['red', 'L', 13.5, 'class1'],
        ['blue', 'XL', 15.3, 'class2'],
    ]
)
df

Unnamed: 0,0,1,2,3
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [12]:
# Replace the default integer column labels with descriptive names
# (assigned in positional order, left to right).
df.columns = [
    'color',
    'size',
    'price',
    'classlabel',
]
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [13]:
# Mapping ordinal features
# Sizes are ordered, so each letter gets an increasing integer code
# (XL = L + 1 = M + 2).
size_mapping = dict(XL=3, L=2, M=1)
# Build a new frame with the encoded 'size' column; `df` itself is left untouched.
df_size_num = df.assign(size=df['size'].map(size_mapping))

In [14]:
# Show the original frame: 'size' still holds the letters, because the
# integer mapping was written to the copy (df_size_num), not to df.
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [15]:
# Show the copy whose 'size' column was replaced by its integer code
# from size_mapping.
df_size_num

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [21]:
# Reverse-mapping dictionary: swap keys and values of size_mapping so that
# integer codes can be translated back into size letters.
inv_size_mapping = dict(zip(size_mapping.values(), size_mapping.keys()))
inv_size_mapping

{3: 'XL', 2: 'L', 1: 'M'}

In [24]:
# Decode the integer sizes back to letters in a new frame.
df_size_let = df_size_num.assign(
    size=df_size_num['size'].map(inv_size_mapping)
)
df_size_let

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [33]:
# Encoding class labels
import numpy as np

# np.unique returns the distinct class labels in sorted order; enumerate
# numbers them from 0, giving a label -> integer lookup table.
class_mapping = dict(
    (label, idx) for idx, label in enumerate(np.unique(df['classlabel']))
)
class_mapping

{'class1': 0, 'class2': 1}

In [38]:
# Encode the class labels as integers in a new frame built from the
# size-encoded data.
df_enc_classes = df_size_num.assign(
    classlabel=df_size_num['classlabel'].map(class_mapping)
)
df_enc_classes

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,1
1,red,2,13.5,0
2,blue,3,15.3,1


In [42]:
# Reverse mapping for recovering class labels: integer code -> label string.
inv_class_mapping = {
    code: label
    for label, code in class_mapping.items()
}
inv_class_mapping

{0: 'class1', 1: 'class2'}

In [45]:
# Translate the integer class codes back into their original label strings
# in a new frame.
df_deenc_classes = df_enc_classes.assign(
    classlabel=df_enc_classes['classlabel'].map(inv_class_mapping)
)
df_deenc_classes

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [60]:
#################################################################
# Alternative label encoder already implemented in scikit-learn #
#################################################################
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
# Work on a copy so the original `df` keeps its string class labels.
df_enc_le = df.copy()
# fit_transform(y) is equivalent to fit(y).transform(y). It is called once
# only: a second call whose result is discarded would just refit the encoder.
df_enc_le['classlabel'] = label_encoder.fit_transform(df['classlabel'].values)
df_enc_le

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,1
1,red,L,13.5,0
2,blue,XL,15.3,1


In [59]:
# Inverse transform using LabelEncoder: turn the integer codes back into
# the original class-label strings, in a new frame.
df_deenc_le = df_enc_le.assign(
    classlabel=label_encoder.inverse_transform(df_enc_le['classlabel'].values)
)
df_deenc_le

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [69]:
####################
# One-hot-encoding #
####################
from sklearn.preprocessing import OneHotEncoder

# Feature matrix as a NumPy array: column 0 = color, 1 = size, 2 = price.
X = df[['color', 'size', 'price']].values
color_ohe = OneHotEncoder()
# Select the color column with a list index so it stays 2-D
# (n_samples x 1), the same shape reshape(-1, 1) would give.
X_enc = color_ohe.fit_transform(X[:, [0]])
print(X_enc)

  (0, 1)	1.0
  (1, 2)	1.0
  (2, 0)	1.0


In [86]:
# transform columns in a multi-feature array
from sklearn.compose import ColumnTransformer

# Feature matrix as a NumPy array: column 0 = color, 1 = size, 2 = price.
X = df[['color', 'size', 'price']].values
# One-hot encode the color column (index 0) and pass the size and price
# columns (indices 1 and 2) through unchanged.
c_transf = ColumnTransformer([('onehot', OneHotEncoder(), [0]),
                              ('nothing', 'passthrough', [1, 2])])
# Only the last expression of a cell is displayed, so the transformed
# array is the sole output of this cell.
c_transf.fit_transform(X)

array([[0.0, 1.0, 0.0, 'M', 10.1],
       [0.0, 0.0, 1.0, 'L', 13.5],
       [1.0, 0.0, 0.0, 'XL', 15.3]], dtype=object)

In [95]:
# One-hot-encoding using pandas.get_dummies
# drop_first=True omits the first dummy column ('blue' here), so a row of
# all zeros stands for that category.
# dtype=int pins the 0/1 integer output; pandas >= 2.0 would otherwise
# return boolean (True/False) dummy columns by default.
df_dummies_color = pd.get_dummies(df['color'], drop_first=True, dtype=int)
df_dummies_color

Unnamed: 0,green,red
0,1,0
1,0,1
2,0,0
