#### 5.1 Encoding Nominal Categorical Features

In [3]:
import numpy as np

from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer

# Nominal feature: one state name per observation, stored as a (5, 1) column
feature = np.array(["Texas",
                    "California",
                    "Texas",
                    "Delaware",
                    "Texas"]).reshape(-1, 1)

# Binarizer that emits one indicator column per distinct class
one_hot = LabelBinarizer()

# Learn the classes and encode the feature in one step; displayed as the cell output
one_hot.fit_transform(feature)

array([[0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1]])

#### 5.2 Encoding Ordinal Categorical Features

In [2]:
# An ordinal feature is a categorical feature whose classes have a meaningful order.

import pandas as pd
dataframe = pd.DataFrame({"Score": ["Low", "Low", "Medium", "Medium", "High"]})

# Create mapper: each class gets a number that preserves Low < Medium < High
scale_mapper = {"Low":1,
                "Medium":2,
                "High":3}

# Encode with Series.map instead of Series.replace: replacing strings with
# numbers relies on silent object -> int downcasting, which pandas 2.2+
# deprecates (FutureWarning). map builds the numeric Series directly.
# Note: map turns any class that is missing from scale_mapper into NaN.
score_scaled = dataframe["Score"].map(scale_mapper)
score_scaled

# This mapping assumes the classes are evenly spaced. What if they are not?

0    1
1    1
2    2
3    2
4    3
Name: Score, dtype: int64

In [4]:
# Same feature, now with an extra class that sits just above "Medium"
dataframe = pd.DataFrame({"Score": ["Low",
                                    "Low",
                                    "Medium",
                                    "Medium",
                                    "High",
                                    "Barely More Than Medium"]})

# Naive mapper: consecutive integers, i.e. every class is one step apart
scale_mapper = {"Low":1,
                "Medium":2,
                "Barely More Than Medium": 3,
                "High":4}

# Series.map is used instead of Series.replace because replacing strings with
# numbers relies on silent downcasting that pandas 2.2+ deprecates.
# Note: map turns any class that is missing from scale_mapper into NaN.
score_scaled_even = dataframe["Score"].map(scale_mapper)
score_scaled_even

# Here the distance between "Low" and "Medium" equals the distance between
# "Medium" and "Barely More Than Medium", which misrepresents the classes.

0    1
1    1
2    2
3    2
4    4
5    3
Name: Score, dtype: int64

In [6]:
# The best approach is to choose the numeric value of each class deliberately,
# so that the gaps between values reflect how far apart the classes really are.

scale_mapper = {"Low":1,
                "Medium":2,
                "Barely More Than Medium": 2.1,
                "High":3}

# Series.map is used instead of Series.replace so the result dtype comes
# straight from the mapper values (float64 here, because of the 2.1 entry).
# Note: map turns any class that is missing from scale_mapper into NaN.
score_scaled_weighted = dataframe["Score"].map(scale_mapper)
score_scaled_weighted

0    1.0
1    1.0
2    2.0
3    2.0
4    3.0
5    2.1
Name: Score, dtype: float64