In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
import category_encoders as ce

In [3]:
# Toy inputs for each encoder demonstrated below. The same five animal labels
# are reused in several container types; each variable gets its own copy so the
# demos stay independent of one another.
_animals = ['cat', 'dog', 'mouse', 'dog', 'cat']

label_data = list(_animals)                           # plain Python list
onehot_data = np.reshape(np.array(_animals), (-1, 1))  # single-column 2-D array
binary_data = pd.Series(_animals)
target_data = pd.DataFrame(
    {
        'category': ['A', 'B', 'A', 'C', 'B', 'C', 'A'],
        'target': [1, 0, 1, 0, 1, 0, 1],
    }
)
frequency_data = pd.Series(_animals)

# Ordered levels, shaped as a single feature column.
_levels = np.array(['low', 'medium', 'high', 'medium', 'low'])
ordinal_data = _levels.reshape(-1, 1)

# Label Encoding and One-Hot Encoding

In [4]:
# Learn the label -> integer mapping, apply it, then map the integers back to
# the original strings to show that the encoding is reversible.
label_encoder = LabelEncoder()
label_encoder.fit(label_data)
label_encoded_data = label_encoder.transform(label_data)
label_decoded_data = label_encoder.inverse_transform(label_encoded_data)

In [5]:
# Show the integer codes and the labels recovered from them, one per line.
print(
    f"Label Encoded: {label_encoded_data}",
    f"Label Decoded: {label_decoded_data}",
    sep="\n",
)

Label Encoded: [0 1 2 1 0]
Label Decoded: ['cat' 'dog' 'mouse' 'dog' 'cat']


In [7]:
# Fit on the single-column array, then expand it into one indicator column per
# distinct category.
onehot_encoder = OneHotEncoder()
onehot_encoder.fit(onehot_data)
onehot_encoded_data = onehot_encoder.transform(onehot_data)

In [8]:
# The result prints as (row, column) coordinates with the stored value,
# rather than as a dense 0/1 table.
onehot_report = f"One-Hot Encoded:\n{onehot_encoded_data}"
print(onehot_report)

One-Hot Encoded:
  (0, 0)	1.0
  (1, 1)	1.0
  (2, 2)	1.0
  (3, 1)	1.0
  (4, 0)	1.0


# Binary Encoding

In [10]:
binary_encoder = ce.BinaryEncoder()
binary_encoded_data = binary_encoder.fit_transform(binary_data)

In [11]:
# Display the bit columns produced for each row.
binary_report = f"Binary Encoded:\n{binary_encoded_data}"
print(binary_report)

Binary Encoded:
   0_0  0_1
0    0    1
1    1    0
2    1    1
3    1    0
4    0    1


# Target Encoding

In [12]:
# Target encoding is supervised: the encoder needs both the categorical column
# and the target it should summarise per category.
category_column = target_data['category']
target_column = target_data['target']

target_encoder = ce.TargetEncoder()
target_encoded_data = target_encoder.fit_transform(category_column, target_column)

In [13]:
# Display the numeric value that replaced each category label.
target_report = f"Target Encoded:\n{target_encoded_data}"
print(target_report)

Target Encoded:
   category
0  0.637628
1  0.561296
2  0.637628
3  0.490371
4  0.561296
5  0.490371
6  0.637628


# Frequency Encoding

In [14]:
# Relative frequency of each category: occurrences divided by the total number
# of rows.
category_counts = frequency_data.value_counts()
frequency = category_counts.div(len(frequency_data))

# Replace every label with the relative frequency of its category.
frequency_encoded_data = frequency_data.map(frequency)

In [15]:
# Show the per-category frequency table, followed by the encoded column.
print(
    f"Frequency:\n{frequency}",
    f"Frequency Encoded: {frequency_encoded_data}",
    sep="\n",
)

Frequency:
cat      0.4
dog      0.4
mouse    0.2
Name: count, dtype: float64
Frequency Encoded: 0    0.4
1    0.4
2    0.2
3    0.4
4    0.4
dtype: float64


# Ordinal Encoding

In [16]:
# Spell out the level order explicitly so the codes follow low < medium < high.
level_order = ['low', 'medium', 'high']
ordinal_encoder = OrdinalEncoder(categories=[level_order])
ordinal_encoded_data = ordinal_encoder.fit(ordinal_data).transform(ordinal_data)

In [17]:
# Display the column of ordinal codes (one row per input sample).
ordinal_report = f"Ordinal Encoded: {ordinal_encoded_data}"
print(ordinal_report)

Ordinal Encoded: [[0.]
 [1.]
 [2.]
 [1.]
 [0.]]
