In [68]:
import pandas as pd
import numpy as np

# Toy dataset for the encoding demos below: two categorical features
# (Temperature, Color) and a binary Target, ten rows in total.
data = dict(
    Temperature=['Hot', 'Cold', 'Very Hot', 'Warm', 'Hot', 'Warm', 'Warm', 'Hot', 'Hot', 'Cold'],
    Color=['Red', 'Yellow', 'Blue', 'Blue', 'Red', 'Yellow', 'Red', 'Yellow', 'Yellow', 'Yellow'],
    Target=[1, 1, 1, 0, 1, 0, 1, 0, 1, 1],
)

# Column order follows the insertion order of the dict above.
df = pd.DataFrame.from_dict(data)

In [69]:
# Show the freshly built frame: Temperature, Color and Target for all ten rows.
df

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


# One-Hot Encoding

In [70]:
# One-hot encode Temperature with pandas: the column is replaced by one
# indicator column per category, named "Temperature_<category>".
# No level is dropped (drop_first is left at its default of False), and the
# remaining columns pass through unchanged.
df_ohe = pd.get_dummies(
    df,
    columns=['Temperature'],
    prefix='Temperature',
)

In [71]:
# Inspect the pandas result: Temperature has been replaced by one indicator
# column per category, while Color and Target are kept as they were.
df_ohe

Unnamed: 0,Color,Target,Temperature_Cold,Temperature_Hot,Temperature_Very Hot,Temperature_Warm
0,Red,1,0,1,0,0
1,Yellow,1,1,0,0,0
2,Blue,1,0,0,1,0
3,Blue,0,0,0,0,1
4,Red,1,0,1,0,0
5,Yellow,0,0,0,0,1
6,Red,1,0,0,0,1
7,Yellow,0,0,1,0,0
8,Yellow,1,0,1,0,0
9,Yellow,1,1,0,0,0


In [72]:
from sklearn.preprocessing import OneHotEncoder

# Same one-hot encoding as above, this time with scikit-learn's transformer.
ohe = OneHotEncoder()

# The encoder needs 2-D input, so pass a one-column frame. The result is
# converted with .toarray() so it can back a regular DataFrame.
temperature_ohe = ohe.fit_transform(df[['Temperature']]).toarray()

# categories_[0] holds the categories learned for the single encoded feature;
# they give the indicator columns their "Temperature_<category>" names.
# Reusing df's index keeps the rows lined up in the concat below: pd.concat
# with axis=1 aligns on index labels, so a fresh 0..n-1 index would misalign
# (and introduce NaN rows) whenever df does not carry the default index.
temperature_ohe_df = pd.DataFrame(
    temperature_ohe,
    index=df.index,
    columns=[f"Temperature_{category}" for category in ohe.categories_[0]],
)

# Keep the original columns and append the indicator columns side by side.
df_ohe = pd.concat([df, temperature_ohe_df], axis=1)

In [73]:
# Inspect the scikit-learn result: the original columns (Temperature included)
# followed by the four indicator columns, which hold floats here.
df_ohe

Unnamed: 0,Temperature,Color,Target,Temperature_Cold,Temperature_Hot,Temperature_Very Hot,Temperature_Warm
0,Hot,Red,1,0.0,1.0,0.0,0.0
1,Cold,Yellow,1,1.0,0.0,0.0,0.0
2,Very Hot,Blue,1,0.0,0.0,1.0,0.0
3,Warm,Blue,0,0.0,0.0,0.0,1.0
4,Hot,Red,1,0.0,1.0,0.0,0.0
5,Warm,Yellow,0,0.0,0.0,0.0,1.0
6,Warm,Red,1,0.0,0.0,0.0,1.0
7,Hot,Yellow,0,0.0,1.0,0.0,0.0
8,Hot,Yellow,1,0.0,1.0,0.0,0.0
9,Cold,Yellow,1,1.0,0.0,0.0,0.0


# Label Encoding

In [74]:
from sklearn.preprocessing import LabelEncoder

# Integer-code Temperature. In the output below the codes follow the sorted
# label order (Cold=0, Hot=1, Very Hot=2, Warm=3), so they do not reflect how
# hot each level is.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values (y); OrdinalEncoder is the feature-side counterpart -- confirm which
# one is intended if this leaves the tutorial setting.
label_encoder = LabelEncoder()
label_encoder.fit(df['Temperature'])

df['Temperature_label_encoded'] = label_encoder.transform(df['Temperature'])

In [75]:
# Show df with the new Temperature_label_encoded column appended.
df

Unnamed: 0,Temperature,Color,Target,Temperature_label_encoded
0,Hot,Red,1,1
1,Cold,Yellow,1,0
2,Very Hot,Blue,1,2
3,Warm,Blue,0,3
4,Hot,Red,1,1
5,Warm,Yellow,0,3
6,Warm,Red,1,3
7,Hot,Yellow,0,1
8,Hot,Yellow,1,1
9,Cold,Yellow,1,0


# Ordinal Encoding

In [76]:
# Hand-specified ranking from coldest to hottest; a level's 1-based position
# in this list becomes its encoded value (Cold=1 ... Very Hot=4).
temperature_levels = ['Cold', 'Warm', 'Hot', 'Very Hot']
temp_dict = {level: rank for rank, level in enumerate(temperature_levels, start=1)}

# Look each Temperature value up in the ranking.
df['Temperature_ordinal_encoded'] = df['Temperature'].map(temp_dict)

In [77]:
# Show df with the new Temperature_ordinal_encoded column appended.
df

Unnamed: 0,Temperature,Color,Target,Temperature_label_encoded,Temperature_ordinal_encoded
0,Hot,Red,1,1,3
1,Cold,Yellow,1,0,1
2,Very Hot,Blue,1,2,4
3,Warm,Blue,0,3,2
4,Hot,Red,1,1,3
5,Warm,Yellow,0,3,2
6,Warm,Red,1,3,2
7,Hot,Yellow,0,1,3
8,Hot,Yellow,1,1,3
9,Cold,Yellow,1,0,1


# Frequency Encoding

In [78]:
# Relative frequency of each Temperature category: the number of rows in each
# group divided by the total number of rows in df.
category_counts = df.groupby('Temperature').size()
frequency_encoder = category_counts.div(len(df))

frequency_encoder

Temperature
Cold        0.2
Hot         0.4
Very Hot    0.1
Warm        0.3
dtype: float64

In [79]:
# Look up each row's Temperature in frequency_encoder (indexed by category)
# and store the resulting share as a new column.
temperature_frequencies = df['Temperature'].map(frequency_encoder)
df['Temperature_freq_encoded'] = temperature_frequencies

In [80]:
# Show df with the new Temperature_freq_encoded column appended.
df

Unnamed: 0,Temperature,Color,Target,Temperature_label_encoded,Temperature_ordinal_encoded,Temperature_freq_encoded
0,Hot,Red,1,1,3,0.4
1,Cold,Yellow,1,0,1,0.2
2,Very Hot,Blue,1,2,4,0.1
3,Warm,Blue,0,3,2,0.3
4,Hot,Red,1,1,3,0.4
5,Warm,Yellow,0,3,2,0.3
6,Warm,Red,1,3,2,0.3
7,Hot,Yellow,0,1,3,0.4
8,Hot,Yellow,1,1,3,0.4
9,Cold,Yellow,1,0,1,0.2


# Target (Mean) Encoding

In [81]:
# Mean of Target within each Temperature category.
# NOTE(review): the means are computed on the same rows that are encoded with
# them further down, so each row's own target contributes to its encoding --
# fine for a demo; confirm a train-only or out-of-fold fit for real models.
mean_encoder = df['Target'].groupby(df['Temperature']).mean()

mean_encoder

Temperature
Cold        1.000000
Hot         0.750000
Very Hot    1.000000
Warm        0.333333
Name: Target, dtype: float64

In [82]:
# Look up each row's Temperature in mean_encoder (indexed by category) and
# store the category's mean Target as a new column.
temperature_target_means = df['Temperature'].map(mean_encoder)
df['Temperature_mean_encoded'] = temperature_target_means

In [83]:
# Show df with the new Temperature_mean_encoded column appended.
df

Unnamed: 0,Temperature,Color,Target,Temperature_label_encoded,Temperature_ordinal_encoded,Temperature_freq_encoded,Temperature_mean_encoded
0,Hot,Red,1,1,3,0.4,0.75
1,Cold,Yellow,1,0,1,0.2,1.0
2,Very Hot,Blue,1,2,4,0.1,1.0
3,Warm,Blue,0,3,2,0.3,0.333333
4,Hot,Red,1,1,3,0.4,0.75
5,Warm,Yellow,0,3,2,0.3,0.333333
6,Warm,Red,1,3,2,0.3,0.333333
7,Hot,Yellow,0,1,3,0.4,0.75
8,Hot,Yellow,1,1,3,0.4,0.75
9,Cold,Yellow,1,0,1,0.2,1.0


# Reference

[All about Categorical Variable Encoding](https://towardsdatascience.com/all-about-categorical-variable-encoding-305f3361fd02)