In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
data = pd.read_csv('temperature_data.csv')
data

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


## Observations : 

- Color variable contains the names of different color eg: Red, Yellow, Blue (Unordered data)
- Temperature variable contains order among the values like Very Hot, Hot, Warm, Cold(Ordered data)

## Applying Label Encoding on Temperature

In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le

In [5]:
data['Temperature'] = le.fit_transform(data[['Temperature']])
data

Unnamed: 0,Temperature,Color,Target
0,1,Red,1
1,0,Yellow,1
2,2,Blue,1
3,3,Blue,0
4,1,Red,1
5,3,Yellow,0
6,3,Red,1
7,1,Yellow,0
8,1,Yellow,1
9,0,Yellow,1


## Manual Encoding of Temperature Column over label Encoding

In [10]:
data = pd.read_csv('temperature_data.csv')
data

Unnamed: 0,Temperature,Color,Target
0,Hot,Red,1
1,Cold,Yellow,1
2,Very Hot,Blue,1
3,Warm,Blue,0
4,Hot,Red,1
5,Warm,Yellow,0
6,Warm,Red,1
7,Hot,Yellow,0
8,Hot,Yellow,1
9,Cold,Yellow,1


In [11]:
dic = {
    'Cold' : 0,
    'Warm' : 1,
    'Hot' : 2,
    'Very Hot' : 3
}
dic

{'Cold': 0, 'Warm': 1, 'Hot': 2, 'Very Hot': 3}

In [12]:
data['Temperature'] = data['Temperature'].replace(dic)
data

Unnamed: 0,Temperature,Color,Target
0,2,Red,1
1,0,Yellow,1
2,3,Blue,1
3,1,Blue,0
4,2,Red,1
5,1,Yellow,0
6,1,Red,1
7,2,Yellow,0
8,2,Yellow,1
9,0,Yellow,1


In [16]:
data[['Red', 'Blue', 'Yellow']] = pd.get_dummies(data['Color'])
data

Unnamed: 0,Temperature,Color,Target,Red,Blue,Yellow
0,2,Red,1,0,1,0
1,0,Yellow,1,0,0,1
2,3,Blue,1,1,0,0
3,1,Blue,0,1,0,0
4,2,Red,1,0,1,0
5,1,Yellow,0,0,0,1
6,1,Red,1,0,1,0
7,2,Yellow,0,0,0,1
8,2,Yellow,1,0,0,1
9,0,Yellow,1,0,0,1


In [20]:
data = data.drop(['Red', 'Yellow', 'Blue'], axis = 1)
data

KeyError: "['Red', 'Yellow', 'Blue'] not found in axis"

## One Hot Encoding using sklearn OneHotEncoder() 

Steps : 
1. Import the OneHotEncoder() from the sklearn library
2. Apply the ohe() on the desired columns
3. Convert the step 2 output to a DataFrame
4. concatenate original data and step 3 output 
5. drop the original text columns from the data

In [21]:
data

Unnamed: 0,Temperature,Color,Target
0,2,Red,1
1,0,Yellow,1
2,3,Blue,1
3,1,Blue,0
4,2,Red,1
5,1,Yellow,0
6,1,Red,1
7,2,Yellow,0
8,2,Yellow,1
9,0,Yellow,1


In [22]:
### 1. Import onehotencoder() from sklearn library

from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
ohe

In [23]:
## Step 2 : Apply the onehotencoder() on the desired column (color)

data_ohe = ohe.fit_transform(data[['Color']])
data_ohe

<10x3 sparse matrix of type '<class 'numpy.float64'>'
	with 10 stored elements in Compressed Sparse Row format>

In [24]:
data_ohe.toarray()

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

In [25]:
## Step 3 : Convert the Step 2 output into a DataFrame

data_ohe = pd.DataFrame(data_ohe.toarray(), columns = ['Blue', 'Red', 'Yellow'])
data_ohe

Unnamed: 0,Blue,Red,Yellow
0,0.0,1.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,1.0,0.0
5,0.0,0.0,1.0
6,0.0,1.0,0.0
7,0.0,0.0,1.0
8,0.0,0.0,1.0
9,0.0,0.0,1.0


In [27]:
## Step 4 : concatenate step 3 output with original data

data = pd.concat([data, data_ohe], axis = 1)
data

Unnamed: 0,Temperature,Color,Target,Blue,Red,Yellow
0,2,Red,1,0.0,1.0,0.0
1,0,Yellow,1,0.0,0.0,1.0
2,3,Blue,1,1.0,0.0,0.0
3,1,Blue,0,1.0,0.0,0.0
4,2,Red,1,0.0,1.0,0.0
5,1,Yellow,0,0.0,0.0,1.0
6,1,Red,1,0.0,1.0,0.0
7,2,Yellow,0,0.0,0.0,1.0
8,2,Yellow,1,0.0,0.0,1.0
9,0,Yellow,1,0.0,0.0,1.0


In [29]:
## Step 5 : Delete the original text column from the data

data = data.drop('Color', axis = 1)
data

Unnamed: 0,Temperature,Target,Blue,Red,Yellow
0,2,1,0.0,1.0,0.0
1,0,1,0.0,0.0,1.0
2,3,1,1.0,0.0,0.0
3,1,0,1.0,0.0,0.0
4,2,1,0.0,1.0,0.0
5,1,0,0.0,0.0,1.0
6,1,1,0.0,1.0,0.0
7,2,0,0.0,0.0,1.0
8,2,1,0.0,0.0,1.0
9,0,1,0.0,0.0,1.0
