# Q.1 - Map ordinal features: shirt_size to numbers

In [1]:
import pandas as pd
import numpy as np
# Toy shirt inventory: one row per item, with a nominal feature (color),
# an ordinal feature (shirt_size), a numeric feature (price) and the target
# (class_label).
records = [
    ['yellow', 'XL', 1085.07, 'classC'],
    ['blue',   'L',  339.61,  'classB'],
    ['green',  'L',  400.0,   'classB'],
    ['green',  'M',  238,     'classB'],
    ['grey',   'S',  52.99,   'classA'],
]

# Name the columns at construction time rather than assigning them afterwards.
df = pd.DataFrame(records,
                  columns=['color', 'shirt_size', 'price', 'class_label'])

In [2]:
# shirt_size is ordinal: sizes have a natural order (S < M < L < XL), so each
# label is replaced by its rank in that order (S -> 0, ..., XL -> 3).
size_mapping = {size: rank for rank, size in enumerate(['S', 'M', 'L', 'XL'])}

# Series.map looks every label up in the dict; labels missing from the dict
# would become NaN.
df['shirt_size'] = df['shirt_size'].map(size_mapping)
df

Unnamed: 0,color,shirt_size,price,class_label
0,yellow,3,1085.07,classC
1,blue,2,339.61,classB
2,green,2,400.0,classB
3,green,1,238.0,classB
4,grey,0,52.99,classA


# Q.2 - One-hot encode class_label

In [3]:
# One-hot encode class_label with scikit-learn's ColumnTransformer.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Work on the raw values, so columns are addressed by position from here on.
X = df.values

# [3] is the positional index of the column to encode: class_label is the
# fourth column (index 3, counting from zero). remainder='passthrough' keeps
# the other columns unchanged and appends them after the encoded ones.
transform = ColumnTransformer([('encoder', OneHotEncoder(), [3])], remainder='passthrough')

# Cast everything to strings so the mixed one-hot/text/numeric values fit in a
# single homogeneous array. The builtin `str` is used because the `np.str`
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
X = np.array(transform.fit_transform(X), dtype=str)

In [4]:
# Display the encoded array: the three one-hot class columns come first,
# followed by the passthrough columns (color, shirt_size, price).
X

array([['0.0', '0.0', '1.0', 'yellow', '3', '1085.07'],
       ['0.0', '1.0', '0.0', 'blue', '2', '339.61'],
       ['0.0', '1.0', '0.0', 'green', '2', '400.0'],
       ['0.0', '1.0', '0.0', 'green', '1', '238.0'],
       ['1.0', '0.0', '0.0', 'grey', '0', '52.99']], dtype='<U7')

In [5]:
# Wrap the encoded array back into a DataFrame. The names follow the array's
# column order: the three one-hot class columns, then the passthrough columns.
encoded_columns = ['classA', 'classB', 'classC', 'color', 'shirt_size', 'price']
df = pd.DataFrame(data=X, columns=encoded_columns)
df

Unnamed: 0,classA,classB,classC,color,shirt_size,price
0,0.0,0.0,1.0,yellow,3,1085.07
1,0.0,1.0,0.0,blue,2,339.61
2,0.0,1.0,0.0,green,2,400.0
3,0.0,1.0,0.0,green,1,238.0
4,1.0,0.0,0.0,grey,0,52.99


# Q.3 - Use `pd.get_dummies` to one-hot encode shirt_size

In [6]:
# One indicator column per distinct shirt_size value, named shirt_size_<value>.
# dtype=int keeps the indicators as 0/1; newer pandas versions default to
# boolean True/False columns.
shirt_size = pd.get_dummies(df['shirt_size'], prefix='shirt_size', dtype=int)

# Drop the original shirt_size column from df. The `columns=` keyword is used
# because passing the axis positionally (`df.drop('shirt_size', 1)`) is
# rejected by pandas 2.0 and later.
df = df.drop(columns='shirt_size')

# Append the indicator columns side by side to get the encoded DataFrame.
df = pd.concat([df, shirt_size], axis=1)

df

Unnamed: 0,classA,classB,classC,color,price,shirt_size_0,shirt_size_1,shirt_size_2,shirt_size_3
0,0.0,0.0,1.0,yellow,1085.07,0,0,0,1
1,0.0,1.0,0.0,blue,339.61,0,0,1,0
2,0.0,1.0,0.0,green,400.0,0,0,1,0
3,0.0,1.0,0.0,green,238.0,0,1,0,0
4,1.0,0.0,0.0,grey,52.99,1,0,0,0
