In [15]:
import pandas as pd
import numpy as np

### Handling Gategorical Data

In [60]:
T_shirts=[
    ['green', 'M',99.9,'polo'],
    ['blue', 'L',101.9,'sleeve'],
    ['red', 'XL',50.9,'high_nick']
]

In [61]:
df=pd.DataFrame(T_shirts)
df.columns = ['color', 'size','price','T_shirts_type']

In [62]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,polo
1,blue,L,101.9,sleeve
2,red,XL,50.9,high_nick


#####  Handling label Data

In [9]:
## old method
# df.loc[df.T_shirts_type=='polo']=0
# df.loc[df.T_shirts_type=='sleeve']=0
# df.loc[df.T_shirts_type=='high_nick']=0

In [14]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,polo
1,blue,L,101.9,sleeve
2,red,M,50.9,high_nick


In [16]:
np.unique(df.T_shirts_type)

array(['high_nick', 'polo', 'sleeve'], dtype=object)

In [19]:
for label in df.T_shirts_type:
    print(label)

polo
sleeve
high_nick


In [23]:
{label for label in np.unique(df.T_shirts_type)}

{'high_nick', 'polo', 'sleeve'}

In [25]:
## set return unique value
({label for label in np.unique(df.T_shirts_type)})

{'high_nick', 'polo', 'sleeve'}

In [27]:
{label:index for index,label in  enumerate(np.unique(df.T_shirts_type))}

{'high_nick': 0, 'polo': 1, 'sleeve': 2}

In [30]:
label_mapping={label:index for index,label in  enumerate(np.unique(df.T_shirts_type))}

In [31]:
df['T_shirts_type']=df['T_shirts_type'].map(label_mapping)

In [32]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,1
1,blue,L,101.9,2
2,red,M,50.9,0


In [41]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,polo
1,blue,L,101.9,sleeve
2,red,M,50.9,high_nick


In [36]:
from sklearn.preprocessing import LabelEncoder 

In [37]:
label_encoder=LabelEncoder()

In [44]:
y=label_encoder.fit_transform(df.T_shirts_type.values) ## label encoder prameters should be one dimensional array 

In [45]:
y

array([1, 2, 0])

#  Handling categorical Data 

### ordinal encoding

In [55]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,polo
1,blue,L,101.9,sleeve
2,red,XL,50.9,high_nick


In [56]:
size_mapping={size:index for index,size in  enumerate(np.unique(df['size']))}
size_mapping

{'L': 0, 'M': 1, 'XL': 2}

In [57]:
df['size']=df['size'].map(size_mapping)
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,1,99.9,polo
1,blue,0,101.9,sleeve
2,red,2,50.9,high_nick


In [59]:
from sklearn.preprocessing import OrdinalEncoder

In [65]:
df['size'].values

array(['M', 'L', 'XL'], dtype=object)

In [67]:
ordinal_encoder=OrdinalEncoder()
df['size']=ordinal_encoder.fit_transform(df['size'].values.reshape(-1,1)) # OrdinalEncoder parameters should be 2 dimensional

In [68]:
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,1.0,99.9,polo
1,blue,0.0,101.9,sleeve
2,red,2.0,50.9,high_nick


In [74]:
df['class']=['classA','classB','classC']
df['counter']=['counterA','counterB','counterC']
df

Unnamed: 0,color,size,price,T_shirts_type,class,counter
0,green,1.0,99.9,polo,classA,counterA
1,blue,0.0,101.9,sleeve,classB,counterB
2,red,2.0,50.9,high_nick,classC,counterC


In [75]:
## example
df[['class','counter']]=ordinal_encoder.fit_transform(df[['class','counter']].values.reshape(-1,2)) # 2 number of class

In [76]:
df

Unnamed: 0,color,size,price,T_shirts_type,class,counter
0,green,1.0,99.9,polo,0.0,0.0
1,blue,0.0,101.9,sleeve,1.0,1.0
2,red,2.0,50.9,high_nick,2.0,2.0


### nominal encoding 

In [77]:
df

Unnamed: 0,color,size,price,T_shirts_type,class,counter
0,green,1.0,99.9,polo,0.0,0.0
1,blue,0.0,101.9,sleeve,1.0,1.0
2,red,2.0,50.9,high_nick,2.0,2.0


In [78]:
from sklearn.preprocessing import OneHotEncoder

In [82]:
one_hot_encoder=OneHotEncoder(sparse=False)
one_hot_encoder.fit_transform(df['color'].values.reshape(-1,1))

array([[0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.]])

# Column Transformer

In [83]:
T_shirts=[
    ['green', 'M',99.9,'polo'],
    ['blue', 'L',101.9,'sleeve'],
    ['red', 'XL',50.9,'high_nick']
]
df=pd.DataFrame(T_shirts)
df.columns = ['color', 'size','price','T_shirts_type']
df

Unnamed: 0,color,size,price,T_shirts_type
0,green,M,99.9,polo
1,blue,L,101.9,sleeve
2,red,XL,50.9,high_nick


In [84]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder() 
y=label_encoder.fit_transform(df['T_shirts_type'].values)
y

array([1, 2, 0])

In [85]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder , OrdinalEncoder

In [87]:
X=df[['color', 'size','price']]

In [89]:
one_hot_encoder = OneHotEncoder(sparse=False)
ordinal_encoder=OrdinalEncoder()
column_transformer = ColumnTransformer([
    ('one_hot_encoder',one_hot_encoder,[0]),
    ('ordinal_encoder',ordinal_encoder,[1]),
    ('nothing','passthrough',[2])
])
column_transformer=column_transformer.fit_transform(X)

In [90]:
column_transformer

array([[  0. ,   1. ,   0. ,   1. ,  99.9],
       [  1. ,   0. ,   0. ,   0. , 101.9],
       [  0. ,   0. ,   1. ,   2. ,  50.9]])