In [14]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OrdinalEncoder,OneHotEncoder

In [15]:
df=sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [16]:
df.isnull().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

# Labelencoder

In [17]:
df.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

In [18]:
le_sex=LabelEncoder()
le_smoker=LabelEncoder()
le_day=LabelEncoder()
le_time=LabelEncoder()

df['sex']=le_sex.fit_transform(df['sex'])
df['smoker']=le_smoker.fit_transform(df['smoker'])
df['day']=le_day.fit_transform(df['day'])
df['time']=le_time.fit_transform(df['time'])

df.head()


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,0,0,2,0,2
1,10.34,1.66,1,0,2,0,3
2,21.01,3.5,1,0,2,0,3
3,23.68,3.31,1,0,2,0,2
4,24.59,3.61,0,0,2,0,4


In [19]:
df['sex']=le_sex.inverse_transform(df['sex'])
df['smoker']=le_smoker.inverse_transform(df['smoker'])
df['day']=le_day.inverse_transform(df['day'])
df['time']=le_time.inverse_transform(df['time'])

df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


# one Hot encoding

In [21]:
cat_columns=['sex','smoker']
encoder=OneHotEncoder(sparse=False)
new_df=pd.DataFrame(encoder.fit_transform(df[cat_columns]))

df=pd.concat([df,new_df],axis=1)
df.head()



Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,0,1,2,3
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0


In [22]:
encoder.categories_

[array(['Female', 'Male'], dtype=object), array(['No', 'Yes'], dtype=object)]

#### inverse

In [23]:
original_categories = {col: encoder.categories_[i] for i, col in enumerate(cat_columns)}
original_categories

{'sex': array(['Female', 'Male'], dtype=object),
 'smoker': array(['No', 'Yes'], dtype=object)}

In [24]:
feature_name=[]
for i,col in enumerate(cat_columns):
    for category in encoder.categories_[i]:
        feature_name.append(f'{col}_{category}')
new_df=pd.DataFrame(new_df,columns=feature_name)

df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,0,1,2,3
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0


# OrdinalEncoder

In [25]:
category_columns=['day','time']
encoder=OrdinalEncoder()
category_df=pd.DataFrame(encoder.fit_transform(df[category_columns]))

df=pd.concat([df,category_df],axis=1)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,0,1,2,3,0.1,1.1
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0,2.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,2.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,2.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0,2.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0,2.0,0.0


In [26]:
feature_name=[]
for i,col in enumerate(cat_columns):
    for category in encoder.categories_[i]:
        feature_name.append(f'{col}_{category}')
new_df=pd.DataFrame(new_df,columns=feature_name)

df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,0,1,2,3,0.1,1.1
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,1.0,0.0,2.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,2.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,1.0,0.0,2.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,1.0,0.0,2.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,1.0,0.0,2.0,0.0
