# Inverse Transforming of Categorical Data

In [17]:
# Import Liberaries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,OrdinalEncoder

In [18]:
# import data
df=sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [19]:
# impute missing values for age, embarked, embark_town and deck
df['age'].fillna(df['age'].median(), inplace=True)
df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)
df['embark_town'].fillna(df['embark_town'].mode()[0], inplace=True)
# drop deck column
df.drop('deck', axis=1, inplace=True)

In [21]:
le_sex=LabelEncoder()
le_class=LabelEncoder()
df['sex']=le_sex.fit_transform(df['sex'])
df['class']=le_class.fit_transform(df['class'])

In [22]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,1,22.0,1,0,7.25,S,2,man,True,Southampton,no,False
1,1,1,0,38.0,1,0,71.2833,C,0,woman,False,Cherbourg,yes,False
2,1,3,0,26.0,0,0,7.925,S,2,woman,False,Southampton,yes,True
3,1,1,0,35.0,1,0,53.1,S,0,woman,False,Southampton,yes,False
4,0,3,1,35.0,0,0,8.05,S,2,man,True,Southampton,no,True


In [24]:
# Inverse Transform
df['sex']=le_sex.inverse_transform(df['sex'])
df['class']=le_class.inverse_transform(df['class'])


ValueError: y contains previously unseen labels: ['female' 'male']

In [25]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,Southampton,no,True


# One Hot Encoding

In [130]:
df=sns.load_dataset('titanic')

In [131]:
cat_columns=['sex','embarked']

In [75]:
encoder=OneHotEncoder(sparse=False)
encoded_df=pd.DataFrame(encoder.fit_transform(df[cat_columns]))
df=pd.concat([df,encoded_df],axis=1)
df.head()



Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,...,2,3,4,5,0,1,2.1,3.1,4.1,5.1
0,0,3,male,22.0,1,0,7.25,S,2.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,1,1,female,38.0,1,0,71.2833,C,0.0,2.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,1,3,female,26.0,0,0,7.925,S,2.0,2.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,1,1,female,35.0,1,0,53.1,S,0.0,2.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,0,3,male,35.0,0,0,8.05,S,2.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [None]:
# Inverse transform into orignal categories
orignal_cat={col:encoder.categories_[i] for i in enumerate(cat_columns)}
feature_names = []
for i, col in enumerate(cat_columns):
    for category in encoder.categories_[i]:
        feature_names.append(f"{col}_{category}")

encoded_df = pd.DataFrame(encoded_df, columns=feature_names)

df.head()

In [106]:
df['class']

0       Third
1       First
2       Third
3       First
4       Third
        ...  
886    Second
887     First
888     Third
889     First
890     Third
Name: class, Length: 891, dtype: category
Categories (3, object): ['First', 'Second', 'Third']

# Ordinal Encoder

In [186]:
df=sns.load_dataset('titanic')

In [187]:
ca_col=df[['class']]

In [188]:
OE=OrdinalEncoder()
df1=OE.fit_transform(ca_col)
#df2=pd.DataFrame(df1,columns=ca_col)
#df3=pd.concat([df,df2],axis=1)
#df3.head()

In [189]:
df['class']=df1

In [190]:
df['class'].head()

0    2.0
1    0.0
2    2.0
3    0.0
4    2.0
Name: class, dtype: float64

In [192]:
decode=OE.inverse_transform(df[['class']])
decode

array([['Third'],
       ['First'],
       ['Third'],
       ['First'],
       ['Third'],
       ['Third'],
       ['First'],
       ['Third'],
       ['Third'],
       ['Second'],
       ['Third'],
       ['First'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Second'],
       ['Third'],
       ['Second'],
       ['Third'],
       ['Third'],
       ['Second'],
       ['Second'],
       ['Third'],
       ['First'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['First'],
       ['Third'],
       ['Third'],
       ['First'],
       ['First'],
       ['Third'],
       ['Second'],
       ['First'],
       ['First'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Second'],
       ['Third'],
       ['Second'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['Third'],
       ['First'],
       ['Second'],
       ['First'],
 

In [194]:
df[['decoded']]=decode

In [196]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,decoded
0,0,3,male,22.0,1,0,7.25,S,2.0,man,True,,Southampton,no,False,Third
1,1,1,female,38.0,1,0,71.2833,C,0.0,woman,False,C,Cherbourg,yes,False,First
2,1,3,female,26.0,0,0,7.925,S,2.0,woman,False,,Southampton,yes,True,Third
3,1,1,female,35.0,1,0,53.1,S,0.0,woman,False,C,Southampton,yes,False,First
4,0,3,male,35.0,0,0,8.05,S,2.0,man,True,,Southampton,no,True,Third


In [172]:
df4=OE.inverse_transform(df2)
df4

array([['Third', 'man'],
       ['First', 'woman'],
       ['Third', 'woman'],
       ...,
       ['Third', 'woman'],
       ['First', 'man'],
       ['Third', 'man']], dtype=object)

In [127]:
# Inverse transform
df[['class']]=OE.inverse_transform(df[['class']])

ValueError: Columns must be same length as key

In [121]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,2.0,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,0.0,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,2.0,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,0.0,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,2.0,man,True,,Southampton,no,True
