# **Inverse Transform**:

Inverse Transform is a technique used to get the original values of the encoded data after applying the transformation. It is used to get the original values of the data after applying the transformation.

##### We will use the following techniques to get the original values of the data after applying the transformation:


1. **Inverse Transform in Encoding**
   * Inverse Transform in Label Encoding
   * Inverse Transform in One Hot Encoding
   * Inverse Transform in Ordinal Encoding


In [45]:
# importing all the libraries:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  LabelEncoder, OneHotEncoder, OrdinalEncoder


In [2]:
# importing the dataset:

df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [3]:
df.shape

(891, 15)

In [17]:
# Impute the missing values of all the categorical and object variables using for loop:

for col in df.columns:
    if df[col].dtype == 'object' or df[col].dtype == 'category':
        df[col] = df[col].fillna(df[col].mode().iloc[0])

df['age'] = df['age'].fillna(df['age'].mean())

# drop the deck column due to excessive missing values:

df.drop('deck', axis=1, inplace=True)

In [5]:
df.isnull().sum().sort_values(ascending=False)


survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64

In [6]:
# now we will encode all the categorical and object columns:

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          891 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     891 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  embark_town  891 non-null    object  
 12  alive        891 non-null    object  
 13  alone        891 non-null    bool    
dtypes: bool(2), category(1), float64(2), int64(4), object(5)
memory usage: 79.4+ KB


## **Note**:

If you are encoding any column using label encoder and after encoding we want to decode the values as well to see the original values then `You SHOULD make seperate label encoder object for each column` because if you use the same label encoder object for all the columns then it will not be able to decode the values properly.

#### **So always follow this rule:**
1. Make a seperate label encoder object for each column.
2. Fit the label encoder object on the column.
3. Transform the column.
4. Inverse Transform the column.


# **Label Encoding** :



In [7]:
# print all the columns that are object or categorical:

for col in df.columns:
    if df[col].dtype == 'object' or df[col].dtype == 'category':
        print(col)

sex
embarked
class
who
embark_town
alive


In [8]:
le_sex = LabelEncoder()
le_embarked = LabelEncoder()
le_embark_town = LabelEncoder()
le_who = LabelEncoder()
le_class = LabelEncoder()
le_alive = LabelEncoder()


# fit and transform the columns:

df['sex'] = le_sex.fit_transform(df['sex'])
df['embarked'] = le_embarked.fit_transform(df['embarked'])
df['embark_town'] = le_embark_town.fit_transform(df['embark_town'])
df['who'] = le_who.fit_transform(df['who'])
df['class'] = le_class.fit_transform(df['class'])
df['alive'] = le_alive.fit_transform(df['alive'])

In [9]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,1,22.0,1,0,7.25,2,2,1,True,2,0,False
1,1,1,0,38.0,1,0,71.2833,0,0,2,False,0,1,False
2,1,3,0,26.0,0,0,7.925,2,2,2,False,2,1,True
3,1,1,0,35.0,1,0,53.1,2,0,2,False,2,1,False
4,0,3,1,35.0,0,0,8.05,2,2,1,True,2,0,True


In [10]:
# inverse encode the encoded columns:

df['sex'] = le_sex.inverse_transform(df['sex'])
df['embarked'] = le_embarked.inverse_transform(df['embarked'])
df['embark_town'] = le_embark_town.inverse_transform(df['embark_town'])
df['who'] = le_who.inverse_transform(df['who'])
df['class'] = le_class.inverse_transform(df['class'])
df['alive'] = le_alive.inverse_transform(df['alive'])

In [11]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,Southampton,no,True


# **Oridinal Encoding**:

In [26]:
df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [27]:
# Impute the missing values of all the categorical and object variables using for loop:

for col in df.columns:
    if df[col].dtype == 'object' or df[col].dtype == 'category':
        df[col] = df[col].fillna(df[col].mode().iloc[0])

df['age'] = df['age'].fillna(df['age'].mean())

# drop the deck column due to excessive missing values:

df.drop('deck', axis=1, inplace=True)

In [30]:
# oridinal encoding:

oe_sex = OrdinalEncoder()

df['sex'] = oe_sex.fit_transform(df[['sex']])

oe_embarked = OrdinalEncoder()
df['embarked'] = oe_embarked.fit_transform(df[['embarked']])

oe_embark_town = OrdinalEncoder()
df['embark_town'] = oe_embark_town.fit_transform(df[['embark_town']])

oe_class = OrdinalEncoder()
df['class'] = oe_class.fit_transform(df[['class']])

df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,1.0,22.0,1,0,7.25,2.0,2.0,man,True,2.0,no,False
1,1,1,0.0,38.0,1,0,71.2833,0.0,0.0,woman,False,0.0,yes,False
2,1,3,0.0,26.0,0,0,7.925,2.0,2.0,woman,False,2.0,yes,True
3,1,1,0.0,35.0,1,0,53.1,2.0,0.0,woman,False,2.0,yes,False
4,0,3,1.0,35.0,0,0,8.05,2.0,2.0,man,True,2.0,no,True


In [22]:
df.isnull().sum().sort_values(ascending=False)

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64

In [34]:
df['sex'] = oe_sex.inverse_transform(df[['sex']])
df['embarked'] = oe_embarked.inverse_transform(df[['embarked']])
df['embark_town'] = oe_embark_town.inverse_transform(df[['embark_town']])
df['class'] = oe_class.inverse_transform(df[['class']])
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,1.0,22.0,1,0,7.25,2.0,2.0,man,True,2.0,no,False
1,1,1,0.0,38.0,1,0,71.2833,0.0,0.0,woman,False,0.0,yes,False
2,1,3,0.0,26.0,0,0,7.925,2.0,2.0,woman,False,2.0,yes,True
3,1,1,0.0,35.0,1,0,53.1,2.0,0.0,woman,False,2.0,yes,False
4,0,3,1.0,35.0,0,0,8.05,2.0,2.0,man,True,2.0,no,True


## **One Hot Encoding:**

In [50]:
df = sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [51]:
# Impute the missing values of all the categorical, object and numerical variables using for loop:

for col in df.columns:
    if df[col].dtype == 'object' or df[col].dtype == 'category':
        df[col] = df[col].fillna(df[col].mode().iloc[0])
    elif df[col].dtype == 'float64' or df[col].dtype == 'int64':
        df[col] = df[col].fillna(df[col].median())

In [52]:
df.isnull().sum().sort_values(ascending=False)

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
deck           0
embark_town    0
alive          0
alone          0
dtype: int64

In [53]:
# one hot encode the columns:

cat_col = ['class']

encoder  = OneHotEncoder(sparse_output=False)
encoded_df = pd.DataFrame(encoder.fit_transform(df[cat_col]))

# concatenate the dataframes:
df = pd.concat([df, encoded_df], axis=1)
df.head()



Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,0,1,2
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,C,Southampton,no,False,0.0,0.0,1.0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1.0,0.0,0.0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,C,Southampton,yes,True,0.0,0.0,1.0
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1.0,0.0,0.0
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,C,Southampton,no,True,0.0,0.0,1.0


In [55]:
cat_columns = ['sex', 'embarked']

encoder = OneHotEncoder(sparse_output=False)
encoded_df = pd.DataFrame(encoder.fit_transform(df[cat_columns]))

# concatenate the dataframes 
df = pd.concat([df, encoded_df], axis=1)

df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,...,alive,alone,0,1,2,0.1,1.1,2.1,3,4
0,0,3,male,22.0,1,0,7.25,S,Third,man,...,no,False,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,...,yes,False,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,...,yes,True,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
3,1,1,female,35.0,1,0,53.1,S,First,woman,...,yes,False,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,0,3,male,35.0,0,0,8.05,S,Third,man,...,no,True,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0


In [58]:
onehot_encoder = OneHotEncoder(sparse_output=False, drop='first')
df_encoded_onehot = df.copy()
df_onehot_encoded = onehot_encoder.fit_transform(df[cat_col])
df_onehot_encoded = pd.DataFrame(df_onehot_encoded, columns=onehot_encoder.get_feature_names_out(cat_col))
df_encoded_onehot = df_encoded_onehot.drop(columns=cat_col)
df_encoded_onehot = pd.concat([df_encoded_onehot, df_onehot_encoded], axis=1)


In [59]:
# now lets inverse transform the encoded columns:

onehot_encoded_columns = onehot_encoder.get_feature_names_out(cat_col)
df_inverse_onehot = df_encoded_onehot.copy()

onehot_encoded_values = df_encoded_onehot[onehot_encoded_columns].values
inverse_transformed = onehot_encoder.inverse_transform(onehot_encoded_values)
df_inverse_onehot[cat_col] = pd.DataFrame(inverse_transformed, columns=cat_col)
df_inverse_onehot = df_inverse_onehot.drop(columns=onehot_encoded_columns)


In [61]:
df_inverse_onehot.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,who,adult_male,...,alone,0,1,2,0.1,1.1,2.1,3,4,class
0,0,3,male,22.0,1,0,7.25,S,man,True,...,False,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,Third
1,1,1,female,38.0,1,0,71.2833,C,woman,False,...,False,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,First
2,1,3,female,26.0,0,0,7.925,S,woman,False,...,True,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,Third
3,1,1,female,35.0,1,0,53.1,S,woman,False,...,False,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,First
4,0,3,male,35.0,0,0,8.05,S,man,True,...,True,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,Third
