# Data Science Quick Tip #001: Reverse One Hot Encoding!
This notebook contains the code I wrote for the associated blog post. Screenshots of this code appear inline within that post. Feel free to use this code for your own learning purposes!

### Importing Necessary Libraries

In [1]:
import pandas as pd

### Generating Fake Data in Animal DataFrame

In [2]:
# Small fake dataset: one 'animal' column, with 'cat' and 'bird' each appearing twice
animals_df = pd.DataFrame(
    data=['cat', 'dog', 'bird', 'monkey', 'elephant', 'cat', 'bird'],
    columns=['animal'],
)
animals_df

Unnamed: 0,animal
0,cat
1,dog
2,bird
3,monkey
4,elephant
5,cat
6,bird


### Demonstrating Poor Enumeration with Animal DataFrame

In [3]:
# Writing poor enumeration function
def animals_to_numbers(animal):
    """Return the hand-assigned integer code for an animal name.

    The codes are arbitrary labels: cat=1, dog=2, monkey=3, elephant=4,
    bird=5. Any value that matches none of these names yields None.
    """
    # Pairs are compared in the same order the codes were assigned
    coded_animals = (
        ('cat', 1),
        ('dog', 2),
        ('monkey', 3),
        ('elephant', 4),
        ('bird', 5),
    )
    for name, code in coded_animals:
        if animal == name:
            return code
    return None

In [4]:
# Run every animal name through animals_to_numbers and store the codes
# in a new 'animal_numbers' column
animal_codes = animals_df['animal'].apply(animals_to_numbers)
animals_df['animal_numbers'] = animal_codes
animals_df

Unnamed: 0,animal,animal_numbers
0,cat,1
1,dog,2
2,bird,5
3,monkey,3
4,elephant,4
5,cat,1
6,bird,5


### Demonstrating Arbitrary Enumeration with a Better Choice Dataset - Coffee!

In [5]:
# Small fake dataset: a single 'coffee' column holding four size names
coffee_df = pd.DataFrame(
    data=['tall', 'grande', 'venti', 'trenta'],
    columns=['coffee'],
)

In [6]:
# Creating function to perform coffee enumeration
def coffee_to_numbers(coffee):
    """Return the integer code for a coffee size name.

    'tall', 'grande', 'venti' and 'trenta' map to 1, 2, 3 and 4
    respectively. Any other value yields None.
    """
    size_names = ('tall', 'grande', 'venti', 'trenta')
    # Codes start at 1 and follow the position of the name in size_names
    for code, size in enumerate(size_names, start=1):
        if coffee == size:
            return code
    return None

In [7]:
# Run every size name through coffee_to_numbers and store the codes
# in a new 'coffee_numbers' column
coffee_codes = coffee_df['coffee'].apply(coffee_to_numbers)
coffee_df['coffee_numbers'] = coffee_codes
coffee_df

Unnamed: 0,coffee,coffee_numbers
0,tall,1
1,grande,2
2,venti,3
3,trenta,4


### Performing OHE on animals_df

In [8]:
# Remove the 'animal_numbers' column (added by the enumeration step) in place
del animals_df['animal_numbers']

In [9]:
# Generating and concatenating new OHE features
# get_dummies creates one indicator column per distinct animal name.
# dtype=int is pinned explicitly: pandas >= 2.0 returns boolean columns by
# default, which would show True/False instead of the 0/1 indicators below.
animals_ohe = pd.get_dummies(animals_df['animal'], dtype=int)
# Place the indicator columns alongside the original 'animal' column
animals_df = pd.concat([animals_df, animals_ohe], axis=1)
animals_df

Unnamed: 0,animal,bird,cat,dog,elephant,monkey
0,cat,0,1,0,0,0
1,dog,0,0,1,0,0
2,bird,1,0,0,0,0
3,monkey,0,0,0,0,1
4,elephant,0,0,0,1,0
5,cat,0,1,0,0,0
6,bird,1,0,0,0,0


### Demonstrating reverse OHE using animals_df

In [10]:
# Carving off original 'animal' feature
# Keeps a handle on the label column before the next cell drops it from
# animals_df; no later cell shown in this notebook reads animal_feat.
animal_feat = animals_df['animal']

In [11]:
# Remove the original 'animal' label column in place, leaving only the OHE columns
del animals_df['animal']
animals_df

Unnamed: 0,bird,cat,dog,elephant,monkey
0,0,1,0,0,0
1,0,0,1,0,0
2,1,0,0,0,0
3,0,0,0,0,1
4,0,0,0,1,0
5,0,1,0,0,0
6,1,0,0,0,0


In [12]:
# Reverse the OHE with idxmax: for each row, take the name of the column
# holding the largest value (the single 1) and store it as the 'animal' label
animals_df['animal'] = animals_df.idxmax(axis='columns')
animals_df

Unnamed: 0,bird,cat,dog,elephant,monkey,animal
0,0,1,0,0,0,cat
1,0,0,1,0,0,dog
2,1,0,0,0,0,bird
3,0,0,0,0,1,monkey
4,0,0,0,1,0,elephant
5,0,1,0,0,0,cat
6,1,0,0,0,0,bird
