# Encoding / Decoding
- Change the values of columns based on a criterion, methodology, or condition

In [1]:
#libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pydataset import data
import plotnine

In [2]:
# Load the mtcars dataset through pydataset's data() helper
df = data('mtcars')

In [3]:
# Preview the first five rows
df.head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [4]:
# Columns of mtcars that this notebook treats as categorical
catCols = ['cyl','gear','carb', 'am', 'vs']
# Preview only those columns
df[catCols].head()

Unnamed: 0,cyl,gear,carb,am,vs
Mazda RX4,6,4,4,1,0
Mazda RX4 Wag,6,4,4,1,0
Datsun 710,4,4,1,1,1
Hornet 4 Drive,6,3,1,0,1
Hornet Sportabout,8,3,2,0,0


In [6]:
# Cast the selected columns to the 'category' dtype and list the resulting dtypes.
# The result is not assigned, so df itself keeps its original dtypes.
df[catCols].astype('category').dtypes

cyl     category
gear    category
carb    category
am      category
vs      category
dtype: object

In [10]:
# gear: relabel only the value 4 as 'Gear4'; values absent from the dict (e.g. 3) are left as-is
df[['gear']].replace({4:'Gear4'}).head()

Unnamed: 0,gear
Mazda RX4,Gear4
Mazda RX4 Wag,Gear4
Datsun 710,Gear4
Hornet 4 Drive,3
Hornet Sportabout,3


In [12]:
# Relabel the gear values 3, 4 and 5, then count the rows for each label
df[['gear']].replace({3:'Gear3', 4:'Gear4', 5:'Gear5'}).value_counts()

gear 
Gear3    15
Gear4    12
Gear5     5
dtype: int64

In [19]:
# Frequency of each distinct value in the 'cyl' column
df['cyl'].value_counts()

8    14
4    11
6     7
Name: cyl, dtype: int64

In [24]:
# Lookup tables (numeric code -> display label) for the 'gear' and 'cyl' columns
gdict = {n: f'Gear{n}' for n in (3, 4, 5)}
cdict = {n: f'Cyl-{n}' for n in (4, 6, 8)}

In [27]:
# Nested dict form: column name -> {old value: new value}, applied to both columns at once.
# replace() returns a new frame (inplace defaults to False), so df is left untouched.
df.replace(to_replace={'gear': gdict, 'cyl': cdict}).head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,Cyl-6,160.0,110,3.9,2.62,16.46,0,1,Gear4,4
Mazda RX4 Wag,21.0,Cyl-6,160.0,110,3.9,2.875,17.02,0,1,Gear4,4
Datsun 710,22.8,Cyl-4,108.0,93,3.85,2.32,18.61,1,1,Gear4,1
Hornet 4 Drive,21.4,Cyl-6,258.0,110,3.08,3.215,19.44,1,0,Gear3,1
Hornet Sportabout,18.7,Cyl-8,360.0,175,3.15,3.44,17.02,0,0,Gear3,2


In [29]:
# Per-column scalar replacement: the value 3 in 'gear' and the value 6 in 'cyl' each become
# 'Nothing', independently of one another. This is NOT a row filter on gear == 3 AND cyl == 6.
remap_values = {'gear': 3, 'cyl': 6}
df.replace(to_replace=remap_values, value='Nothing').head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,Nothing,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,Nothing,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,Nothing,258.0,110,3.08,3.215,19.44,1,0,Nothing,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,Nothing,2


# map

In [31]:
df['gear'].map(gdict).head()  # Series.map with a dict: each gear value is looked up in gdict; the result is a Series

Mazda RX4            Gear4
Mazda RX4 Wag        Gear4
Datsun 710           Gear4
Hornet 4 Drive       Gear3
Hornet Sportabout    Gear3
Name: gear, dtype: object

In [47]:
print('Carb Types and Count ', df.carb.value_counts())
# Use a dedicated name for the carb labels. Rebinding `cdict` here would overwrite the
# cylinder lookup used by the earlier df.replace(...) cell, so re-running that cell after
# this one would silently stop relabelling 'cyl'.
carb_dict = {1:'C1',2:'C2',3:'C3'}
# map() yields NaN for carb values that are not keys of the dict; fillna() puts the
# original values back, so only the listed codes are relabelled and the rest stay the same.
df['carb'].map(carb_dict).fillna(df['carb']).head()

Carb Types and Count  4    10
2    10
1     7
3     3
6     1
8     1
Name: carb, dtype: int64


Mazda RX4             4
Mazda RX4 Wag         4
Datsun 710           C1
Hornet 4 Drive       C1
Hornet Sportabout    C2
Name: carb, dtype: object

In [14]:
# Label helper for the 'am' column, meant to be passed to Series.map / Series.apply
def amtype(am):
    """Return 'Auto' when ``am`` equals 0, and 'Manual' for any other value."""
    return 'Auto' if am == 0 else 'Manual'

In [38]:
df.am.map(lambda x: 'Auto' if x == 0 else 'Manual').head() # Series.map with an inline lambda: 0 -> 'Auto', anything else -> 'Manual'

Mazda RX4            Manual
Mazda RX4 Wag        Manual
Datsun 710           Manual
Hornet 4 Drive         Auto
Hornet Sportabout      Auto
Name: am, dtype: object

In [40]:
df.am.apply(lambda x: 'Auto' if x == 0 else 'Manual').head() # same lambda, passed to Series.apply instead of Series.map

Mazda RX4            Manual
Mazda RX4 Wag        Manual
Datsun 710           Manual
Hornet 4 Drive         Auto
Hornet Sportabout      Auto
Name: am, dtype: object

In [41]:
df.am.apply(amtype).head() # Series.apply with the named function amtype; column selected by attribute access

Mazda RX4            Manual
Mazda RX4 Wag        Manual
Datsun 710           Manual
Hornet 4 Drive         Auto
Hornet Sportabout      Auto
Name: am, dtype: object

In [43]:
df['am'].apply(amtype).head()  # Series.apply with amtype; column selected with bracket indexing

Mazda RX4            Manual
Mazda RX4 Wag        Manual
Datsun 710           Manual
Hornet 4 Drive         Auto
Hornet Sportabout      Auto
Name: am, dtype: object

In [45]:
df[['am','vs']]['am'].apply(amtype).head()  # selects a two-column DataFrame, then its 'am' column; apply() still runs on a Series here

Mazda RX4            Manual
Mazda RX4 Wag        Manual
Datsun 710           Manual
Hornet 4 Drive         Auto
Hornet Sportabout      Auto
Name: am, dtype: object

In [46]:
df.gear.map(gdict).head() # Series.map with the gdict lookup dict (a mapping, not a function)

Mazda RX4            Gear4
Mazda RX4 Wag        Gear4
Datsun 710           Gear4
Hornet 4 Drive       Gear3
Hornet Sportabout    Gear3
Name: gear, dtype: object

### end here