<h1>Pivot Table</h1>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy dataset for the pivot-table examples, written one tuple per row so the
# literal reads like the rendered table: A, B and C are string keys, D and E
# are the integer measures aggregated in the cells below.
df = pd.DataFrame(
    [
        ("foo", "one", "small", 1, 2),
        ("foo", "one", "large", 2, 4),
        ("foo", "one", "large", 2, 5),
        ("foo", "two", "small", 3, 5),
        ("foo", "two", "small", 3, 6),
        ("bar", "one", "large", 4, 6),
        ("bar", "one", "small", 5, 8),
        ("bar", "two", "small", 6, 9),
        ("bar", "two", "large", 7, 9),
    ],
    columns=["A", "B", "C", "D", "E"],
)

In [3]:
# Bare last expression: the notebook renders the whole 9-row frame as a table.
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [6]:
# Sum D for every (A, B) pair, with one output column per distinct value of C.
# aggfunc is the string 'sum' rather than np.sum: pandas >= 2.1 emits a
# FutureWarning for NumPy/builtin reducers and will stop translating them to
# its own groupby implementation; the string alias gives the same result on
# every pandas version.
# fill_value=0 fills (A, B, C) combinations that have no rows
# (here foo / two / large) instead of leaving NaN.
table = pd.pivot_table(
    df,
    values='D',
    index=['A', 'B'],
    columns=['C'],
    aggfunc='sum',
    fill_value=0,
)
table

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4,5
bar,two,7,6
foo,one,4,1
foo,two,0,6


In [8]:
# Per-column aggregation for each (A, B) pair: the total of D, and the
# min / max / mean of E. Passing a dict to aggfunc yields a two-level column
# index of (value column, aggregation name).
# String aliases replace np.sum, np.mean and the builtins min/max: pandas >= 2.1
# emits a FutureWarning for those callables and will stop mapping them to its
# own groupby reducers. The resulting labels ('sum', 'min', 'max', 'mean') and
# values are the same.
table2 = pd.pivot_table(
    df,
    values=['D', 'E'],
    index=['A', 'B'],
    aggfunc={'D': 'sum', 'E': ['min', 'max', 'mean']},
)
table2

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,max,mean,min
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,9,8.0,7.0,6.0
bar,two,13,9.0,9.0,9.0
foo,one,5,5.0,3.666667,2.0
foo,two,6,6.0,5.5,5.0


<h1>One-Hot Encoding</h1>

In [15]:
# Small pet table used by the encoding examples. Column names and values are
# Portuguese: 'sexo' = sex ('macho' = male, 'femea' = female), 'idade' = age.
df2 = pd.DataFrame(
    {
        'animal': ['dog', 'cat', 'dog'],
        'sexo': ['macho', 'femea', 'femea'],
        'idade': [1, 2, 3],
    }
)
df2

Unnamed: 0,animal,sexo,idade
0,dog,macho,1
1,cat,femea,2
2,dog,femea,3


In [11]:
# One-hot encode the two text columns; 'idade' is numeric and passes through.
# - columns= names the encoded columns explicitly instead of relying on dtype
#   inference, so the two-item prefix list always matches what is encoded
#   (inference finds no columns, and raises, once df2 holds integer codes).
# - drop_first=True drops the first level of each column ('cat' and 'femea'),
#   leaving one indicator per two-valued column.
# - dtype='uint8' pins the 0/1 indicators; pandas >= 2.0 would otherwise
#   return booleans, while older versions already defaulted to uint8.
pd.get_dummies(
    df2,
    columns=['animal', 'sexo'],
    prefix=['animal', 'sexo'],
    drop_first=True,
    dtype='uint8',
)

Unnamed: 0,idade,animal_dog,sexo_macho
0,1,1,1
1,2,0,0
2,3,1,0


<h1>Category Codes</h1>

In [17]:
# Cast the two text columns to the categorical dtype so that every distinct
# value is assigned an integer code.
for col in ('animal', 'sexo'):
    df2[col] = df2[col].astype('category')

cat_columns = df2.select_dtypes(['category']).columns

# Record {code: label} for each categorical column *before* the labels are
# overwritten below; this mapping is what lets the integers be decoded later.
map_categ_cols = {
    name: dict(enumerate(df2[name].cat.categories))
    for name in cat_columns
}
print(map_categ_cols)

# Replace each categorical column with its integer codes. This mutates df2 in
# place, so the original labels are only recoverable through map_categ_cols.
df2[cat_columns] = df2[cat_columns].apply(lambda series: series.cat.codes)

{'animal': {0: 'cat', 1: 'dog'}, 'sexo': {0: 'femea', 1: 'macho'}}


In [18]:
# Display df2 after the cell above replaced 'animal' and 'sexo' with their
# integer category codes; 'idade' is unchanged.
df2

Unnamed: 0,animal,sexo,idade
0,1,1,1
1,0,0,2
2,1,0,3
