# Capítulo 3 - Manipulação de Dados em Python com Pandas

In [1]:
import pandas as pd

## Adição de Colunas e Índices Com e Sem Broadcasting

### Sem Broadcasting

In [3]:
# Read the CSV file from disk and keep it as a dataframe.
csv_path = 'dataset1.csv'
df = pd.read_csv(csv_path)

In [4]:
# Preview the first rows of the freshly loaded dataframe.
df.head()

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0
1,ATL,Ambrose,Mikey,D,65625.0,65625.0
2,ATL,Asad,Yamil,M,150000.0,150000.0
3,ATL,Bloom,Mark,D,99225.0,106573.89
4,ATL,Carleton,Andrew,F,65000.0,77400.0


In [5]:
# Add a new column; the scalar 0 is assigned to every row.
df['salario_final'] = 0

In [6]:
# Confirm the new salario_final column exists (all zeros at this point).
df.head()

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0,0
1,ATL,Ambrose,Mikey,D,65625.0,65625.0,0
2,ATL,Asad,Yamil,M,150000.0,150000.0,0
3,ATL,Bloom,Mark,D,99225.0,106573.89,0
4,ATL,Carleton,Andrew,F,65000.0,77400.0,0


In [7]:
# Fill the column from other variables: row-wise sum of the two pay columns.
base_pay = df['base_salary']
guaranteed_pay = df['guaranteed_compensation']
df['salario_final'] = base_pay + guaranteed_pay

In [11]:
# Display the frame to check the computed salario_final values
# (the rendered output is truncated for long frames).
df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,ATL,Almiron,Miguel,M,1912500.0,2297000.00,4209500.00
1,ATL,Ambrose,Mikey,D,65625.0,65625.00,131250.00
2,ATL,Asad,Yamil,M,150000.0,150000.00,300000.00
3,ATL,Bloom,Mark,D,99225.0,106573.89,205798.89
4,ATL,Carleton,Andrew,F,65000.0,77400.00,142400.00
...,...,...,...,...,...,...,...
610,VAN,Teibert,Russell,M,126500.0,194000.00,320500.00
611,VAN,Tornaghi,Paolo,GK,80000.0,80000.00,160000.00
612,VAN,Waston,Kendall,D,350000.0,368125.00,718125.00
613,,,,,,,


In [12]:
# Add a sequential, 1-based ID as the first column (position 0).
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run; it also skips the insert when ID has
# already been promoted to the index.
if 'ID' not in df.columns and df.index.name != 'ID':
    df.insert(0, column = 'ID', value = range(1, 1 + len(df)))

In [13]:
# Check that ID now appears as the first column, starting at 1.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912500.0,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65625.0,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150000.0,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99225.0,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65000.0,77400.0,142400.0


In [14]:
# Check the last rows: the final ID should equal the number of rows.
df.tail()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
610,611,VAN,Teibert,Russell,M,126500.0,194000.0,320500.0
611,612,VAN,Tornaghi,Paolo,GK,80000.0,80000.0,160000.0
612,613,VAN,Waston,Kendall,D,350000.0,368125.0,718125.0
613,614,,,,,,,
614,615,VAN,Williams,Sheanon,D,175000.0,184000.0,359000.0


### Com Broadcasting

#### Broadcasting é a propagação de uma operação (por exemplo, somar ou multiplicar por um escalar) a todos os elementos de uma coluna ou do dataframe.

In [15]:
# Starting point for the broadcasting examples.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912500.0,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65625.0,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150000.0,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99225.0,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65000.0,77400.0,142400.0


In [17]:
# This statement does not change the original dataframe: add(5) applies the
# scalar to every element and returns a new Series, which is only displayed
# here because the result is not assigned.
df['base_salary'].add(5)

0      1912505.0
1        65630.0
2       150005.0
3        99230.0
4        65005.0
         ...    
610     126505.0
611      80005.0
612     350005.0
613          NaN
614     175005.0
Name: base_salary, Length: 615, dtype: float64

In [18]:
# base_salary is unchanged, since the previous result was never assigned.
df.base_salary.head()

0    1912500.0
1      65625.0
2     150000.0
3      99225.0
4      65000.0
Name: base_salary, dtype: float64

In [19]:
# Assigning the result back is what changes the original dataframe.
# NOTE: every execution of this cell adds another 5, and salario_final
# (computed in an earlier cell) is not recomputed here.
bumped_base_salary = df['base_salary'].add(5)
df['base_salary'] = bumped_base_salary

In [20]:
# base_salary now reflects the +5 that was assigned back.
df.base_salary.head()

0    1912505.0
1      65630.0
2     150005.0
3      99230.0
4      65005.0
Name: base_salary, dtype: float64

In [21]:
# Add a new column via broadcasting: every base_salary value is multiplied
# by a fixed USD -> EUR rate (example value used by this notebook).
usd_to_eur_rate = 0.93
df['base_salary_eur'] = df['base_salary'].mul(usd_to_eur_rate)

In [22]:
# The new base_salary_eur column is appended as the last column.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final,base_salary_eur
0,1,ATL,Almiron,Miguel,M,1912505.0,2297000.0,4209500.0,1778629.65
1,2,ATL,Ambrose,Mikey,D,65630.0,65625.0,131250.0,61035.9
2,3,ATL,Asad,Yamil,M,150005.0,150000.0,300000.0,139504.65
3,4,ATL,Bloom,Mark,D,99230.0,106573.89,205798.89,92283.9
4,5,ATL,Carleton,Andrew,F,65005.0,77400.0,142400.0,60454.65


In [27]:
# Drop the column only in the returned result: without inplace or
# reassignment, df itself still contains base_salary_eur.
df.drop(columns = ['base_salary_eur'])

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912505.0,2297000.00,4209500.00
1,2,ATL,Ambrose,Mikey,D,65630.0,65625.00,131250.00
2,3,ATL,Asad,Yamil,M,150005.0,150000.00,300000.00
3,4,ATL,Bloom,Mark,D,99230.0,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65005.0,77400.00,142400.00
...,...,...,...,...,...,...,...,...
610,611,VAN,Teibert,Russell,M,126505.0,194000.00,320500.00
611,612,VAN,Tornaghi,Paolo,GK,80005.0,80000.00,160000.00
612,613,VAN,Waston,Kendall,D,350005.0,368125.00,718125.00
613,614,,,,,,,


In [28]:
# df still has base_salary_eur, because the previous drop was not applied to it.
# NOTE(review): the saved output shows base_salary_eur right after base_salary,
# while it was appended as the last column when created; the stored output
# looks like it came from an out-of-order run. Re-execute the notebook from a
# fresh kernel to refresh it.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,base_salary_eur,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912505.0,1778629.65,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65630.0,61035.9,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150005.0,139504.65,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99230.0,92283.9,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65005.0,60454.65,77400.0,142400.0


In [29]:
# Remove the column from df itself. drop() returns a new frame by default, so
# the change only sticks with inplace = True (or by reassigning the result,
# e.g. df = df.drop(...)).
# errors = 'ignore' keeps the cell re-runnable: without it, a second execution
# raises KeyError because the column is already gone.
df.drop(columns = ['base_salary_eur'], inplace = True, errors = 'ignore')

In [30]:
# base_salary_eur is no longer part of df.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912505.0,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65630.0,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150005.0,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99230.0,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65005.0,77400.0,142400.0


In [31]:
# Add the EUR column again, this time at an explicit position: with the ID
# column present, position 6 places it right after base_salary.
# The value is base_salary multiplied by 0.93 (same example USD -> EUR rate
# used when the column was first created).
# DataFrame.insert raises ValueError if the column already exists, so the
# guard keeps this cell safe to re-run.
if 'base_salary_eur' not in df.columns:
    df.insert(6, column = 'base_salary_eur', value = df['base_salary'].mul(0.93))

In [32]:
# base_salary_eur now sits right after base_salary.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary,base_salary_eur,guaranteed_compensation,salario_final
0,1,ATL,Almiron,Miguel,M,1912505.0,1778629.65,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65630.0,61035.9,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150005.0,139504.65,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99230.0,92283.9,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65005.0,60454.65,77400.0,142400.0


In [33]:
# Rename the variables: add a _usd suffix to the three columns listed below
# (old name -> new name).
usd_column_names = {
    'base_salary': 'base_salary_usd',
    'guaranteed_compensation': 'guaranteed_compensation_usd',
    'salario_final': 'salario_final_usd',
}
df.rename(columns = usd_column_names, inplace = True)

In [34]:
# Verify the renamed columns.
df.head()

Unnamed: 0,ID,club,last_name,first_name,position,base_salary_usd,base_salary_eur,guaranteed_compensation_usd,salario_final_usd
0,1,ATL,Almiron,Miguel,M,1912505.0,1778629.65,2297000.0,4209500.0
1,2,ATL,Ambrose,Mikey,D,65630.0,61035.9,65625.0,131250.0
2,3,ATL,Asad,Yamil,M,150005.0,139504.65,150000.0,300000.0
3,4,ATL,Bloom,Mark,D,99230.0,92283.9,106573.89,205798.89
4,5,ATL,Carleton,Andrew,F,65005.0,60454.65,77400.0,142400.0


In [35]:
# Turn the ID column into the table's index.
# After the first run ID is no longer a column, so calling set_index again
# would raise KeyError; the guard keeps the cell safe to re-run.
if 'ID' in df.columns:
    df.set_index('ID', inplace = True)

In [36]:
# ID is now the row index instead of a regular column.
df.head()

Unnamed: 0_level_0,club,last_name,first_name,position,base_salary_usd,base_salary_eur,guaranteed_compensation_usd,salario_final_usd
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,ATL,Almiron,Miguel,M,1912505.0,1778629.65,2297000.0,4209500.0
2,ATL,Ambrose,Mikey,D,65630.0,61035.9,65625.0,131250.0
3,ATL,Asad,Yamil,M,150005.0,139504.65,150000.0,300000.0
4,ATL,Bloom,Mark,D,99230.0,92283.9,106573.89,205798.89
5,ATL,Carleton,Andrew,F,65005.0,60454.65,77400.0,142400.0


In [37]:
# Same check on the last rows of the re-indexed frame.
df.tail()

Unnamed: 0_level_0,club,last_name,first_name,position,base_salary_usd,base_salary_eur,guaranteed_compensation_usd,salario_final_usd
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
611,VAN,Teibert,Russell,M,126505.0,117649.65,194000.0,320500.0
612,VAN,Tornaghi,Paolo,GK,80005.0,74404.65,80000.0,160000.0
613,VAN,Waston,Kendall,D,350005.0,325504.65,368125.0,718125.0
614,,,,,,,,
615,VAN,Williams,Sheanon,D,175005.0,162754.65,184000.0,359000.0
