# Data wrangling in Python

## 0. Setting up the dataframe
The dataframe will be set up with the following structure:

In [None]:
# Do not run: this is only a sketch of the table layout, not executable Python
Name  Team  Country  POP   GDP   AOE
John   B      NIC      8    22   True
Lara   A      GBP     50    34   False
Matt   C      MEX    110    53   True
Loes   A      CAN      9    35   False 
Irit   B      FRA     60    36   True
Adam   A      CHI    999    36   False
Paco   C      SLV      4    62   True
Pete   B      NED     17    36   True
Joao   C      UGA     40    65   False

### 0.1 Create a dataframe by row

In [28]:
import pandas as pd

# Build the frame one row at a time: each outer key is a row label and each
# inner dict holds that row's values keyed by column name.
# Constructing it in a single call (instead of filling a pre-allocated all-NaN
# frame with .loc) lets pandas infer a proper dtype per column: integers for
# POP/GDP and booleans for AOE, rather than leaving every column as `object`.
df = pd.DataFrame.from_dict(
    {
        'John': {'Team': 'B', 'Country': 'NIC', 'POP':   8, 'GDP': 22, 'AOE': True},
        'Lara': {'Team': 'A', 'Country': 'GBP', 'POP':  50, 'GDP': 34, 'AOE': False},
        'Matt': {'Team': 'C', 'Country': 'MEX', 'POP': 110, 'GDP': 53, 'AOE': True},
        'Loes': {'Team': 'A', 'Country': 'CAN', 'POP':   9, 'GDP': 35, 'AOE': True},
        'Joao': {'Team': 'B', 'Country': 'FRA', 'POP':  60, 'GDP': 36, 'AOE': False},
        'Adam': {'Team': 'A', 'Country': 'CHI', 'POP': 999, 'GDP': 44, 'AOE': False},
    },
    orient='index',  # outer keys become the row index
    columns=['Team', 'Country', 'POP', 'GDP', 'AOE'],  # fix the column order explicitly
)

df

Unnamed: 0,Team,Country,POP,GDP,AOE
John,B,NIC,8,22,True
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
Loes,A,CAN,9,35,True
Joao,B,FRA,60,36,False
Adam,A,CHI,999,44,False


### 0.2 Create a dataframe by column

In [59]:
import pandas as pd

# Build the frame one column at a time: each entry maps a column name to a
# Series whose keys are the row labels. pandas aligns every Series on the
# given index, and `columns` pins the column order.
df = pd.DataFrame(
    {
        'Team': pd.Series(
            {'John': 'B', 'Lara': 'A', 'Matt': 'C', 'Loes': 'A', 'Joao': 'B', 'Adam': 'A'}
        ),
        'Country': pd.Series(
            {'John': 'NIC', 'Lara': 'GBP', 'Matt': 'MEX', 'Loes': 'CAN', 'Joao': 'FRA', 'Adam': 'CHI'}
        ),
        'POP': pd.Series(
            {'John': 8, 'Lara': 50, 'Matt': 110, 'Loes': 9, 'Joao': 60, 'Adam': 999}
        ),
        'GDP': pd.Series(
            {'John': 22, 'Lara': 34, 'Matt': 53, 'Loes': 35, 'Joao': 36, 'Adam': 44}
        ),
        'AOE': pd.Series(
            {'John': True, 'Lara': False, 'Matt': True, 'Loes': True, 'Joao': False, 'Adam': False}
        ),
    },
    index=['John', 'Lara', 'Matt', 'Loes', 'Joao', 'Adam'],
    columns=['Team', 'Country', 'POP', 'GDP', 'AOE'],
)

df


Unnamed: 0,Team,Country,POP,GDP,AOE
John,B,NIC,8,22,True
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
Loes,A,CAN,9,35,True
Joao,B,FRA,60,36,False
Adam,A,CHI,999,44,False


### 0.3 Modify column header

In [61]:
# rename() returns a new frame with the header changed; the result is not
# assigned here, so `df` itself keeps its 'POP' column.
df.rename({'POP': 'Pop'}, axis='columns')

Unnamed: 0,Team,Country,Pop,GDP,AOE
John,B,NIC,8,22,True
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
Loes,A,CAN,9,35,True
Joao,B,FRA,60,36,False
Adam,A,CHI,999,44,False


### 0.4 Modify row index

In [147]:
# rename() returns a new frame with the row label changed; the result is not
# assigned here, so `df` itself keeps the 'Joao' label.
df.rename({'Joao': 'João'}, axis='index')


Unnamed: 0,Team,Country,POP,GDP,AOE
John,B,NIC,8,22,True
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
Loes,A,CAN,9,35,True
João,B,FRA,60,36,False
Adam,A,CHI,999,44,False


## 1. Subsetting dataframes

### 1.1 Subsetting rows by name/index

In [123]:
# Two equivalent ways to select the rows Lara..Loes; the cell displays only the last expression.
# .loc slices by label and includes the end label 'Loes'.
df.loc['Lara':'Loes']
# .iloc slices by position and excludes the stop position (rows 1, 2 and 3).
df.iloc[1:4]
# Slicing a range of rows returns a DataFrame.
# Selecting a single row (e.g. df.loc['Lara']) returns a Series.

Unnamed: 0,Team,Country,POP,GDP,AOE
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
Loes,A,CAN,9,35,True


### 1.2 Subsetting columns by name, index

In [146]:
# Alternative (commented out): a list selects exactly the named columns, here only Country and GDP.
#df.loc[:, ['Country','GDP']]
# Three equivalent ways to select the columns Country..GDP; the cell displays only the last expression.
df[df.columns[1:4]]  # take the column labels at positions 1-3, then select by label
df.loc[:, 'Country':'GDP']  # label slice, end label 'GDP' included
df.iloc[:, 1:4]  # positional slice, stop position excluded
# Slicing a range of columns returns a DataFrame.
# Selecting a single column (e.g. df['GDP']) returns a Series.

Unnamed: 0,Country,POP,GDP
John,NIC,8,22
Lara,GBP,50,34
Matt,MEX,110,53
Loes,CAN,9,35
Joao,FRA,60,36
Adam,CHI,999,44


### 1.3 Subsetting ranges by name, index

In [145]:
# Row and column slices combined; both forms select the same 3x3 block and only the last is displayed.
df.loc['Lara':'Loes', 'Country':'GDP']  # label slices, both end labels included
df.iloc[1:4, 1:4]  # positional slices, stop positions excluded
# Returns a DataFrame.

Unnamed: 0,Country,POP,GDP
Lara,GBP,50,34
Matt,MEX,110,53
Loes,CAN,9,35


### 1.4 Subsetting a single value by name, index

In [152]:
# Both expressions address the same cell: row 'Matt' (position 2), column 'GDP' (position 3).
# Only the last expression is displayed.
df.iloc[2,3]
df.loc['Matt','GDP']
# Returns a single scalar value (the integer 53 here), not a Series or DataFrame.

53

In [86]:
# A plain integer slice inside [] selects rows by position (stop excluded), like df.iloc[1:3].
df[1:3]

Unnamed: 0,Team,Country,POP,GDP,AOE
Lara,A,GBP,50,34,False
Matt,C,MEX,110,53,True
