# Filtrar datos

En este notebook aprenderemos a filtrar y manipular datos con un `DataFrame` de pandas.

In [19]:
import pandas as pd

Creamos un diccionario con el nombre de los países y su extensión territorial en $km^2$.

In [20]:
# Column-oriented records: each key becomes a DataFrame column and the
# lists are aligned by position (first name goes with first area, etc.).
datos = {
    "pais": [
        "Estados Unidos",
        "China",
        "Brasil",
        "India",
        "México",
    ],
    "km2": [
        9_833_517,
        9_600_000,
        8_515_767,
        3_287_263,
        1_964_375,
    ],
}

# Build the table; with no explicit index pandas numbers the rows 0..4.
paises = pd.DataFrame(data=datos)
paises

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
1,China,9600000
2,Brasil,8515767
3,India,3287263
4,México,1964375


Filtrar selectivamente

In [21]:
# One boolean per row, in row order: True keeps the row, False drops it.
filtro = [
    True,   # row 0
    False,  # row 1
    False,  # row 2
    False,  # row 3
    True,   # row 4
]
paises[filtro]

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
4,México,1964375


Filtrar por una condición dada

In [22]:
# Boolean Series: True for every row whose km2 value is greater than 4,000,000.
filtro = paises["km2"].gt(4_000_000)
paises[filtro]

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
1,China,9600000
2,Brasil,8515767


In [26]:
# pandas loads files through its read_* functions (examples below are
# left commented out so this cell does not touch the file system).
# To read an Excel file:
#pd.read_excel("datos_paises.xlsx", index_col=0)

# To read a CSV file located in another folder:
#pd.read_csv("datos/clientes.csv", index_col=0)

In [29]:
# Replace the small demo table with the data stored in the workbook;
# index_col=0 makes the first spreadsheet column the row index.
paises = pd.read_excel(io="datos_paises.xlsx", index_col=0)
paises

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,Asia,9600000.0,1409517
India,Asia,3287263.0,1339180
United States of America,America,9833517.0,324460
Indonesia,Asia,1910931.0,263991
Brazil,America,8515767.0,209288
...,...,...,...
Saint Helena,Africa,308.0,4
Falkland Islands (Malvinas),America,12173.0,3
Niue,Oceania,260.0,2
Holy See,Europa,1.0,1


In [30]:
# Boolean mask sharing the table's index: True where km2 is below 50.
filtro = paises["km2"].lt(50)
filtro

pais
China                          False
India                          False
United States of America       False
Indonesia                      False
Brazil                         False
                               ...  
Saint Helena                   False
Falkland Islands (Malvinas)    False
Niue                           False
Holy See                        True
Tokelau                         True
Name: km2, Length: 229, dtype: bool

In [31]:
# Keep only the rows whose entry in the boolean mask `filtro` is True.
paises[filtro]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"China, Macao SAR",Asia,30.0,623
Sint Maarten (Dutch part),America,34.0,40
Monaco,Europa,2.0,39
Gibraltar,Europa,6.0,35
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Holy See,Europa,1.0,1
Tokelau,Oceania,12.0,1


Podemos unir condiciones con símbolos (operadores) lógicos:

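`&` = y · `|` = o · `~` = no (negación). Cada condición debe ir entre paréntesis, porque en Python `&` y `|` tienen mayor precedencia que las comparaciones (`<`, `>`, `==`).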

In [32]:
# Rows where BOTH masks are True: km2 below 50 and poblacion_miles above 500.
paises[paises["km2"].lt(50) & paises["poblacion_miles"].gt(500)]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"China, Macao SAR",Asia,30.0,623


In [33]:
# Rows where AT LEAST ONE mask is True: km2 below 5 or poblacion_miles below 5.
paises[paises["km2"].lt(5) | paises["poblacion_miles"].lt(5)]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monaco,Europa,2.0,39
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Saint Helena,Africa,308.0,4
Falkland Islands (Malvinas),America,12173.0,3
Niue,Oceania,260.0,2
Holy See,Europa,1.0,1
Tokelau,Oceania,12.0,1


In [34]:
# Three masks chained with &: a row is kept only when all of them are True.
paises[
    paises["continente"].eq("Europa")
    & paises["km2"].lt(50)
    & paises["poblacion_miles"].lt(50)
]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monaco,Europa,2.0,39
Gibraltar,Europa,6.0,35
Holy See,Europa,1.0,1


In [36]:
# ~ inverts the first mask, so this keeps rows whose continente is NOT
# "Europa" while km2 and poblacion_miles are both below 50.
paises[
    ~paises["continente"].eq("Europa")
    & paises["km2"].lt(50)
    & paises["poblacion_miles"].lt(50)
]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sint Maarten (Dutch part),America,34.0,40
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Tokelau,Oceania,12.0,1


In [37]:
# Boolean mask passed to .loc: keeps rows whose continente is NOT "Europa"
# and whose km2 and poblacion_miles are both below 50.
paises.loc[
    ~paises["continente"].eq("Europa")
    & paises["km2"].lt(50)
    & paises["poblacion_miles"].lt(50)
]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sint Maarten (Dutch part),America,34.0,40
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Tokelau,Oceania,12.0,1
