# Lección 58 - filter(), where(), mask()

In [1]:
import pandas as pd
import numpy as np

# Load only the country and wine_servings columns, using country as the index.
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and removed
# in pandas 2.0, so the resulting one-column DataFrame is turned into a Series
# with .squeeze('columns') instead (the replacement recommended by pandas).
alcohol = pd.read_csv(
    'https://andybek.com/pandas-drinks',
    usecols=['country', 'wine_servings'],
    index_col=['country'],
).squeeze('columns')

In [4]:
# Find every country whose name starts with "V" using a regular expression
# (the ^ anchors the match to the beginning of the index label).
alcohol.filter(regex='^V')

country
Vanuatu      11.0
Venezuela     3.0
Vietnam       1.0
Name: wine_servings, dtype: float64

In [8]:
# `like` is a substring match, not a prefix match: it keeps every country whose
# name contains "V" anywhere (e.g. "Cabo Verde"), not only those starting with it.
alcohol.filter(like='V')

country
Cabo Verde                      16.0
St. Vincent & the Grenadines    11.0
Vanuatu                         11.0
Venezuela                        3.0
Vietnam                          1.0
Name: wine_servings, dtype: float64

In [9]:
# Filtering by value: plain [] indexing and .loc accept the same boolean mask,
# so comparing both selections element-wise gives True for every kept country.
over_200 = alcohol > 200
alcohol[over_200] == alcohol.loc[over_200]

country
Andorra              True
Argentina            True
Australia            True
Belgium              True
Croatia              True
Denmark              True
Equatorial Guinea    True
Greece               True
Italy                True
Luxembourg           True
Portugal             True
Slovenia             True
Name: wine_servings, dtype: bool

In [13]:
# A named predicate can also be used to build the boolean mask for the Series.
def gt220(x):
    """Return the result of comparing ``x`` against 220 with ``>``.

    Works element-wise when ``x`` is a pandas Series (yielding a boolean
    Series) and as a plain comparison when ``x`` is a scalar.
    """
    threshold = 220
    return x > threshold

# Indexing with the function itself: pandas calls gt220(alcohol) and keeps the
# entries for which the returned boolean mask is True.
alcohol[gt220]

country
Andorra              312.0
Argentina            221.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [14]:
# where() keeps the values for which the condition holds and replaces all the
# others (condition False) with the value passed as `other`.
alcohol.where(cond=alcohol.gt(200), other='too small')

country
Afghanistan    too small
Albania        too small
Algeria        too small
Andorra              312
Angola         too small
                 ...    
Venezuela      too small
Vietnam        too small
Yemen          too small
Zambia         too small
Zimbabwe       too small
Name: wine_servings, Length: 193, dtype: object

In [15]:
# The condition may also be given as a callable (here a lambda); it receives
# the Series and must return the boolean mask.
alcohol.where(
    lambda servings: servings > 200,
    other='too small',
)

country
Afghanistan    too small
Albania        too small
Algeria        too small
Andorra              312
Angola         too small
                 ...    
Venezuela      too small
Vietnam        too small
Yemen          too small
Zambia         too small
Zimbabwe       too small
Name: wine_servings, Length: 193, dtype: object

In [16]:
# Without `other`, the values that fail the condition are replaced with NaN,
# so a following dropna() removes them and leaves only the matching entries.
(
    alcohol
    .where(lambda servings: servings > 200)
    .dropna()
)

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [17]:
# mask() is the opposite of where(): it replaces the values for which the
# condition is True (with NaN by default) and keeps the rest.
# Note that dropna() also discards entries that were already missing in the data.
(
    alcohol
    .mask(lambda servings: servings > 200)
    .dropna()
)

country
Albania              54.0
Algeria              14.0
Angola               45.0
Antigua & Barbuda    45.0
Armenia              11.0
                     ... 
Vanuatu              11.0
Venezuela             3.0
Vietnam               1.0
Zambia                4.0
Zimbabwe              4.0
Name: wine_servings, Length: 150, dtype: float64