# Filtrar datos

In [1]:
import pandas as pd

In [2]:
# Build the DataFrame from the CSV dataset
# "Amazon Top 50 Bestselling Books 2009-2019".
# The path is relative, so 'amazon.csv' is resolved against the kernel's working directory.
df = pd.read_csv(filepath_or_buffer='amazon.csv')

In [3]:
# Display the DataFrame (a bare last expression is rendered as the cell's output)
df

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


In [5]:
# Filtering starts from a condition. Comparing a column against a value
# produces a boolean Series with one entry per row; here an entry is True
# where the 'Author' column holds 'Stephen King'.
df['Author'].eq('Stephen King')

0      False
1       True
2      False
3      False
4      False
       ...  
545    False
546    False
547    False
548    False
549    False
Name: Author, Length: 550, dtype: bool

In [6]:
# Filter the DataFrame with the condition: used as a boolean mask, it keeps
# only the rows whose 'Author' is 'Stephen King'.
df.loc[df['Author'] == 'Stephen King']

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
79,Doctor Sleep: A Novel,Stephen King,4.7,15845,13,2013,Fiction
194,Joyland (Hard Case Crime),Stephen King,4.5,4748,12,2013,Fiction
520,Under the Dome: A Novel,Stephen King,4.3,6740,20,2009,Fiction


In [7]:
# Another example: books whose 'User Rating' is at least 4.9.
df.loc[df['User Rating'].ge(4.9)]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
40,"Brown Bear, Brown Bear, What Do You See?",Bill Martin Jr.,4.9,14344,5,2017,Fiction
41,"Brown Bear, Brown Bear, What Do You See?",Bill Martin Jr.,4.9,14344,5,2019,Fiction
81,Dog Man and Cat Kid: From the Creator of Capta...,Dav Pilkey,4.9,5062,6,2018,Fiction
82,Dog Man: A Tale of Two Kitties: From the Creat...,Dav Pilkey,4.9,4786,8,2017,Fiction
83,Dog Man: Brawl of the Wild: From the Creator o...,Dav Pilkey,4.9,7235,4,2018,Fiction
84,Dog Man: Brawl of the Wild: From the Creator o...,Dav Pilkey,4.9,7235,4,2019,Fiction
85,Dog Man: Fetch-22: From the Creator of Captain...,Dav Pilkey,4.9,12619,8,2019,Fiction
86,Dog Man: For Whom the Ball Rolls: From the Cre...,Dav Pilkey,4.9,9089,8,2019,Fiction
87,Dog Man: Lord of the Fleas: From the Creator o...,Dav Pilkey,4.9,5470,6,2018,Fiction
146,"Goodnight, Goodnight Construction Site (Hardco...",Sherri Duskey Rinker,4.9,7038,7,2012,Fiction


In [8]:
# Another example: fiction books whose 'Year' is 2019.
# In pandas, conditions are combined element-wise with '&', not with 'and'.
# Each comparison needs its own parentheses because '&' binds more tightly
# than '=='.
df.loc[
    (df['Genre'] == 'Fiction')
    & (df['Year'] == 2019)
]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
41,"Brown Bear, Brown Bear, What Do You See?",Bill Martin Jr.,4.9,14344,5,2019,Fiction
84,Dog Man: Brawl of the Wild: From the Creator o...,Dav Pilkey,4.9,7235,4,2019,Fiction
85,Dog Man: Fetch-22: From the Creator of Captain...,Dav Pilkey,4.9,12619,8,2019,Fiction
86,Dog Man: For Whom the Ball Rolls: From the Cre...,Dav Pilkey,4.9,9089,8,2019,Fiction
127,Giraffes Can't Dance,Giles Andreae,4.8,14038,4,2019,Fiction
145,Goodnight Moon,Margaret Wise Brown,4.8,8837,5,2019,Fiction
155,Harry Potter and the Goblet of Fire: The Illus...,J. K. Rowling,4.9,7758,18,2019,Fiction
179,If Animals Kissed Good Night,Ann Whitford Paul,4.8,16643,4,2019,Fiction
252,"Oh, the Places You'll Go!",Dr. Seuss,4.9,21834,8,2019,Fiction
267,Player's Handbook (Dungeons & Dragons),Wizards RPG Team,4.8,16990,27,2019,Fiction


In [9]:
# Another example: books with 'Price' below 5 or 'User Rating' above 4.5.
# In pandas, conditions are OR-ed element-wise with '|', not with 'or'.
# Each comparison needs its own parentheses because '|' binds more tightly
# than '<' and '>'.
df.loc[
    (df['Price'] < 5)
    | (df['User Rating'] > 4.5)
]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction
