# Como filtrar linhas do dataframe pelo valor da coluna. 

### Importando Pacotes

In [1]:
import pandas as pd

### Lendo dados

In [2]:
movies = pd.read_csv('http://bit.ly/imdbratings')
movies.head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"


In [3]:
movies.shape

(979, 6)

In [4]:
movies[movies['duration'] >= 200].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


In [5]:
type(True)

bool

In [6]:
type(False)

bool

In [7]:
booleans = []
for length in movies['duration']:
    if length >= 200:
        booleans.append(True)
    else: 
         booleans.append(False)


In [8]:
booleans[0:5]

[False, False, True, False, False]

In [9]:
len(booleans)

979

In [10]:
is_long = pd.Series(booleans)
is_long.head(2)

0    False
1    False
dtype: bool

In [11]:
is_long.describe

<bound method NDFrame.describe of 0      False
1      False
2       True
3      False
4      False
       ...  
974    False
975    False
976    False
977    False
978    False
Length: 979, dtype: bool>

In [12]:
movies[is_long].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


### Diminuindo a complexidade do for para uma comparação boleana

In [13]:
is_long = movies['duration'] >= 200
is_long.head()

0    False
1    False
2     True
3    False
4    False
Name: duration, dtype: bool

In [14]:
is_long.name

'duration'

In [15]:
movies[is_long].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."


### Filtrando as linhas de um dataframe pela condição e selecionando a serie.

In [16]:
movies[movies['duration'] >= 200]['genre']

2          Crime
7      Adventure
17         Drama
78         Crime
85     Adventure
142    Adventure
157        Drama
204    Adventure
445    Adventure
476        Drama
630    Biography
767       Action
Name: genre, dtype: object

In [17]:
# usando loc para filtro e selecionando a coluna.

movies.loc[movies['duration'] >= 200, 'genre']

2          Crime
7      Adventure
17         Drama
78         Crime
85     Adventure
142    Adventure
157        Drama
204    Adventure
445    Adventure
476        Drama
630    Biography
767       Action
Name: genre, dtype: object

# Como aplicar filtros com multiplos critérios no dataframe

In [18]:
movies[(movies['duration'] >= 200) & (movies['genre'] == 'Drama') ]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."


In [19]:
movies[(movies['duration'] >= 200) | (movies['genre'] == 'Drama') ].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."


In [20]:
(movies['duration'] >= 200) & (movies['genre'] == 'Drama') 

0      False
1      False
2      False
3      False
4      False
       ...  
974    False
975    False
976    False
977    False
978    False
Length: 979, dtype: bool

In [21]:
movies[ (movies['genre'] == 'Crime') | (movies['genre'] == 'Drama' ) | (movies['genre'] == 'Action') ].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"


In [22]:
types_movies = ['Crime', 'Drama', 'Action']
movies[ movies['genre'].isin(types_movies) ].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"


In [23]:
movies[ movies['genre'].isin(['Crime', 'Drama', 'Action']) ].head(2)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
