## Using pandas' boolean features

Boolean features are powerful methods to select, sort and extract data from dataframes. Pandas has a number of built-in boolean functions that help in slicing and dicing data. 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
#load a common data file
movie = pd.read_csv('data/movie.csv')
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0


In [3]:
pd.options.display.max_columns=50

In [4]:
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pi...,http://www.imdb.com/title/tt0449088/?ref_=fn_t...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,Spectre,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,The Dark Knight Rises,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_t...,,,,,,,12.0,7.1,,0


In [10]:
movie.set_index('movie_title', inplace=True)

In [None]:
#the movie DF has duration listed in minutes. Let's check which are the top 25 longest movies 
movie.sort_values(['duration', 'imdb_score', 'budget'], 
                  ascending=[False, False, False]).head(25)

In [11]:
#let's create a DF of all movies longer than 150 minutes (2.5 hours)

movie_2_5_hours = movie['duration'] > 150

In [15]:
movie_2_5_hours.head(10)

movie_title
Avatar                                         True
Pirates of the Caribbean: At World's End       True
Spectre                                       False
The Dark Knight Rises                          True
Star Wars: Episode VII - The Force Awakens    False
John Carter                                   False
Spider-Man 3                                   True
Tangled                                       False
Avengers: Age of Ultron                       False
Harry Potter and the Half-Blood Prince         True
Name: duration, dtype: bool

In [17]:
movie_2_5_hours.mean()

0.04068348250610252

In [18]:
movie_2_5_hours.describe()

count      4916
unique        2
top       False
freq       4716
Name: duration, dtype: object

In [22]:
movie_2_5_hours.value_counts()

#just 200 movies are over 2.5 hours long

False    4716
True      200
Name: duration, dtype: int64

In [55]:
actors_fb = movie[['actor_1_facebook_likes', 'actor_2_facebook_likes', 'actor_1_name', 'actor_2_name']].dropna()

In [56]:
(actors_fb['actor_1_facebook_likes'] == actors_fb['actor_2_facebook_likes']).sum()

109

In [58]:
#which cases are both actors having the same likes?
actors_fb[(actors_fb['actor_1_facebook_likes'] == actors_fb['actor_2_facebook_likes'])].head(25)

Unnamed: 0_level_0,actor_1_facebook_likes,actor_2_facebook_likes,actor_1_name,actor_2_name
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rush Hour 3,268.0,268.0,Tzi Ma,Dana Ivey
Guardians of the Galaxy,14000.0,14000.0,Bradley Cooper,Vin Diesel
Interstellar,11000.0,11000.0,Matthew McConaughey,Anne Hathaway
Ratatouille,1000.0,1000.0,Janeane Garofalo,John Ratzenberger
Mission: Impossible - Rogue Nation,10000.0,10000.0,Tom Cruise,Jeremy Renner
Mission: Impossible - Ghost Protocol,10000.0,10000.0,Tom Cruise,Jeremy Renner
Ant-Man,2000.0,2000.0,Judy Greer,Hayley Atwell
Mr. & Mrs. Smith,11000.0,11000.0,Brad Pitt,Angelina Jolie Pitt
Around the World in 80 Days,1000.0,1000.0,Jim Broadbent,Steve Coogan
End of Days,1000.0,1000.0,CCH Pounder,Mark Margolis


#### Constructing multiple boolean conditions

In [65]:
#Which are the highest rated movies with PG-13 rating that were released before the year 2000 or after 2010?

condition1 = movie.imdb_score > 7.5
condition2 = movie.content_rating == 'PG-13'
condition3 = (movie.title_year < 2010) | (movie.title_year > 2010)

condition2.head()

movie_title
Avatar                                         True
Pirates of the Caribbean: At World's End       True
Spectre                                        True
The Dark Knight Rises                          True
Star Wars: Episode VII - The Force Awakens    False
Name: content_rating, dtype: bool

In [66]:
final_condition = condition1 & condition2 & condition3

In [76]:
final_condition.sum()

#there are 144 movies that meet the criteria

144

In [82]:
#which are the top 25 movies meet these three criteria?
movie[final_condition].sort_values('imdb_score', ascending=False).head(25)

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
The Dark Knight,Color,Christopher Nolan,645.0,152.0,22000.0,11000.0,Heath Ledger,23000.0,533316061.0,Action|Crime|Drama|Thriller,Christian Bale,1676169,57802,Morgan Freeman,0.0,based on comic book|dc comics|psychopath|star ...,http://www.imdb.com/title/tt0468569/?ref_=fn_t...,4667.0,English,USA,PG-13,185000000.0,2008.0,13000.0,9.0,2.35,37000
The Lord of the Rings: The Return of the King,Color,Peter Jackson,328.0,192.0,0.0,416.0,Billy Boyd,5000.0,377019252.0,Action|Adventure|Drama|Fantasy,Orlando Bloom,1215718,6434,Bernard Hill,2.0,battle|epic|king|orc|ring,http://www.imdb.com/title/tt0167260/?ref_=fn_t...,3189.0,English,USA,PG-13,94000000.0,2003.0,857.0,8.9,2.35,16000
Forrest Gump,Black and White,Robert Zemeckis,149.0,142.0,0.0,194.0,Siobhan Fallon Hogan,15000.0,329691196.0,Comedy|Drama,Tom Hanks,1251222,15700,Sam Anderson,0.0,amputee|love|vietnam|vietnam war|war hero,http://www.imdb.com/title/tt0109830/?ref_=fn_t...,1398.0,English,USA,PG-13,55000000.0,1994.0,294.0,8.8,2.35,59000
The Lord of the Rings: The Fellowship of the Ring,Color,Peter Jackson,297.0,171.0,0.0,857.0,Orlando Bloom,16000.0,313837577.0,Action|Adventure|Drama|Fantasy,Christopher Lee,1238746,22342,Billy Boyd,2.0,elf|hobbit|middle earth|quest|ring,http://www.imdb.com/title/tt0120737/?ref_=fn_t...,5060.0,English,New Zealand,PG-13,93000000.0,2001.0,5000.0,8.8,2.35,21000
Queen of the Mountains,Color,Sadyk Sher-Niyaz,16.0,135.0,135.0,0.0,Aziz Muradillayev,0.0,,Action|Biography|Drama|History,Elina Abai Kyzy,3144,0,Mirlan Abdulayev,1.0,19th century|central asia|mother son relations...,http://www.imdb.com/title/tt2640460/?ref_=fn_t...,24.0,English,Kyrgyzstan,PG-13,1400000.0,2014.0,0.0,8.7,2.35,0
The Lord of the Rings: The Two Towers,Color,Peter Jackson,294.0,172.0,0.0,857.0,Orlando Bloom,16000.0,340478898.0,Action|Adventure|Drama|Fantasy,Christopher Lee,1100446,23052,Billy Boyd,1.0,epic|evil wizard|middle earth|ring|wizard,http://www.imdb.com/title/tt0167261/?ref_=fn_t...,2417.0,English,USA,PG-13,94000000.0,2002.0,5000.0,8.7,2.35,10000
Interstellar,Color,Christopher Nolan,712.0,169.0,22000.0,6000.0,Anne Hathaway,11000.0,187991439.0,Adventure|Drama|Sci-Fi,Matthew McConaughey,928227,31488,Mackenzie Foy,1.0,black hole|father daughter relationship|saving...,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,2725.0,English,USA,PG-13,165000000.0,2014.0,11000.0,8.6,2.35,349000
Once Upon a Time in the West,Color,Sergio Leone,164.0,145.0,0.0,392.0,Woody Strode,973.0,,Western,Claudia Cardinale,216005,2589,Jack Elam,1.0,gang|gun|harmonica|killing|woman in bathtub,http://www.imdb.com/title/tt0064116/?ref_=fn_t...,565.0,English,Italy,PG-13,5000000.0,1968.0,423.0,8.6,2.35,10000
Samsara,Color,Ron Fricke,115.0,102.0,330.0,0.0,Balinese Tari Legong Dancers,48.0,2601847.0,Documentary|Music,Collin Alfredo St. Dic,22457,48,Puti Sri Candra Dewi,0.0,hall of mirrors|mont saint michel france|palac...,http://www.imdb.com/title/tt0770802/?ref_=fn_t...,69.0,,USA,PG-13,4000000.0,2011.0,0.0,8.5,2.35,26000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000


#### Some more filtering using multiple criteria 

In [127]:
#Which movies with budget in excess of 150 million released after the year 2010, or budget less than 50 million 
# but more than 25 million and released before 2000, had an 'R' rating?

condition1 = movie.budget > 150000000
condition2 = movie.content_rating == 'PG-13'
condition3 = (movie.title_year > 2010) 

In [128]:
sub_condition1 = condition1 & condition2 & condition3

In [141]:
condition4 = (movie.budget > 25000000)
condition5 = movie.content_rating == 'PG-13'
condition6 = movie.title_year < 2000

In [142]:
sub_condition2 = condition4 & condition5 & condition6

In [147]:
movie[sub_condition1]

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Spectre,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
John Carter,Color,Andrew Stanton,462.0,132.0,475.0,530.0,Samantha Morton,640.0,73058679.0,Action|Adventure|Sci-Fi,Daryl Sabara,212204,1873,Polly Walker,1.0,alien|american civil war|male nipple|mars|prin...,http://www.imdb.com/title/tt0401729/?ref_=fn_t...,738.0,English,USA,PG-13,263700000.0,2012.0,632.0,6.6,2.35,24000
Avengers: Age of Ultron,Color,Joss Whedon,635.0,141.0,0.0,19000.0,Robert Downey Jr.,26000.0,458991599.0,Action|Adventure|Sci-Fi,Chris Hemsworth,462669,92000,Scarlett Johansson,4.0,artificial intelligence|based on comic book|ca...,http://www.imdb.com/title/tt2395427/?ref_=fn_t...,1117.0,English,USA,PG-13,250000000.0,2015.0,21000.0,7.5,2.35,118000
Batman v Superman: Dawn of Justice,Color,Zack Snyder,673.0,183.0,0.0,2000.0,Lauren Cohan,15000.0,330249062.0,Action|Adventure|Sci-Fi,Henry Cavill,371639,24450,Alan D. Purwin,0.0,based on comic book|batman|sequel to a reboot|...,http://www.imdb.com/title/tt2975590/?ref_=fn_t...,3018.0,English,USA,PG-13,250000000.0,2016.0,4000.0,6.9,2.35,197000
The Lone Ranger,Color,Gore Verbinski,450.0,150.0,563.0,1000.0,Ruth Wilson,40000.0,89289910.0,Action|Adventure|Western,Johnny Depp,181792,45757,Tom Wilkinson,1.0,horse|outlaw|texas|texas ranger|train,http://www.imdb.com/title/tt1210819/?ref_=fn_t...,711.0,English,USA,PG-13,215000000.0,2013.0,2000.0,6.5,2.35,48000
Man of Steel,Color,Zack Snyder,733.0,143.0,0.0,748.0,Christopher Meloni,15000.0,291021565.0,Action|Adventure|Fantasy|Sci-Fi,Henry Cavill,548573,20495,Harry Lennix,0.0,based on comic book|british actor playing amer...,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,2536.0,English,USA,PG-13,225000000.0,2013.0,3000.0,7.2,2.35,118000
The Avengers,Color,Joss Whedon,703.0,173.0,0.0,19000.0,Robert Downey Jr.,26000.0,623279547.0,Action|Adventure|Sci-Fi,Chris Hemsworth,995415,87697,Scarlett Johansson,3.0,alien invasion|assassin|battle|iron man|soldier,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,1722.0,English,USA,PG-13,220000000.0,2012.0,21000.0,8.1,1.85,123000
Pirates of the Caribbean: On Stranger Tides,Color,Rob Marshall,448.0,136.0,252.0,1000.0,Sam Claflin,40000.0,241063875.0,Action|Adventure|Fantasy,Johnny Depp,370704,54083,Stephen Graham,4.0,blackbeard|captain|pirate|revenge|soldier,http://www.imdb.com/title/tt1298650/?ref_=fn_t...,484.0,English,USA,PG-13,250000000.0,2011.0,11000.0,6.7,2.35,58000
Men in Black 3,Color,Barry Sonnenfeld,451.0,106.0,188.0,718.0,Michael Stuhlbarg,10000.0,179020854.0,Action|Adventure|Comedy|Family|Fantasy|Sci-Fi,Will Smith,268154,12572,Nicole Scherzinger,1.0,alien|criminal|m.i.b.|maximum security prison|...,http://www.imdb.com/title/tt1409024/?ref_=fn_t...,341.0,English,USA,PG-13,225000000.0,2012.0,816.0,6.8,1.85,40000


In [149]:
movie[sub_condition2]

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
Titanic,Color,James Cameron,315.0,194.0,0.0,794.0,Kate Winslet,29000.0,658672302.0,Drama|Romance,Leonardo DiCaprio,793059,45223,Gloria Stuart,0.0,artist|love|ship|titanic|wet,http://www.imdb.com/title/tt0120338/?ref_=fn_t...,2528.0,English,USA,PG-13,2.000000e+08,1997.0,14000.0,7.7,2.35,26000
Wild Wild West,Color,Barry Sonnenfeld,85.0,106.0,188.0,582.0,Salma Hayek,10000.0,113745408.0,Action|Comedy|Sci-Fi|Western,Will Smith,129601,15870,Bai Ling,2.0,buddy movie|general|inventor|steampunk|utah,http://www.imdb.com/title/tt0120891/?ref_=fn_t...,648.0,English,USA,PG-13,1.700000e+08,1999.0,4000.0,4.8,1.85,0
Waterworld,Color,Kevin Reynolds,91.0,176.0,58.0,60.0,Rick Aviles,711.0,88246220.0,Action|Adventure|Sci-Fi|Thriller,Jeanne Tripplehorn,144337,1004,Zakes Mokae,0.0,future|sail|sea|smoker|water,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,309.0,English,USA,PG-13,1.750000e+08,1995.0,60.0,6.1,1.85,0
Armageddon,Color,Michael Bay,167.0,153.0,0.0,537.0,Steve Buscemi,13000.0,201573391.0,Action|Adventure|Sci-Fi|Thriller,Bruce Willis,322395,26029,Will Patton,0.0,asteroid|astronaut|bomb|meteorite|outer space,http://www.imdb.com/title/tt0120591/?ref_=fn_t...,1171.0,English,USA,PG-13,1.400000e+08,1998.0,12000.0,6.6,2.35,11000
The World Is Not Enough,Color,Michael Apted,197.0,128.0,150.0,244.0,Maria Grazia Cucinotta,766.0,126930660.0,Action|Adventure|Thriller,Colin Salmon,157519,2037,Desmond Llewelyn,2.0,british|oil|scientist|terrorist|tycoon,http://www.imdb.com/title/tt0143145/?ref_=fn_t...,683.0,English,UK,PG-13,1.350000e+08,1999.0,536.0,6.4,2.35,2000
Batman & Robin,Color,Joel Schumacher,183.0,125.0,541.0,409.0,Vivica A. Fox,920.0,107285004.0,Action,Michael Gough,189855,2699,John Glover,3.0,butler|critically bashed|cure|freeze|gotham,http://www.imdb.com/title/tt0118688/?ref_=fn_t...,1018.0,English,USA,PG-13,1.250000e+08,1997.0,890.0,3.7,1.85,0
Dante's Peak,Color,Roger Donaldson,93.0,108.0,79.0,268.0,Grant Heslov,650.0,67155742.0,Action|Adventure|Thriller,Jamie Renée Smith,62271,1569,Tzi Ma,1.0,ash|escape|lava|mayor|volcano,http://www.imdb.com/title/tt0118928/?ref_=fn_t...,277.0,English,USA,PG-13,1.160000e+08,1997.0,293.0,5.8,2.35,0
Tomorrow Never Dies,Color,Roger Spottiswoode,160.0,119.0,55.0,387.0,Colin Salmon,811.0,125332007.0,Action|Adventure|Thriller,Vincent Schiavelli,149680,2958,Joe Don Baker,3.0,ex boyfriend ex girlfriend relationship|media ...,http://www.imdb.com/title/tt0120347/?ref_=fn_t...,328.0,English,UK,PG-13,1.100000e+08,1997.0,766.0,6.5,2.35,0
Speed 2: Cruise Control,Color,Jan de Bont,79.0,121.0,101.0,202.0,Temuera Morrison,673.0,48068396.0,Action|Crime|Romance|Thriller,Jason Patric,60573,2027,Lois Chiles,0.0,collision course|computer|cruise|diamonds|ship...,http://www.imdb.com/title/tt0120179/?ref_=fn_t...,248.0,English,USA,PG-13,1.600000e+08,1997.0,368.0,3.7,2.35,894
Batman Forever,Color,Joel Schumacher,144.0,121.0,541.0,680.0,Rene Auberjonois,920.0,184031112.0,Action|Adventure|Fantasy,Michael Gough,190786,2880,Debi Mazar,4.0,love|necktie|partner|rock music|tuxedo,http://www.imdb.com/title/tt0112462/?ref_=fn_t...,539.0,English,USA,PG-13,1.000000e+08,1995.0,710.0,5.4,1.85,0


In [152]:
select_cols1 = ['imdb_score', 'gross', 'content_rating', 'title_year']
movie_subcon1_filtered1 = movie.loc[sub_condition1, select_cols1]

In [161]:
movie_subcon1_filtered1.sort_values('imdb_score', ascending=False)

Unnamed: 0_level_0,imdb_score,gross,content_rating,title_year
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Interstellar,8.6,187991439.0,PG-13,2014.0
The Dark Knight Rises,8.5,448130642.0,PG-13,2012.0
Captain America: Civil War,8.2,407197282.0,PG-13,2016.0
The Avengers,8.1,623279547.0,PG-13,2012.0
Guardians of the Galaxy,8.1,333130696.0,PG-13,2014.0
X-Men: Days of Future Past,8.0,233914986.0,PG-13,2014.0
Edge of Tomorrow,7.9,100189501.0,PG-13,2014.0
The Hobbit: An Unexpected Journey,7.9,303001229.0,PG-13,2012.0
The Hobbit: The Desolation of Smaug,7.9,258355354.0,PG-13,2013.0
Skyfall,7.8,304360277.0,PG-13,2012.0


In [160]:
movie_subcon1_filtered2 = movie.loc[sub_condition2, select_cols1]
movie_subcon1_filtered2.sort_values('imdb_score', ascending=False)

Unnamed: 0_level_0,imdb_score,gross,content_rating,title_year
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Forrest Gump,8.8,329691196.0,PG-13,1994.0
Princess Mononoke,8.4,2298191.0,PG-13,1997.0
Indiana Jones and the Last Crusade,8.3,197171806.0,PG-13,1989.0
The Sixth Sense,8.1,293501675.0,PG-13,1999.0
Jurassic Park,8.1,356784000.0,PG-13,1993.0
Gattaca,7.8,12339633.0,PG-13,1997.0
The Fugitive,7.8,183875760.0,PG-13,1993.0
Malcolm X,7.7,48169908.0,PG-13,1992.0
The Fifth Element,7.7,63540020.0,PG-13,1997.0
As Good as It Gets,7.7,147637474.0,PG-13,1997.0


#### Using boolean indexing with index selection

In [162]:
college = pd.read_csv('data/college.csv')

In [163]:
college.head()

Unnamed: 0,INSTNM,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
0,Alabama A & M University,Normal,AL,1.0,0.0,0.0,0,424.0,420.0,0.0,4206.0,0.0333,0.9353,0.0055,0.0019,0.0024,0.0019,0.0,0.0059,0.0138,0.0656,1,0.7356,0.8284,0.1049,30300,33888.0
1,University of Alabama at Birmingham,Birmingham,AL,0.0,0.0,0.0,0,570.0,565.0,0.0,11383.0,0.5922,0.26,0.0283,0.0518,0.0022,0.0007,0.0368,0.0179,0.01,0.2607,1,0.346,0.5214,0.2422,39700,21941.5
2,Amridge University,Montgomery,AL,0.0,0.0,0.0,1,,,1.0,291.0,0.299,0.4192,0.0069,0.0034,0.0,0.0,0.0,0.0,0.2715,0.4536,1,0.6801,0.7795,0.854,40100,23370.0
3,University of Alabama in Huntsville,Huntsville,AL,0.0,0.0,0.0,0,595.0,590.0,0.0,5451.0,0.6988,0.1255,0.0382,0.0376,0.0143,0.0002,0.0172,0.0332,0.035,0.2146,1,0.3072,0.4596,0.264,45500,24097.0
4,Alabama State University,Montgomery,AL,1.0,0.0,0.0,0,425.0,430.0,0.0,4811.0,0.0158,0.9208,0.0121,0.0019,0.001,0.0006,0.0098,0.0243,0.0137,0.0892,1,0.7347,0.7554,0.127,26600,33118.5


In [174]:
college[college['STABBR'] == 'CA'].head()

Unnamed: 0,INSTNM,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
192,Academy of Art University,San Francisco,CA,0.0,0.0,0.0,0,,,0.0,9885.0,0.2392,0.0685,0.1141,0.0804,0.0051,0.0058,0.0249,0.2523,0.2098,0.4334,1,0.4008,0.5524,0.4043,36000.0,35093
193,ITT Technical Institute-Rancho Cordova,Rancho Cordova,CA,0.0,0.0,0.0,0,,,0.0,500.0,0.472,0.114,0.11,0.076,0.008,0.002,0.04,0.0,0.178,0.254,0,0.7137,0.7667,0.7235,38800.0,25827.5
194,Academy of Chinese Culture and Health Sciences,Oakland,CA,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed
195,The Academy of Radio and TV Broadcasting,Huntington Beach,CA,0.0,0.0,0.0,0,,,0.0,14.0,0.2143,0.4286,0.3571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9579,1.0,0.4545,28400.0,9500
196,Avalon School of Cosmetology-Alameda,Alameda,CA,0.0,0.0,0.0,0,,,0.0,253.0,0.1265,0.4743,0.2253,0.0672,0.0079,0.0,0.0553,0.0,0.0435,0.5099,1,0.7407,0.6768,0.3387,21600.0,9860


In [179]:
college2 = college.set_index("STABBR")

In [182]:
%timeit college[college['STABBR']=='CA']

2.14 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [183]:
%timeit college2.loc['CA']

1.02 ms ± 95.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### Create a list of elements present in the index to extract data selectively

In [187]:
states = ['CA', 'NY', 'NJ', 'WA']
college[college['STABBR'].isin(states)]

Unnamed: 0,INSTNM,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
192,Academy of Art University,San Francisco,CA,0.0,0.0,0.0,0,,,0.0,9885.0,0.2392,0.0685,0.1141,0.0804,0.0051,0.0058,0.0249,0.2523,0.2098,0.4334,1,0.4008,0.5524,0.4043,36000,35093
193,ITT Technical Institute-Rancho Cordova,Rancho Cordova,CA,0.0,0.0,0.0,0,,,0.0,500.0,0.4720,0.1140,0.1100,0.0760,0.0080,0.0020,0.0400,0.0000,0.1780,0.2540,0,0.7137,0.7667,0.7235,38800,25827.5
194,Academy of Chinese Culture and Health Sciences,Oakland,CA,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed
195,The Academy of Radio and TV Broadcasting,Huntington Beach,CA,0.0,0.0,0.0,0,,,0.0,14.0,0.2143,0.4286,0.3571,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,1,0.9579,1.0000,0.4545,28400,9500
196,Avalon School of Cosmetology-Alameda,Alameda,CA,0.0,0.0,0.0,0,,,0.0,253.0,0.1265,0.4743,0.2253,0.0672,0.0079,0.0000,0.0553,0.0000,0.0435,0.5099,1,0.7407,0.6768,0.3387,21600,9860
197,College of Alameda,Alameda,CA,0.0,0.0,0.0,0,,,0.0,5141.0,0.1529,0.2128,0.2196,0.3021,0.0037,0.0041,0.0473,0.0076,0.0500,0.8440,1,0.2273,0.0117,0.3940,31900,PrivacySuppressed
198,Allan Hancock College,Santa Maria,CA,0.0,0.0,0.0,0,,,0.0,9738.0,0.3565,0.0279,0.5287,0.0418,0.0070,0.0043,0.0312,0.0021,0.0005,0.6630,1,0.2531,0.0231,0.3713,29800,10500
199,American Academy of Dramatic Arts-Los Angeles,Los Angeles,CA,0.0,0.0,0.0,0,,,0.0,280.0,0.4143,0.0821,0.0964,0.0107,0.0107,0.0036,0.1321,0.2429,0.0071,0.0000,1,0.5039,0.6008,0.1589,27800,12000
200,American Baptist Seminary of the West,Berkeley,CA,0.0,0.0,0.0,1,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed
201,American Film Institute Conservatory,Los Angeles,CA,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed


In [191]:
#alternative method

college2.loc[states].head()

Unnamed: 0_level_0,INSTNM,CITY,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
STABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
CA,Academy of Art University,San Francisco,0.0,0.0,0.0,0,,,0.0,9885.0,0.2392,0.0685,0.1141,0.0804,0.0051,0.0058,0.0249,0.2523,0.2098,0.4334,1,0.4008,0.5524,0.4043,36000.0,35093
CA,ITT Technical Institute-Rancho Cordova,Rancho Cordova,0.0,0.0,0.0,0,,,0.0,500.0,0.472,0.114,0.11,0.076,0.008,0.002,0.04,0.0,0.178,0.254,0,0.7137,0.7667,0.7235,38800.0,25827.5
CA,Academy of Chinese Culture and Health Sciences,Oakland,0.0,0.0,0.0,0,,,0.0,,,,,,,,,,,,1,,,,,PrivacySuppressed
CA,The Academy of Radio and TV Broadcasting,Huntington Beach,0.0,0.0,0.0,0,,,0.0,14.0,0.2143,0.4286,0.3571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9579,1.0,0.4545,28400.0,9500
CA,Avalon School of Cosmetology-Alameda,Alameda,0.0,0.0,0.0,0,,,0.0,253.0,0.1265,0.4743,0.2253,0.0672,0.0079,0.0,0.0553,0.0,0.0435,0.5099,1,0.7407,0.6768,0.3387,21600.0,9860


#### Create your own index by combining elements from the DF

In [192]:
college.columns

Index(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY', 'RELAFFIL',
       'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS', 'UGDS_WHITE',
       'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN', 'UGDS_NHPI',
       'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF', 'CURROPER', 'PCTPELL',
       'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10', 'GRAD_DEBT_MDN_SUPP'],
      dtype='object')

In [195]:
college.index = college["CITY"] + ', ' + college['STABBR']
college = college.sort_index()
college.head()

Unnamed: 0,INSTNM,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
"ARTESIA, CA",Angeles Institute,ARTESIA,CA,0.0,0.0,0.0,0,,,0.0,114.0,0.0175,0.2193,0.386,0.3158,0.0,0.0263,0.0175,0.0088,0.0088,0.0,1,0.6275,0.8138,0.5429,,16850
"Aberdeen, SD",Presentation College,Aberdeen,SD,0.0,0.0,0.0,1,440.0,480.0,0.0,705.0,0.6525,0.1163,0.078,0.0128,0.0156,0.0,0.0284,0.0142,0.0823,0.2865,1,0.4829,0.756,0.3097,35900.0,25000
"Aberdeen, SD",Northern State University,Aberdeen,SD,0.0,0.0,0.0,0,480.0,475.0,0.0,1693.0,0.8435,0.023,0.0319,0.0112,0.0207,0.003,0.0219,0.0425,0.0024,0.1872,1,0.2272,0.4303,0.1766,33600.0,24847
"Aberdeen, WA",Grays Harbor College,Aberdeen,WA,0.0,0.0,0.0,0,,,0.0,1121.0,0.711,0.0169,0.0946,0.0214,0.0312,0.0054,0.0937,0.0009,0.025,0.182,1,0.453,0.1502,0.5087,27000.0,11490
"Abilene, TX",Hardin-Simmons University,Abilene,TX,0.0,0.0,0.0,1,508.0,515.0,0.0,1576.0,0.7126,0.0742,0.1472,0.0076,0.0019,0.0006,0.0298,0.0159,0.0102,0.0685,1,0.3256,0.5547,0.0982,38700.0,25864


In [197]:
college.loc['Miami, FL'].head()

Unnamed: 0,INSTNM,CITY,STABBR,HBCU,MENONLY,WOMENONLY,RELAFFIL,SATVRMID,SATMTMID,DISTANCEONLY,UGDS,UGDS_WHITE,UGDS_BLACK,UGDS_HISP,UGDS_ASIAN,UGDS_AIAN,UGDS_NHPI,UGDS_2MOR,UGDS_NRA,UGDS_UNKN,PPTUG_EF,CURROPER,PCTPELL,PCTFLOAN,UG25ABV,MD_EARN_WNE_P10,GRAD_DEBT_MDN_SUPP
"Miami, FL",New Professions Technical Institute,Miami,FL,0.0,0.0,0.0,0,,,0.0,56.0,0.0179,0.0714,0.9107,0.0,0.0,0.0,0.0,0.0,0.0,0.4464,1,0.8701,0.678,0.8358,18700,8682
"Miami, FL",Management Resources College,Miami,FL,0.0,0.0,0.0,0,,,0.0,708.0,0.0071,0.0523,0.9407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.4239,0.5458,0.8698,PrivacySuppressed,12182
"Miami, FL",Strayer University-Doral,Miami,FL,,,,1,,,,,,,,,,,,,,,1,,,,49200,36173.5
"Miami, FL",Keiser University- Miami,Miami,FL,,,,1,,,,,,,,,,,,,,,1,,,,29700,26063
"Miami, FL",George T Baker Aviation Technical College,Miami,FL,0.0,0.0,0.0,0,,,0.0,649.0,0.0894,0.1263,0.7735,0.0046,0.0,0.0015,0.0046,0.0,0.0,0.5686,1,0.2567,0.0,0.4366,38600,PrivacySuppressed


In [198]:
%%timeit 
criteria1 = college['CITY'] == 'Miami' 
criteria2 = college['STABBR'] == 'FL'
college[criteria1 & criteria2]

3.27 ms ± 528 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [199]:
%timeit college.loc['Miami, FL']

312 µs ± 57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
