In [1]:
import pandas as pd

In [4]:
bond = pd.read_csv("jamesbond.csv")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## The set_index and reset_index Methods

#### Allows you to assign a brand new index to your DF or to reset index to standard setting numeric index that Pandas gives you from the start

In [12]:
bond = pd.read_csv("jamesbond.csv")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [15]:
# using the index_col parameter will lead you to (in this case)  use the film name as the index 
#bond = pd.read_csv("jamesbond.csv", index_col = "Film")

bond = pd.read_csv("jamesbond.csv")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [16]:
# this method allows us to pass the name of the column you want to serve as the index
# this is equivalent to the idex_col parameter above
# this operation is not permanent so if you want it to stick pass in the inplace parameter 

bond.set_index(keys="Film", inplace = True)

In [17]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [20]:
# this is how you reverse what we did above; we want to go back to the standard numeric index 
# The drop = Fase in the parameter means that it will not drop the column that previously served as the index 
# if you do want to drop it then all you have to do is set the value to True 

bond.reset_index(drop = False)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [59]:
# the film index will be tossed out if we pass the True boolean 
#
bond.reset_index(drop = True)
bond.head(3)

Unnamed: 0,Film,Year,index,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,0,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,1,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,2,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [56]:
# to revert to what we had before we want to pass in the inplace patameter 

bond.reset_index(drop = False, inplace = True)
bond.head(3)

Unnamed: 0,Film,Year,index,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,0,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,1,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,2,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [55]:
# in case we have an existing index and we want to replace it with another column without losing the original data 
# we are setting the film column to serve as our index 

bond.set_index("Film", inplace = True)
bond.head()

Unnamed: 0_level_0,Year,index,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Dr. No,1962,0,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,1,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,2,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,3,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,4,David Niven,Ken Hughes,315.0,85.0,


In [60]:
# instead of having the film column in bold (the index) we want the years
# as we can see here the Film column is going to be tossed out of memorry 

bond.reset_index(inplace = True)
bond.set_index("Year")

Unnamed: 0_level_0,level_0,Film,index,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1962,0,Dr. No,0,Sean Connery,Terence Young,448.8,7.0,0.6
1963,1,From Russia with Love,1,Sean Connery,Terence Young,543.8,12.6,1.6
1964,2,Goldfinger,2,Sean Connery,Guy Hamilton,820.4,18.6,3.2
1965,3,Thunderball,3,Sean Connery,Terence Young,848.1,41.9,4.7
1967,4,Casino Royale,4,David Niven,Ken Hughes,315.0,85.0,
1967,5,You Only Live Twice,5,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
1969,6,On Her Majesty's Secret Service,6,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
1971,7,Diamonds Are Forever,7,Sean Connery,Guy Hamilton,442.5,34.7,5.8
1973,8,Live and Let Die,8,Roger Moore,Guy Hamilton,460.3,30.8,
1974,9,The Man with the Golden Gun,9,Roger Moore,Guy Hamilton,334.0,27.7,


In [62]:
bond.head(3)

Unnamed: 0,level_0,Film,Year,index,Actor,Director,Box Office,Budget,Bond Actor Salary
0,0,Dr. No,1962,0,Sean Connery,Terence Young,448.8,7.0,0.6
1,1,From Russia with Love,1963,1,Sean Connery,Terence Young,543.8,12.6,1.6
2,2,Goldfinger,1964,2,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Retrive Rows by index label with .loc[] Accessor

#### How we can extract one or more rows from a dataframe by their index labels 

In [67]:
# we want to set the film column to the index label 
# The films will be sorted alphabetically 

bond = pd.read_csv("jamesbond.csv", index_col = "Film")
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [68]:
# pandas use this form to access because when we extract from a list or a dictionary, we tend to folloe bracket notation 
# whar we are returned with for this is a series of the information holding that film 

bond.loc["Goldfinger"]

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [71]:
# because there is one match for what we are looking for we are returned with a series 

bond.loc["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [75]:
# here we have two items that match the index so we get back a dataframe that holds the info that we are looking for 

bond.loc["Casino Royale"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [76]:
bond.loc["Goldfinger"]

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [77]:
bond.loc["Diamonds Are Forever" : "From Russia with Love"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [80]:
bond.loc["Diamonds Are Forever" : "From Russia with Love" ]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [81]:
bond.loc["Diamonds Are Forever" : "From Russia with Love" :2]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [89]:
# to extract more than one row just do double brackets 

bond.loc[["GoldenEye", "Diamonds Are Forever"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [92]:
# to check if a given label exists you can use the in python built in operator 

"Gold Bond" in bond.index

False

## Retrieve Rows by Index Position iloc Accessor 

#### extract one or more rows by index position 

In [120]:
bond = pd.read_csv("jamesbond.csv")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [95]:
# we want to retreve data from index position 0 

bond.iloc[0]

Film                        Dr. No
Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: 0, dtype: object

In [96]:
bond.iloc[15]

Film                 A View to a Kill
Year                             1985
Actor                     Roger Moore
Director                    John Glen
Box Office                      275.2
Budget                           54.5
Bond Actor Salary                 9.1
Name: 15, dtype: object

In [97]:
bond.iloc[[15, 20]]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
15,A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5


In [99]:
# the end point is exclusive when we are dealing with numeric indezes 

bond.iloc[4:8]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [104]:
# pull everything starting from index position 20 all the way to the end of the list 

bond.iloc[20:]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [107]:
bond.iloc[:4]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [123]:
bond.set_index("Film", inplace = True)
bond.sort_index(inplace = True)

In [124]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [129]:
bond.iloc[0]

bond.iloc[15]

bond.iloc[:8]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [130]:
# extracting at multiple positions 

bond.iloc[[9, 10, 14]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


## Second Arguments to loc and iloc Accessors

#### 

In [134]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [136]:
# this is telling you to extract the row Moonraker but only the column Actor 
# the first element befor the comma is the row and after the comma is the column

bond.loc["Moonraker", "Actor"]

'Roger Moore'

In [137]:
# director of Moonraker

bond.loc["Moonraker", "Director"]

'Lewis Gilbert'

In [139]:
# to pass multiple columns that you want to extract 
# we want the director and the box office of Moonraker

bond.loc["Moonraker", ['Director', 'Box Office']]

Director      Lewis Gilbert
Box Office              535
Name: Moonraker, dtype: object

In [140]:
# will get a dataframe here because of the amount of rows that we want to extract 

bond.loc[["Moonraker", "GoldenEye"], ['Director', 'Box Office']

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Lewis Gilbert,535.0
GoldenEye,Martin Campbell,518.5


In [142]:
# we can do list slicing with this 

bond.loc["Moonraker", "Director" : "Budget"]


# here we want to extract all the movies from moonraker to thunderball and all the columns from director to 
# budget
bond.loc["Moonraker" : "Thunderball", "Director" : "Budget"]

Unnamed: 0_level_0,Director,Box Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Moonraker,Lewis Gilbert,535.0,91.5
Never Say Never Again,Irvin Kershner,380.0,86.0
Octopussy,John Glen,373.8,53.9
On Her Majesty's Secret Service,Peter R. Hunt,291.5,37.3
Quantum of Solace,Marc Forster,514.2,181.4
Skyfall,Sam Mendes,943.5,170.2
Spectre,Sam Mendes,726.7,206.3
The Living Daylights,John Glen,313.5,68.8
The Man with the Golden Gun,Guy Hamilton,334.0,27.7
The Spy Who Loved Me,Lewis Gilbert,533.0,45.1


In [144]:
bond.loc["Moonraker":, "Director":]

Unnamed: 0_level_0,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Moonraker,Lewis Gilbert,535.0,91.5,
Never Say Never Again,Irvin Kershner,380.0,86.0,
Octopussy,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,Marc Forster,514.2,181.4,8.1
Skyfall,Sam Mendes,943.5,170.2,14.5
Spectre,Sam Mendes,726.7,206.3,
The Living Daylights,John Glen,313.5,68.8,5.2
The Man with the Golden Gun,Guy Hamilton,334.0,27.7,
The Spy Who Loved Me,Lewis Gilbert,533.0,45.1,


In [145]:
bond.loc[: "Moonraker", : "Director"]

Unnamed: 0_level_0,Year,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,1985,Roger Moore,John Glen
Casino Royale,2006,Daniel Craig,Martin Campbell
Casino Royale,1967,David Niven,Ken Hughes
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton
Die Another Day,2002,Pierce Brosnan,Lee Tamahori
Dr. No,1962,Sean Connery,Terence Young
For Your Eyes Only,1981,Roger Moore,John Glen
From Russia with Love,1963,Sean Connery,Terence Young
GoldenEye,1995,Pierce Brosnan,Martin Campbell
Goldfinger,1964,Sean Connery,Guy Hamilton


### for iloc

In [147]:
bond.iloc[14,2]

'John Glen'

In [148]:
# here we want to extract position 14 and 15 and then the columns from 2 to 4 (because 5 is exclusive)

bond.iloc[[14, 15], 2:5]

Unnamed: 0_level_0,Director,Box Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopussy,John Glen,373.8,53.9
On Her Majesty's Secret Service,Peter R. Hunt,291.5,37.3


## Set New Value for a Specific Cell

#### set a new value for one or more rows 

In [3]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [7]:
# you can overwrite with the assignment operator also known as = sign, so here we wull be replacing the actor in
# Dr. No with Sir Sean Connery 

bond.loc["Dr. No", "Actor"] = "Sir Sean Connery"

In [8]:
bond.loc["Dr. No", "Actor"]

'Sir Sean Connery'

In [11]:
# how do we cange multiple values? 
# Here we specifciy which columns we want to change for the given row and we indicate the changes 
# in sequence on a list 

bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [448800000, 7000000, 600000]

In [12]:
bond.loc["Dr. No"]

Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                  4.488e+08
Budget                          7e+06
Bond Actor Salary              600000
Name: Dr. No, dtype: object

## Set Multiple Values in DataFrame

In [13]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### for all the times that sean connory is said we want to change that to sir sean connery

In [None]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

In [16]:
# will give boolean series where the true value indicates that sean connery is indeed the actor in the movie

actor_is_sean_connery = bond["Actor"] == "Sean Connery"

In [19]:
# here we are passing two arguments so that we target the boolean series and the actor column 
# we are able to do this because we are not performing it in a copy but a small reference of the actual data

bond.loc[actor_is_sean_connery, "Actor"] = "Sir Sean Connery"

In [20]:
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


## Rename Index Labels or Columns in a DataFrame

#### Rename one or more index labels or column labels in DF 

In [21]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [24]:
# can combine mapper to axis, to axis we specify which axis we want to target (row or column)
# to mapper we pass a dictionary, in that dictionary the keys are going to represent the existing label 
# in the axis and the values for the dict keys will be the new labels we want to set for former label

bond.rename(mapper = {"GoldenEye" : "Golden Eye",
                      "The World is Not Enough" : "Best Bond Movie Ever"})


# if you want to be more explicit and say you want to target the rows u can provide axis = 0 

# all these three accomplish the same thing 
bond.rename(mapper = {"GoldenEye" : "Golden Eye",
                      "The World is Not Enough" : "Best Bond Movie Ever"}, axis = 0)

bond.rename(mapper = {"GoldenEye" : "Golden Eye",
                      "The World is Not Enough" : "Best Bond Movie Ever"}, axis = 'rows')

bond.rename(mapper = {"GoldenEye" : "Golden Eye",
                      "The World is Not Enough" : "Best Bond Movie Ever"}, axis = "index")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


#### instead of using mapper we can use the parameter index 

In [30]:
# The index parameter does not need the axis so we can leave that out 

bond.rename(index = {"GoldenEye" : "Golden Eye",
                      "The World is Not Enough" : "Best Bond Movie Ever"}, inplace = True)
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


#### rename the column labels here 

In [35]:
# we want to specify that we want to look at the column labels so we set index = 1 
# all of these do the same thing

bond.rename(mapper = { "Year" : "Release Date", "Box Office" : "Revenue" }, axis = 1 )

bond.rename(mapper = { "Year" : "Release Date", "Box Office" : "Revenue" }, axis = 'columns' )

bond.rename(columns = { "Year" : "Release Date", "Box Office" : "Revenue" }, inplace = True)

bond.head(1)

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [37]:
# or we can do this but this means if you want to just change a couple you would need to rewrite those that 
# you do not want to change 

bond.columns = ["Year of Release", "Actor", "Director", "Gross", "Cost", "Salary"]

bond.head(1)

Unnamed: 0_level_0,Year of Release,Actor,Director,Gross,Cost,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## Delete Rows or Columns from a DataFrame

In [38]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [42]:
bond.drop("A View to a Kill")

# if want to delete more than one in the rows 
bond.drop(["Die Another Day", "From Russia with Love"])

# deletes all those with casino royale in the film row
bond.drop("Casino Royale")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,


In [44]:
# to delete columns from the dataframe 

bond.drop(labels = ["Box Office", "Bond Actor Salary", "Actor"], axis = 1)

Unnamed: 0_level_0,Year,Director,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,1985,John Glen,54.5
Casino Royale,2006,Martin Campbell,145.3
Casino Royale,1967,Ken Hughes,85.0
Diamonds Are Forever,1971,Guy Hamilton,34.7
Die Another Day,2002,Lee Tamahori,154.2
Dr. No,1962,Terence Young,7.0
For Your Eyes Only,1981,John Glen,60.2
From Russia with Love,1963,Terence Young,12.6
GoldenEye,1995,Martin Campbell,76.9
Goldfinger,1964,Guy Hamilton,18.6


#### Back to the original dataframe

In [52]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### pop removes a single series but it also returns it as well so that if we assign it to a variable we will be able to see it 

In [53]:
actor = bond.pop("Actor")

In [54]:
actor

Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Da

In [58]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [59]:
# we can also use the Python del to delete the column

del bond["Director"]

In [60]:
# here we wil show that director is removed

bond.head(1)

Unnamed: 0_level_0,Year,Actor,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,275.2,54.5,9.1


## Create Random Sample 

#### 

In [2]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [5]:
bond.sample(frac = .25)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [8]:
# extracts random rows 

bond.sample(axis = 0, n = 3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [11]:
# extracts random columns 

bond.sample(axis = 'columns', n = 3)

Unnamed: 0_level_0,Actor,Year,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,Roger Moore,1985,54.5
Casino Royale,Daniel Craig,2006,145.3
Casino Royale,David Niven,1967,85.0
Diamonds Are Forever,Sean Connery,1971,34.7
Die Another Day,Pierce Brosnan,2002,154.2
Dr. No,Sean Connery,1962,7.0
For Your Eyes Only,Roger Moore,1981,60.2
From Russia with Love,Sean Connery,1963,12.6
GoldenEye,Pierce Brosnan,1995,76.9
Goldfinger,Sean Connery,1964,18.6


## The .nsmallest() and .nlargest() Methods 

#### extract the rows from the dataframe that contain the smallest or largest values in a specific column 

In [12]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [15]:
# we are showing the top 5 movies based on the box office value 

bond.sort_values("Box Office", ascending = False).head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3


#### Another way how to do it is: 

In [16]:
# extracts the top three movies with the largest box office 

bond.nlargest(3, columns = 'Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [17]:
# extracts the three movies with the lowest box office 

bond.nsmallest(3 , columns = "Box Office")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [18]:
# extract the movies with the largest budgets 

bond.nlargest(n = 3, columns = "Budget")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [20]:
# we want the six smallest bond actor salaries 

bond.nsmallest(n = 6, columns = "Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [23]:
# can call the largest on a series 

bond["Box Office"].nlargest(n = 8)

Film
Skyfall                  943.5
Thunderball              848.1
Goldfinger               820.4
Spectre                  726.7
Casino Royale            581.5
From Russia with Love    543.8
Moonraker                535.0
The Spy Who Loved Me     533.0
Name: Box Office, dtype: float64

In [25]:
# the five smallest years 

bond["Year"].nsmallest(n = 5)

Film
Dr. No                   1962
From Russia with Love    1963
Goldfinger               1964
Thunderball              1965
Casino Royale            1967
Name: Year, dtype: int64

## Filtering with the where Method

In [26]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [30]:
mask = bond["Actor"] == "Sean Connery"

bond[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [35]:
# returns the original dataframe and if it does not fit the condition (mask) then it will display NaN 
bond.where(mask)

# this gives us the dataframe that we are looking for without the NULL values 
#bond.where(mask).dropna()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [37]:
bond.where(bond["Box Office"] > 800)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [38]:
mask2 = bond["Box Office"] > 800

In [42]:
# we are passing two conditions 

bond.where(mask & mask2)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## The .query() Method 

#### this is another way that we can filter through the data 

In [43]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [50]:
# we need to fill in the spaces for certain columns so that the .query() method works 
# we are using list comprehension here 

bond.columns = [column_name.replace(" ", "_") for column_name in bond.columns]
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [51]:
# we want to find the rows that have sean connery 

bond.query('Actor == "Sean Connery"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [53]:
# query the director column for the director Terence Young 

bond.query('Director == "Terence Young"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [54]:
# actor does not equal Roger Moore 

bond.query('Actor != "Roger Moore"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [58]:
# box office greater than 600 

bond.query('Box_Office > 600')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [62]:
# this extracts the row with the following condition 

bond.query('Box_Office > 600 and Actor == "Sean Connery"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [64]:
# pull out where the actor == roger moore and director == john glen OR

bond.query('Actor == "Roger Moore" or Director == "John Glen"')
bond.query('Actor == "Roger Moore" and Director == "John Glen"')


Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [66]:
# this is extracting where actor is EITHER timothy or george 

bond.query("Actor in ['Timothy Dalton', 'George Lazenby']")


Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


In [68]:
# extracts the info where the actor IS NOT roger moore and sean connery 

bond.query("Actor not in ['Roger Moore', 'Sean Connery']")


Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


## A Review of the .apply() Method on Single Columns 

#### On a single column/ series 

In [2]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [3]:
# this function returns the number as a string and concatenates the end with Millions

def convert_to_string_and_add_millions (number):
    return str(number) + " Millions!"

In [7]:
# shows us the result of the concatenation of the function 

bond["Box Office"] = bond["Box Office"].apply(convert_to_string_and_add_millions)

In [9]:
# this will show us the millions on budget, we also overwrote it 

bond["Budget"] = bond["Budget"].apply(convert_to_string_and_add_millions)

In [10]:
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 Millions!,54.5 Millions!,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 Millions!,145.3 Millions!,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0 Millions!,85.0 Millions!,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5 Millions!,34.7 Millions!,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4 Millions!,154.2 Millions!,17.9


#### re execute the code in the beginning

In [11]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [12]:
def convert_to_string_and_add_millions (number):
    return str(number) + " Millions!"

In [13]:
columns = ["Box Office", "Budget", "Bond Actor Salary"]

In [14]:
#for each col in the columns list, grab each col in bond so bond[col] apply the function and the overwrite the column 
# with those calculations 

for col in columns:
    bond[col] = bond[col].apply(convert_to_string_and_add_millions)

In [15]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 Millions!,54.5 Millions!,9.1 Millions!
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 Millions!,145.3 Millions!,3.3 Millions!
Casino Royale,1967,David Niven,Ken Hughes,315.0 Millions!,85.0 Millions!,nan Millions!


## The .apply() Method with Row Values 

#### combine the values in a  row in a custom function 

In [16]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)

# Year is going to have index position 0, Actor will have index position 1 and so on 

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [17]:
# passing each row into the function 
# row[1] will take the actor information in the dataframe and iterate over that 
# this is going to pass on every single row that is passed 


def good_movie(row):
    
    actor = row[1]
    budget = row[4]
    
    if actor == "Pierce Brosnan":
        return "The Best"
    
    elif actor == "Roger Moore" and budget > 40:
        return "Enjoyable"
    
    else:
        return "I have no clue"

# for each row we are moving through the columns 
# for every row we are moving through our six columns, we are traversing it through a column based perspective 
bond.apply(good_movie, axis = "columns")

Film
A View to a Kill                        Enjoyable
Casino Royale                      I have no clue
Casino Royale                      I have no clue
Diamonds Are Forever               I have no clue
Die Another Day                          The Best
Dr. No                             I have no clue
For Your Eyes Only                      Enjoyable
From Russia with Love              I have no clue
GoldenEye                                The Best
Goldfinger                         I have no clue
Licence to Kill                    I have no clue
Live and Let Die                   I have no clue
Moonraker                               Enjoyable
Never Say Never Again              I have no clue
Octopussy                               Enjoyable
On Her Majesty's Secret Service    I have no clue
Quantum of Solace                  I have no clue
Skyfall                            I have no clue
Spectre                            I have no clue
The Living Daylights               I have no 

## The .copy() Method

In [18]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### creates a copy of an existing object (DataFrame or Series) but stores it seperately in memory 

In [21]:
directors = bond["Director"]

In [22]:
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [24]:
# change John Glen ro Mr.John Glen but this will affect the original dataframe and overwrite it because we are not 
# working with a copy 

directors["A View to a Kill"] = "Mr. John Glen"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  directors["A View to a Kill"] = "Mr. John Glen"


In [25]:
bond = pd.read_csv("jamesbond.csv", index_col = 'Film')
bond.sort_index(inplace = True)
bond.head(3)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [26]:
# this is a seperate series from our bond dataframe 

directors = bond["Director"].copy()

directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [31]:
directors["A View to a Kill"] = "Mister John Glen"

In [32]:
directors.head(3)

Film
A View to a Kill    Mister John Glen
Casino Royale        Martin Campbell
Casino Royale             Ken Hughes
Name: Director, dtype: object