## Setting/resetting index, extracting, resetting values, etc.

In [1]:
import pandas as pd

In [2]:
bond = pd.read_csv("jamesbond.csv")

In [3]:
bond  # not displayed: a bare expression is only shown when it is the LAST line of a cell
bond.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
Film                 26 non-null object
Year                 26 non-null int64
Actor                26 non-null object
Director             26 non-null object
Box Office           26 non-null float64
Budget               26 non-null float64
Bond Actor Salary    18 non-null float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.5+ KB


## The .set_index() and .reset_index() Methods
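For `.reset_index()`, `drop = False` is the default (the old index is kept as a regular column). For `.set_index()`, `drop = True` is the default (the chosen column is removed from the columns once it becomes the index).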

In [4]:
bond = pd.read_csv("jamesbond.csv")  # could add index_col=
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [5]:
# Promote the Film column to the index (rebinds `bond` to the re-indexed frame)
bond = bond.set_index("Film")

In [6]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [7]:
# Move the current index (Film) back into the columns and restore the default
# integer index. drop = False keeps the old index as a column;
# drop = True would discard it instead.
bond = bond.reset_index(drop = False)
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [8]:
# Goal: use Year as the index instead of Film.
# .set_index() replaces the current index and discards the old one, so when a
# meaningful index (e.g. Film) is in place, call .reset_index() first to move
# it back into the columns and only then call .set_index() with the new key.
# NOTE: the previous cell already reset the index, so at this point the index
# is just the default 0..n-1 range. Resetting it again adds a redundant
# "index" column (visible in the output below); from this state,
# .set_index("Year") on its own would have been enough.
bond.reset_index(inplace = True)
bond.set_index(keys = "Year", inplace = True)
bond.head(3)


Unnamed: 0_level_0,index,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1962,0,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,1,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,2,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Retrieve rows by Index LABEL with .loc[]
Sorting the index first with `.sort_index()` can noticeably speed up label lookups with `.loc[]`, especially when the labels are strings, because pandas can search a sorted index more efficiently than an unsorted one.

In [9]:
# Load with Film as the index and sort the labels in one chained expression
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [10]:
bond.loc["Goldfinger"]  # A label that occurs once returns that row as a Series
bond.loc["GoldenEye"]
# A label that occurs more than once ("Casino Royale") returns a DataFrame
bond.loc["Casino Royale"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [11]:
# Label slices with .loc INCLUDE the end label (unlike positional slices with
# .iloc, which exclude the stop position). This holds for any label type,
# not only strings.
bond.loc["Diamonds Are Forever" : "Moonraker"]
bond.loc["GoldenEye" :]
bond.loc[: "On Her Majesty's Secret Service"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [12]:
# Extract multiple non-sequential values
bond.loc[["Moonraker", "Octopussy"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [13]:
# If at least one requested label exists and another doesn't, this pandas
# version still returns a DataFrame, but the missing label ("Gold Bond") comes
# back as a row of NaN and a warning says this will raise a KeyError in the
# future. Check membership first with `label in bond.index`.
bond.loc[["For Your Eyes Only", "Live and Let Die", "Gold Bond"]]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
For Your Eyes Only,1981.0,Roger Moore,John Glen,449.4,60.2,
Live and Let Die,1973.0,Roger Moore,Guy Hamilton,460.3,30.8,
Gold Bond,,,,,,


In [14]:
"Gold Bond" in bond.index

False

## Retrieve rows by Index POSITION with iloc[]
`.iloc[]` selects rows by integer position (0-based) in the DataFrame as it currently stands. Sorting, filtering, or choosing a different `index_col` gives a DataFrame with its own row order, so the same position can point to a different row afterwards. Positions work the same way even when the index labels are strings.


In [15]:
bond = pd.read_csv("jamesbond.csv")
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [16]:
# With the default integer index the labels equal the positions, so loc and
# iloc return the same rows here
bond.loc[15]
bond.iloc[15]

# iloc slices are positional and, like Python slices, the stop position is NOT included
bond.iloc[[15, 20]]
bond.loc[[15, 20]]
bond.iloc[:4]
bond.iloc[4:8]
bond.iloc[20:]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [17]:
# Use Film as the index and sort it.
# Every row still has an integer POSITION behind the scenes, even though it
# is no longer shown as the index.
bond = (
    pd.read_csv("jamesbond.csv", index_col = "Film")
      .sort_index()
)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [18]:
# Can still use iloc
bond.iloc[:18]
bond.iloc[18:]
bond.iloc[[5, 10, 15, 20]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


## The Catch-All .ix[] Method
Combines the functionality of `.loc[]` and `.iloc[]` in a single indexer. **Teacher's preferred choice**. However, it is deprecated (and was removed entirely in pandas 1.0), so prefer `.loc[]` and `.iloc[]`.

In [19]:
# Fresh copy of the data: Film as index, labels sorted
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [20]:
# .ix is deprecated (see the warning below) and was removed in pandas 1.0;
# the label-based equivalent is bond.loc["GoldenEye"]
bond.ix["GoldenEye"]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

## Second Arguments to .loc[], .iloc[], and .ix[] Methods
[ROW, COLUMN]

In [21]:
# Reload and sort by Film so the row/column examples start from a clean frame
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [22]:
bond.loc["Moonraker", "Actor"]
bond.loc["Moonraker", ["Actor", "Budget", "Year"]]

# **Columns also have their own INDEX positions
bond.iloc[14, 2]   # [row, col]
bond.iloc[14, 2:5]
bond.iloc[14, [5, 3, 2]]

Bond Actor Salary          7.8
Box Office               373.8
Director             John Glen
Name: Octopussy, dtype: object

In [23]:
# .ix accepts positions and labels in the same call (row position + column
# label, then row label + column positions). Roughly equivalent without .ix:
#   bond["Budget"].iloc[20]
#   bond.loc["The Man with the Golden Gun"].iloc[:4]
bond.ix[20, "Budget"]
bond.ix["The Man with the Golden Gun", :4]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Year                  1974
Actor          Roger Moore
Director      Guy Hamilton
Box Office             334
Name: The Man with the Golden Gun, dtype: object

## Set new values for a specific cell or row

In [24]:
# Start again from the file so earlier edits do not carry over
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [25]:
bond.loc["Dr. No"]

Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                           7
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

In [26]:
bond.loc["Dr. No", "Actor"] = "Sir Sean Connery"  # Permanent change

In [27]:
bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]]
bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [44800000, 7000000, 600000]

In [28]:
# We assigned an INT, but it comes back as a FLOAT because the Budget column
# is stored as float64 (see the bond.info() output at the top; Budget has no
# missing values). Assigning one value does not change the column's dtype.
# Use .astype on the column to convert it; a column that does contain NaN
# (e.g. Bond Actor Salary) would need its NaN filled first.
bond.loc["Dr. No", "Budget"]

7000000.0

## Set multiple values in a DataFrame

In [29]:
# Clean reload (Film index, sorted) before the bulk-assignment examples
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [32]:
mask = bond["Actor"] == "Sean Connery"

In [46]:
bond.loc[mask]  # Rows where mask is True (used here only to look at them)
bond.loc[mask, "Actor"]  # Actor column for those rows; ASSIGNING through this same .loc[mask, "Actor"] selection writes to bond itself

Film
Diamonds Are Forever     Sean Connery
Dr. No                   Sean Connery
From Russia with Love    Sean Connery
Goldfinger               Sean Connery
Never Say Never Again    Sean Connery
Thunderball              Sean Connery
You Only Live Twice      Sean Connery
Name: Actor, dtype: object

In [47]:
bond.loc[mask, "Actor"] = "Sir Sean Connery"   # This changes original

In [49]:
bond.loc[bond["Actor"] == "Roger Moore"]  

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,


## Replace values with .replace() Method
to_replace = [], value = []

In [50]:
# Replace one value with another inside the Actor column.
# The result is assigned back rather than calling .replace(..., inplace = True)
# on bond["Actor"]: the selected column can be a temporary object, so an
# in-place replace on it is not guaranteed to reach `bond` (newer pandas
# versions warn about this chained pattern).
bond["Actor"] = bond["Actor"].replace(to_replace = "Sean Connery",
                                      value = "Sir Sean Connery")

In [53]:
bond[["Actor", "Director"]].replace(to_replace = ["Roger Moore", "John Glen"],
                                    value = ["Actor Gaylon", "Director Gaylon"])

Unnamed: 0_level_0,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
A View to a Kill,Actor Gaylon,Director Gaylon
Casino Royale,Daniel Craig,Martin Campbell
Casino Royale,David Niven,Ken Hughes
Diamonds Are Forever,Sir Sean Connery,Guy Hamilton
Die Another Day,Pierce Brosnan,Lee Tamahori
Dr. No,Sir Sean Connery,Terence Young
For Your Eyes Only,Actor Gaylon,Director Gaylon
From Russia with Love,Sir Sean Connery,Terence Young
GoldenEye,Pierce Brosnan,Martin Campbell
Goldfinger,Sir Sean Connery,Guy Hamilton


## Rename index labels or columns in a DataFrame with .rename()
.rename(columns = {})

In [54]:
# Reload the original column names and labels before renaming them
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [55]:
bond.rename(columns = {"Year" : "Release Date",
                      "Box Office" : "Revenue"}, inplace = True)

In [56]:
bond.head()

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [58]:
# Rename individual index labels. Keys must match the existing labels exactly
# (case-sensitive); keys that are not found are silently ignored, so the film
# has to be spelled "GoldenEye" as it appears in the index.
bond.rename(index = {"Dr. No" : "Doctor No",
                     "GoldenEye" : "Golden Eye",
                     "The World Is Not Enough" : "Best Bond Movie Ever"},
            inplace = True)

In [59]:
bond.loc["Best Bond Movie Ever"]

Release Date                   1999
Actor                Pierce Brosnan
Director              Michael Apted
Revenue                       439.5
Budget                        158.3
Bond Actor Salary              13.5
Name: Best Bond Movie Ever, dtype: object

In [66]:
# Another way to rename columns is to assign a list to the .columns attribute.
# This overwrites ALL column names at once, so the list needs one name per
# column; to rename a single column use .rename(columns = {...}) instead.
bond.columns = ["Year of Release", "Actor", "Director", "Gross",
               "Cost", "Salary"]
bond.head()

Unnamed: 0_level_0,Year of Release,Actor,Director,Gross,Cost,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


## Delete rows and columns with .drop() and .pop() Methods and the del keyword
axis=0 looks at ROWS. .pop() removes single series or single column from a dataframe. .pop() is permanent AND it returns the series. You can store it into a new variable.

In [92]:
# Reload so the drop/pop/del examples start from the full set of columns
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [74]:
# Returns a DF but not permanent until inplace=True
bond.drop(["A View to a Kill", "Die Another Day", "From Russia with Love"])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [76]:
# Let's remove COLUMNS. Need to change axis=1 or axis="columns"
bond.drop(labels = ["Box Office", "Actor"], axis = 1)

Unnamed: 0_level_0,Year,Director,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A View to a Kill,1985,John Glen,54.5,9.1
Casino Royale,2006,Martin Campbell,145.3,3.3
Casino Royale,1967,Ken Hughes,85.0,
Diamonds Are Forever,1971,Guy Hamilton,34.7,5.8
Die Another Day,2002,Lee Tamahori,154.2,17.9
Dr. No,1962,Terence Young,7.0,0.6
For Your Eyes Only,1981,John Glen,60.2,
From Russia with Love,1963,Terence Young,12.6,1.6
GoldenEye,1995,Martin Campbell,76.9,5.1
Goldfinger,1964,Guy Hamilton,18.6,3.2


In [86]:
# .pop() removes a single column from the DataFrame in place (permanently)
# and returns it as a Series, which can be stored in a variable
actor = bond.pop("Actor")

In [87]:
bond

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1
Casino Royale,2006,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,John Glen,449.4,60.2,
From Russia with Love,1963,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Guy Hamilton,820.4,18.6,3.2


In [88]:
actor

Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Da

In [99]:
# Add the popped Series back as a column at position 2 (0-based).
# allow_duplicates = True lets the insert succeed even if an "Actor" column
# already exists. The output below shows two Actor columns, presumably because
# this cell was run more than once or out of order (note the execution
# counts) - TODO confirm by re-running the notebook top to bottom.
bond.insert(loc = 2, column = "Actor", value = actor,
           allow_duplicates = True)
bond.head()

Unnamed: 0_level_0,Actor,Box Office,Actor,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,Roger Moore,275.2,Roger Moore,54.5,9.1
Casino Royale,Daniel Craig,581.5,Daniel Craig,145.3,3.3
Casino Royale,David Niven,315.0,David Niven,85.0,
Diamonds Are Forever,Sean Connery,442.5,Sean Connery,34.7,5.8
Die Another Day,Pierce Brosnan,465.4,Pierce Brosnan,154.2,17.9


In [95]:
del bond["Director"]

In [97]:
del bond["Year"]

In [100]:
bond.head()

Unnamed: 0_level_0,Actor,Box Office,Actor,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,Roger Moore,275.2,Roger Moore,54.5,9.1
Casino Royale,Daniel Craig,581.5,Daniel Craig,145.3,3.3
Casino Royale,David Niven,315.0,David Niven,85.0,
Diamonds Are Forever,Sean Connery,442.5,Sean Connery,34.7,5.8
Die Another Day,Pierce Brosnan,465.4,Pierce Brosnan,154.2,17.9


## Create random sample with .sample() Method
Say you want to extract one row at random

In [101]:
# Reload the full data set to sample from
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [105]:
bond.sample(n = 5)   # 5 rows picked at random, in no particular order, returned as a new DF; pass random_state = <int> for a repeatable sample

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [107]:
# frac= % of dataframe. More adjustable dependent on your DF.
bond.sample(frac = .25)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0


In [111]:
bond.sample(n = 3, axis = "columns") #axis = 1 or "columns"

Unnamed: 0_level_0,Director,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,John Glen,54.5,9.1
Casino Royale,Martin Campbell,145.3,3.3
Casino Royale,Ken Hughes,85.0,
Diamonds Are Forever,Guy Hamilton,34.7,5.8
Die Another Day,Lee Tamahori,154.2,17.9
Dr. No,Terence Young,7.0,0.6
For Your Eyes Only,John Glen,60.2,
From Russia with Love,Terence Young,12.6,1.6
GoldenEye,Martin Campbell,76.9,5.1
Goldfinger,Guy Hamilton,18.6,3.2


## The .nsmallest() and nlargest() Methods
Allows you to extract rows in a DF that contain the smallest or largest values in a particular column(s)

In [112]:
# Reload before ranking rows by their column values
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [115]:
# Three highest grossing movies
bond.sort_values("Box Office", ascending = False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [116]:
bond.nlargest(n = 3, columns = "Box Office")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [117]:
bond.nsmallest(n = 2, columns = "Box Office")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [118]:
bond.nlargest(n = 3, columns = "Budget")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [119]:
bond.nsmallest(n = 6, columns = "Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [120]:
bond["Box Office"].nlargest(5)

Film
Skyfall          943.5
Thunderball      848.1
Goldfinger       820.4
Spectre          726.7
Casino Royale    581.5
Name: Box Office, dtype: float64

In [121]:
bond["Year"].nsmallest(2)

Film
Dr. No                   1962
From Russia with Love    1963
Name: Year, dtype: int64

In [122]:
bond.nlargest(n = 3, columns = ["Box Office", "Budget"])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [123]:
bond["Budget"].nlargest()

Film
Spectre                    206.3
Quantum of Solace          181.4
Skyfall                    170.2
The World Is Not Enough    158.3
Die Another Day            154.2
Name: Budget, dtype: float64

## Filtering with the WHERE Method
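Returns a DataFrame with the same shape as the original: rows that satisfy the condition keep their values, while rows that do NOT satisfy it are filled with null values (NaN). Good for keeping the original DataFrame's shape and index.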

In [125]:
# Reload before the filtering examples
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [127]:
# Extract rows where actor = Sean Connery
mask = bond["Actor"] == "Sean Connery"
bond[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [134]:
# where(cond = boolean Series, other = NaN): every row is kept, but rows
# where cond is False have all of their values replaced by NaN
bond.where(cond = mask)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [151]:
# The condition can be built inline: keep values only for films that grossed more than 800
bond.where(cond = bond["Box Office"].gt(800))

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [152]:
# Second boolean Series: True where Box Office is above 800
mask2 = bond["Box Office"].gt(800)

In [154]:
# Multiple conditions: combine the boolean Series with & before passing them on
bond.where(mask & mask2)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## The .query() Method and temporary variables! 
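The argument is a string. Column names that contain spaces cannot be written directly inside that string, so we first rename the columns to remove the spaces. (Newer pandas versions, 0.25 and later, also accept such names wrapped in backticks inside the query string.)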

In [155]:
# Start the .query() section from the raw file again, indexed and sorted by film title
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


### Temporary variables in Python!
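**Useful stuff:** this is a list comprehension. It is written inside square brackets and uses a temporary variable (here `column_name`) that only exists inside those brackets.
Ex. `[column_name.replace(" ", "_") for column_name in bond.columns]`
* First: loop over `bond.columns`
* Then: build the new list `[column_name.replace(" ", "_") for column_name in bond.columns]`
* Next: assign it back to the attribute: `bond.columns = [column_name.replace(" ", "_") for column_name in bond.columns]`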

In [165]:
# Build a new list of column labels in which every space is swapped for an
# underscore, then assign that list back to bond.columns.
bond.columns = [column_name.replace(" ", "_") for column_name in bond.columns]
# Show one row to confirm the new labels (Box_Office, Bond_Actor_Salary)
bond.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [170]:
# Now we can finally use .query(). Notice the '""'
# The whole expression is a single string, so text values inside it must use
# the other kind of quote (single outside / double inside, or the reverse).
# Supposedly pretty fast for processing but not often used
# Only the result of the last expression in the cell is displayed.
bond.query('Actor == "Sean Connery"')
bond.query("Director == 'Ken Hughes'")
bond.query("Actor != 'Roger Moore'")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [172]:
# Numeric comparison inside the query string: films that grossed more than 600
bond.query(expr = "Box_Office > 600")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [175]:
# First query: actor is Roger Moore OR director is John Glen (result not displayed)
# Second query: actor is Roger Moore AND director is John Glen (displayed, as it is the last expression)
bond.query("Actor == 'Roger Moore' or Director == 'John Glen'")
bond.query("Actor == 'Roger Moore' and Director == 'John Glen'")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [177]:
# First query: only the listed actors, using "in" (result not displayed)
# Second query: all actors except the listed ones, using "not in" (displayed, as it is the last expression)
bond.query("Actor in ['Timothy Dalton', 'George Lazenby']")
bond.query("Actor not in ['Sean Connery', 'Roger Moore']")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


## A review of the .apply() Method on single columns
Applies a function to every value in a Series (like a broadcast). Good for a CUSTOM function that no existing pandas method can perform. There is no INPLACE argument, so assign the result back to the column to keep it.

In [224]:
# Reload the untouched data for the .apply() examples, indexed and sorted by film title
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [220]:
# Goal: turn the values of the box office, budget and actor salary columns
# into text that ends with the word "MILLIONS!".
def convert_to_string_and_add_millions(number):
    """Return str(number) followed by " MILLIONS!".

    number may be any value; it is converted with str() first, so a missing
    value (NaN) comes back as "nan MILLIONS!".
    """
    text = str(number)
    return f"{text} MILLIONS!"

#bond["Box Office"] = bond["Box Office"].apply(func = convert_to_string_and_add_millions)

In [222]:
# The first two expressions only preview values; their results are discarded
# because only the last expression of a cell is displayed.
bond["Budget"]
bond["Budget"].apply(func = convert_to_string_and_add_millions)
# Running this line again appends " MILLIONS!" a second time to the
# already-converted strings, so reload bond before re-running.
bond["Budget"] = bond["Budget"].apply(func = convert_to_string_and_add_millions)  # to make permanent

In [227]:
# A more elegant syntax. Refresh original DF above first, otherwise Budget
# (already converted in the previous cell) gets " MILLIONS!" appended twice.
# Calling .apply() on the multi-column selection does not convert value by
# value: DataFrame.apply hands func a whole column at a time, so we loop
# over the columns and apply to each Series instead.
# bond[columns].apply(func = convert_to_string_and_add_millions)

columns = ["Box Office", "Budget", "Bond Actor Salary"]
# Preview only; the result of this expression is discarded
bond[columns]
for col in columns:
    bond[col] = bond[col].apply(func = convert_to_string_and_add_millions)

In [228]:
# Check the result: the three money columns now hold text ending in "MILLIONS!"
bond.head(n = 5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 MILLIONS!,54.5 MILLIONS!,9.1 MILLIONS!
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 MILLIONS!,145.3 MILLIONS!,3.3 MILLIONS!
Casino Royale,1967,David Niven,Ken Hughes,315.0 MILLIONS!,85.0 MILLIONS!,nan MILLIONS!
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5 MILLIONS!,34.7 MILLIONS!,5.8 MILLIONS!
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4 MILLIONS!,154.2 MILLIONS!,17.9 MILLIONS!


## The .apply() Method with row values
In order to pass every row to the function, we call .apply() on the dataframe but with AXIS = "COLUMNS"

In [292]:
# Back to the raw numbers for the row-wise .apply() examples, indexed and sorted by film title
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [293]:
# Want to create a ranking system based on various values.
# Let's create a custom function. To hand every row to the function we call
# .apply() on the dataframe with AXIS = "COLUMNS"; pandas then passes each
# row to the function as a Series whose labels are the column names.

def good_movie(row):
    """Return a verdict string for one row of the bond DataFrame.

    Parameters
    ----------
    row : a Series (or any mapping) with at least the labels "Actor" and
        "Budget".

    Returns
    -------
    "The best" for Pierce Brosnan films, "Enjoyable" for Roger Moore films
    with a budget above 40, and "I have no clue" for everything else.
    """
    # Look values up by column label rather than by position: integer keys
    # on a label-indexed Series are deprecated in recent pandas, and labels
    # keep working if the column order ever changes.
    actor = row["Actor"]
    budget = row["Budget"]

    if actor == "Pierce Brosnan":
        return "The best"
    elif actor == "Roger Moore" and budget > 40:
        return "Enjoyable"
    else:
        return "I have no clue"
    
# axis = "columns" makes pandas call good_movie once per row
bond.apply(good_movie, axis = "columns")

Film
A View to a Kill                        Enjoyable
Casino Royale                      I have no clue
Casino Royale                      I have no clue
Diamonds Are Forever               I have no clue
Die Another Day                          The best
Dr. No                             I have no clue
For Your Eyes Only                      Enjoyable
From Russia with Love              I have no clue
GoldenEye                                The best
Goldfinger                         I have no clue
Licence to Kill                    I have no clue
Live and Let Die                   I have no clue
Moonraker                               Enjoyable
Never Say Never Again              I have no clue
Octopussy                               Enjoyable
On Her Majesty's Secret Service    I have no clue
Quantum of Solace                  I have no clue
Skyfall                            I have no clue
Spectre                            I have no clue
The Living Daylights               I have no 

In [294]:
# .apply() returned a new Series; bond itself still has its original columns
bond.head(n = 5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [301]:
# Practice scenario. Create a custom formula that categorizes movies
# based on profit ("High", "Medium", "Lower").
# Profit = Box Office - Budget - Bond Actor Salary
# Thresholds as implemented by the ranking functions below:
#   profit <= 200 -> "Lower"; 200 < profit <= 400 -> "Medium"; profit > 400 -> "High"
# Create two new columns: Profit and Profit Rank.
# A missing Bond Actor Salary makes Profit NaN for that film.
bond["Profit"] = bond["Box Office"] - bond["Budget"] - bond["Bond Actor Salary"]
# Placeholder text until the rank is computed
bond["Profit Rank"] = "Null"
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary,Profit,Profit Rank
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1,211.6,Null
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3,432.9,Null
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,,,Null
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8,402.0,Null
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9,293.3,Null


# Assigning to a local variable inside the function cannot update the
# DataFrame, so the label is returned instead; store the result of .apply()
# in the "Profit Rank" column to keep it.

def profitability(row):
    """Return the profit category for one row of the bond DataFrame.

    Parameters
    ----------
    row : a Series (or any mapping) with the label "Profit".

    Returns
    -------
    "Lower" when profit <= 200, "Medium" when 200 < profit <= 400,
    "High" when profit > 400, and "Unknown" when profit is NaN.
    """
    # Use the column label: positional keys depend on column order, and the
    # previous row[8] pointed past the end of the eight-value row.
    profit = row["Profit"]

    if profit <= 200:
        return "Lower"
    if profit <= 400:
        return "Medium"
    if profit > 400:
        return "High"
    # NaN fails every comparison above
    return "Unknown"

# Call profitability once per row (axis "columns" is the same as axis 1)
bond.apply(profitability, axis = "columns")

In [304]:
def profit_rank(row):
    """Return the profit category for one row, read from row["Profit"].

    "Lower" for a profit of 200 or less, "Medium" above 200 up to and
    including 400, "High" above 400. A NaN profit fails every comparison
    and yields the string "NaN".
    """
    profit = row["Profit"]

    if profit > 400:
        return "High"
    if profit > 200:
        return "Medium"
    if profit <= 200:
        return "Lower"
    return "NaN"

# The first call only previews the labels; its result is discarded because
# it is not the last expression in the cell.
bond.apply(func = profit_rank, axis = 1)
# Store the Series returned by .apply() in the column to keep the labels.
bond["Profit Rank"] = bond.apply(func = profit_rank, axis = 1)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary,Profit,Profit Rank
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1,211.6,Medium
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3,432.9,High
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,,,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8,402.0,High
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9,293.3,Medium


### This doesn't work. Returns None.
# Counter-example kept for reference. Note that it reuses the name
# profit_rank, so running this cell replaces the working version above.
def profit_rank(row):
    """Write a profit label into row["Profit Rank"] and return nothing.

    There is no return statement, so the function returns None for every
    row. The writes go to the row object handed in by .apply(); presumably
    that is a temporary Series, so they are not reflected in bond.
    """
    
    if row["Profit"] <= 200:
        row["Profit Rank"] = "Lower"
    elif row["Profit"] > 200 and row["Profit"] <= 400:
        row["Profit Rank"] = "Medium"
    elif row["Profit"] > 400:
        row["Profit Rank"] = "High"
    else:
        row["Profit Rank"] = "NaN"

# Preview only; the result of this call is discarded
bond.apply(func = profit_rank, axis = 1)
# Every row produced None, so this overwrites "Profit Rank" with None values
bond["Profit Rank"] = bond.apply(func = profit_rank, axis = 1)
bond.head()

## The .copy() Method
Creates an independent copy of an existing pandas object and stores it completely separately in memory, so changing the copy does not change the original. Works on a Series and on a whole DataFrame too!

In [306]:
# Clean slate for the .copy() examples: raw file, indexed and sorted by film title
bond = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [311]:
# Let's assign this series to a new variable
# No .copy() here: directors is the column pulled straight out of bond, and
# the following cells show a write to directors also showing up in bond.
directors = bond["Director"]
directors.head()

Film
A View to a Kill              John Glen
Casino Royale           Martin Campbell
Casino Royale                Ken Hughes
Diamonds Are Forever       Guy Hamilton
Die Another Day            Lee Tamahori
Name: Director, dtype: object

In [313]:
# Now let's change one of these values
# directors was taken from bond without .copy(), which is why pandas prints
# the warning about setting a value on a slice of a DataFrame.
directors["A View to a Kill"] = "Mister John Glen"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [314]:
# The first entry of directors now carries the new value
directors.head(n = 3)

Film
A View to a Kill    Mister John Glen
Casino Royale        Martin Campbell
Casino Royale             Ken Hughes
Name: Director, dtype: object

In [317]:
# Notice how changing the "copied" series of the DF (directors) has also
# changed the original bond DF?
# directors was never a real copy: it was taken from bond without .copy(),
# so the assignment went through to bond's own data.
# NOTE(review): with pandas Copy-on-Write enabled (presumably the default
# from pandas 3.0 - confirm) this write would no longer reach bond.
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,Mister John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [329]:
# What if we don't want it to overwrite the original series in the DF?
# This is when the .copy() method is handy.
# First put the original director name back. As before, the write goes
# through to bond (see the displayed bond.head()) and triggers the same
# warning. Only the last expression of the cell is displayed; the other
# bare expressions are previews whose results are discarded.
bond["Director"]
directors["A View to a Kill"] = "John Glen"
directors.head()
bond.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [332]:
# Everything is back to its original state. Now take a real copy:
# .copy() returns a brand new, isolated Series object.
directors = bond["Director"].copy(deep = True)
directors.head(n = 5)

Film
A View to a Kill              John Glen
Casino Royale           Martin Campbell
Casino Royale                Ken Hughes
Diamonds Are Forever       Guy Hamilton
Die Another Day            Lee Tamahori
Name: Director, dtype: object

In [339]:
# Change one value in the independent copy. The film title is an index
# label, so assign through the label.
# Series.replace() is not suitable with the title: it searches the stored
# values (the director names), not the index, so passing the film title as
# to_replace leaves the Series unchanged.
directors.loc["A View to a Kill"] = "Mister John Glen"
directors.head()

Film
A View to a Kill        Mister John Glen
Casino Royale            Martin Campbell
Casino Royale                 Ken Hughes
Diamonds Are Forever        Guy Hamilton
Die Another Day             Lee Tamahori
Name: Director, dtype: object

In [340]:
# bond keeps "John Glen": the Series made with .copy() is a separate object,
# so editing it did not touch the original DataFrame
bond["Director"].head(n = 5)

Film
A View to a Kill              John Glen
Casino Royale           Martin Campbell
Casino Royale                Ken Hughes
Diamonds Are Forever       Guy Hamilton
Die Another Day            Lee Tamahori
Name: Director, dtype: object