# Importing Pandas and DataFrame

In [None]:
import pandas as pd

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv")
)

bond.head(3)

# The <code>set_index()</code> and <code>reset_index()</code> Methods

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv")
)

# You can choose a column to use as the index for a DataFrame.
bond.set_index(keys = "Film", inplace = True)
bond.head(3)

In [None]:
# Resets the DataFrame's index back to the default sequential integer index;
# the previous index is moved back into a regular column.
bond.reset_index(inplace = True)
bond.head(3)

In [None]:
bond.set_index("Film", inplace = True)
bond.head(3)

In [None]:
# By default, set_index() replaces the existing index, so the previous index is discarded.
# Without inplace = True, this returns a new DataFrame and leaves bond unchanged.
bond.set_index("Year")

In [None]:
# Calling reset_index() first moves the current index back into a column,
# so it is not lost when set_index() installs the new index.
bond.reset_index(inplace = True)
bond.set_index("Year", inplace = True)
bond.head(3)

# Retrieve Rows by Index Label with <code>.loc[]</code> Accessor

In [None]:
# By sorting the index before accessing values, we can see a speed improvement over large datasets.
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)
bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# The index label must exist or you will get a KeyError.
bond.loc["Goldfinger"]
bond.loc["GoldenEye"]

In [None]:
# If there are multiple rows with the Index Label, a DataFrame will be returned.
bond.loc["Casino Royale"]

In [None]:
# The .loc[] accessor can handle label slices (both endpoints inclusive) with step intervals as well.
bond.loc["Diamonds Are Forever" : "From Russia with Love"]
bond.loc["Diamonds Are Forever" : "From Russia with Love" : 2]

In [None]:
# Selects rows from "GoldenEye" to the end of the DataFrame.
bond.loc["GoldenEye" :]

In [None]:
# Selects Rows from the DataFrame up to "On Her Majesty's Secret Service"
bond.loc[: "On Her Majesty's Secret Service"]

In [None]:
# Pulls out multiple rows at once by passing in a list.
bond.loc[["Die Another Day", "Octopussy"]]

# Retrieve Rows by Index Position with <code>.iloc[]</code> Accessor

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",)
)

bond.head(3)

In [None]:
# Returns a series based on the index number, rather than index label.
bond.iloc[0]
bond.iloc[15]

In [None]:
# Also supports multiple rows by passing in a list.
bond.iloc[[15, 20]]

In [None]:
# iloc[] supports slices too. Note: unlike loc[], the end position of an iloc[] slice is exclusive.
bond.iloc[4:8]

In [None]:
# Sets index to the film name and sorts it.
bond.set_index("Film", inplace = True)
bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# Works normally.
bond.loc["GoldenEye"]

In [None]:
# Even though the index labels are strings, the rows still have integer positions.
bond.iloc[0]

# Second Arguments to <code>.loc[]</code> and <code>.iloc[]</code> Accessors

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)
bond.sort_index(inplace = True)
bond.head(3)

In [None]:
bond.loc["Moonraker"]

In [None]:
# To specify which columns to pull out, we can use the following syntax:
bond.loc["Moonraker", "Actor"]
bond.loc["Moonraker", "Director"]
bond.loc["Moonraker", ["Director", "Box Office"]]
bond.loc[["Moonraker", "A View to a Kill"], ["Director", "Box Office"]]

In [None]:
# Pandas supports list slicing syntax options here also:
bond.loc["Moonraker", "Director" : "Budget"]
bond.loc["Moonraker" : "Thunderball", "Director" : "Budget"]
bond.loc["Moonraker" : , "Director" :]
bond.loc[ : "Moonraker", : "Budget"]

In [None]:
# The same can be done with iloc, except we use integer positions rather than labels.
# Remember that iloc slices exclude the end position, unlike loc slices.
bond.iloc[14]
bond.iloc[14, 2]
bond.iloc[14, 2:5]
bond.iloc[[14, 17], [2, 4]]
bond.iloc[:7, [2, 4]]
bond.iloc[:7, :3]

# Set New Value for a Specific Cell

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# We can overwrite the value by simply assigning it a new one.
bond.loc["Dr. No"]
bond.loc["Dr. No", "Actor"]
bond.loc["Dr. No", "Actor"] = "Sir Sean Connery"
bond.loc["Dr. No", "Actor"]

In [None]:
bond.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [448000000, 7000000, 600000]
bond.loc["Dr. No"]

# Set Multiple Values in DataFrame

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
actor_is_sean_connery = bond["Actor"] == "Sean Connery"

In [None]:
# Change all values of Sean Connery in the DataFrame to Sir Sean Connery
bond.loc[actor_is_sean_connery, "Actor"] = "Sir Sean Connery"
bond.head()

# Rename Index Labels or Columns in a DataFrame

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# The mapper is applied to axis 0 (the index labels) by default. Don't forget to use the
# axis parameter if you want to rename column labels instead!
bond.rename(mapper = {"GoldenEye" : "Golden Eye", "The World Is Not Enough" : "Best Bond Movie Ever"})
bond.rename(mapper = {"GoldenEye" : "Golden Eye", "The World Is Not Enough" : "Best Bond Movie Ever"}, axis = 0)
bond.rename(mapper = {"GoldenEye" : "Golden Eye", "The World Is Not Enough" : "Best Bond Movie Ever"}, axis = "rows")
bond.rename(mapper = {"GoldenEye" : "Golden Eye",  "The World Is Not Enough" : "Best Bond Movie Ever"}, axis = "index")

In [None]:
# Index will do the same as mapper, but does not require the axis parameter.
bond.rename(index = {"GoldenEye" : "Golden Eye", "The World Is Not Enough" : "Best Bond Movie Ever"}, inplace = True)
bond

In [None]:
# Renaming columns follows the same logic as renaming rows.
bond.rename(mapper = {"Year" : "Release Date", "Box Office" : "Revenue"}, axis = 1)
bond.rename(mapper = {"Year" : "Release Date", "Box Office" : "Revenue"}, axis = "columns")
bond.rename(columns = {"Year" : "Release Date", "Box Office" : "Revenue"})
bond.rename(columns = {"Year" : "Release Date", "Box Office" : "Revenue"}, inplace = True)
bond.head(1)

In [None]:
# We can also overwrite names for columns by using the columns attribute.
# Note: You must provide a list of equal length.
bond.columns = ["Year of Release", "Actor", "Director", "Gross", "Cost", "Salary"]
bond.head(1)

# Delete Rows or Columns from a DataFrame

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# You can drop rows from a DataFrame simply by passing in the index label (or a list of labels).
bond.drop("A View to a Kill")
bond.drop(["A View to a Kill", "Die Another Day", "From Russia with Love"])
bond.drop("Casino Royale")

In [None]:
# The same can be done with columns, specifying axis = 1.
bond.drop(["Box Office", "Bond Actor Salary", "Actor"], axis = 1, inplace = True)
bond.head()

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# Pop will remove the passed in column as well as return it as a Series.
# This will allow the ability to store the removed column.
actor = bond.pop("Actor")

In [None]:
bond
actor

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# We can use Python's del keyword to remove a column as well.
del bond["Director"]

In [None]:
bond

# Create Random Sample

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# The .sample() method by default returns a random row.
bond.sample()
bond.sample(n = 5)
bond.sample(frac = .25)

In [None]:
# The .sample() method can also be used to pull random columns by using the axis parameter.
bond.sample(axis = 1)
bond.sample(n = 5, axis = "columns")
bond.sample(frac = .25, axis = 1)

# The <code>.nsmallest()</code> and <code>.nlargest()</code> Methods

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# Find the three largest grosses in the Box Office column.
bond.sort_values("Box Office", ascending = False).head(3)

In [None]:
# This can also be done using the .nlargest() method. Can be more efficient than
# sorting an entire DataFrame.
bond.nlargest(3, "Box Office")

In [None]:
# We can also find the smallest numbers by using the .nsmallest() method.
bond.nsmallest(n = 2, columns = "Box Office")

In [None]:
bond.nlargest(3, columns = "Budget")
bond.nsmallest(n = 6, columns = "Bond Actor Salary")

In [None]:
# These methods can also be called directly on a Series.
bond["Box Office"].nlargest(8)
bond["Year"].nsmallest(2)

# Filtering a DataFrame with the <code>.where()</code> Method

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
mask = bond["Actor"] == "Sean Connery"
bond[mask]

In [None]:
# Returns the entire DataFrame with valid values related to our condition.
# Everything else will be filled in with NaN.
bond.where(mask)

In [None]:
bond.where(bond["Box Office"] > 800)

In [None]:
mask2 = bond["Box Office"] > 800
bond.where(mask & mask2)

# Filtering a DataFrame with the <code>.query()</code> Method

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
# The .query() method doesn't like spaces in column names, so replace them with underscores.
bond.columns = [column_name.replace(" ", "_") for column_name in bond.columns]
bond.head(1)

In [None]:
bond.query('Actor == "Sean Connery"')
bond.query("Director == 'Terence Young'")
bond.query("Actor != 'Roger Moore'")

In [None]:
bond.query("Box_Office > 600")
bond.query("Actor == 'Roger Moore' and Director == 'John Glen'")
bond.query("Actor == 'Roger Moore' or Director == 'John Glen'")

In [None]:
# The .query() method supports in and not in as well.
bond.query("Actor in ['Timothy Dalton', 'George Lazenby']")
bond.query("Actor not in ['Sean Connery', 'Roger Moore']")

# A Review of the <code>.apply()</code> Method on Single Columns

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
def convert_to_string_and_add_millions(number):
    """Return ``number`` converted to text with " MILLIONS!" appended.

    The value is converted with ``str()`` (via the ``!s`` conversion), so any
    object is accepted; e.g. ``5`` becomes ``"5 MILLIONS!"``.
    """
    suffix = " MILLIONS!"
    return f"{number!s}{suffix}"

bond["Box Office"] = bond["Box Office"].apply(convert_to_string_and_add_millions)
bond["Budget"] = bond["Budget"].apply(convert_to_string_and_add_millions)

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
columns = ["Box Office", "Budget", "Bond Actor Salary"]

for col in columns:
    bond[col] = bond[col].apply(convert_to_string_and_add_millions)

In [None]:
bond.head(3)

# The <code>.apply()</code> Method with Row Values

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
def good_movie(row):
    """Rate a Bond film based on its lead actor and budget.

    Written to be used with ``DataFrame.apply(..., axis="columns")``, where
    each ``row`` is a Series labelled by column name.

    Parameters
    ----------
    row : mapping-like (e.g. pandas Series)
        Must provide the "Actor" and "Budget" labels.

    Returns
    -------
    str
        "The Best" for Pierce Brosnan films, "Enjoyable" for Roger Moore
        films whose budget is greater than 40, otherwise "I Have No Clue".
    """
    # Look the values up by column label instead of integer position:
    # integer keys on a label-indexed Series are a deprecated positional
    # fallback, and labels keep working if the column order changes.
    actor = row["Actor"]
    budget = row["Budget"]

    if actor == "Pierce Brosnan":
        return "The Best"
    if actor == "Roger Moore" and budget > 40:
        return "Enjoyable"
    return "I Have No Clue"

# Can be confusing, axis is "columns" because we are moving horizontally per row.
bond.apply(good_movie, axis = "columns")

# The <code>.copy()</code> Method

In [None]:
bond = (
    pd.read_csv("C:/Users/Maverick/Documents/git/Data-Analysis-With-Pandas-And-Python/datasets/jamesbond.csv",
    index_col = "Film")
)

bond.sort_index(inplace = True)
bond.head(3)

In [None]:
directors = bond["Director"].copy()
directors.head(3)

In [None]:
directors["A View to a Kill"] = "Mister John Glen"

In [None]:
directors.head(3)

In [None]:
bond.head(3)