In [None]:
import pandas as pd

In [None]:
# Load the employee roster; both date-like columns are parsed while reading.
# "Senior Management" is cast to the nullable "boolean" dtype rather than plain
# "bool": a plain bool cast silently turns every missing value into True,
# which would mislabel employees whose seniority is unknown.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

## Filter A `DataFrame` Based On A Condition

In [None]:
# Reload a fresh copy of the roster for this section.
# The nullable "boolean" dtype keeps missing "Senior Management" entries as
# <NA>; casting to plain "bool" would coerce each of them to True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

## Filter with More than One Condition (AND)

In [None]:
# Reload a fresh copy of the roster for the AND-filter examples.
# "boolean" (nullable) is used instead of "bool" so that missing
# "Senior Management" values stay missing rather than becoming True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# Both conditions must hold: the employee is male AND works in Marketing.
is_male = df["Gender"].eq("Male")
in_marketing = df["Team"].eq("Marketing")

df[is_male & in_marketing]

## Filter with More than One Condition (OR)

In [None]:
# Reload a fresh copy of the roster for the OR-filter examples.
# "Senior Management" is used directly as a row mask in this section, so it is
# cast to the nullable "boolean" dtype: a plain "bool" cast would turn every
# missing value into True and wrongly select those rows as senior managers.
# Missing (<NA>) mask entries are treated as False when filtering.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# Either condition is enough: senior management OR a start date before 1990.
is_senior = df["Senior Management"]
started_before_1990 = df["Start Date"].lt("1990-01-01")

df[is_senior | started_before_1990]

In [None]:
# (Robert AND Client Services) OR anyone who started after 2016-06-01.
# The AND pair is combined first so the grouping is explicit.
named_robert = df["First Name"].eq("Robert")
in_client_services = df["Team"].eq("Client Services")
recent_hire = df["Start Date"].gt("2016-06-01")

robert_in_client_services = named_robert & in_client_services
df[robert_in_client_services | recent_hire]

## The `.isin()` Method

In [None]:
# Reload a fresh copy of the roster for the .isin() examples.
# The nullable "boolean" dtype preserves missing "Senior Management" values;
# a plain "bool" cast would silently convert them to True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# Select employees on any of the three teams with a single membership test:
# .isin() replaces a chain of == comparisons joined with |.
# Rows whose Team is missing are excluded, just as they are by == comparisons.
in_target_team = df["Team"].isin(["Legal", "Sales", "Product"])

df[in_target_team]

## The `.isnull()` and `.notnull()` Methods

In [None]:
# Reload a fresh copy of the roster for the missing-value examples.
# "Senior Management" uses the nullable "boolean" dtype so that its missing
# entries remain <NA>; a plain "bool" cast would overwrite them with True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# Rows where no team has been recorded.
team_is_missing = df["Team"].isnull()

df[team_is_missing]

In [None]:
# Rows where a gender value is present.
gender_is_known = df["Gender"].notnull()

df[gender_is_known]

## The `.between()` Method

In [None]:
# Reload a fresh copy of the roster for the .between() examples.
# The nullable "boolean" dtype is used for "Senior Management" because a plain
# "bool" cast would turn each missing value into True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# Employees whose salary falls between 60,000 and 70,000.
salary_in_band = df["Salary"].between(left=60000, right=70000)
df[salary_in_band]

In [None]:
# Employees whose bonus percentage falls between 2.0 and 5.0.
bonus_in_band = df["Bonus %"].between(left=2.0, right=5.0)
df[bonus_in_band]

In [None]:
# Employees whose start date falls between 1991-01-01 and 1992-01-01.
started_in_window = df["Start Date"].between(left="1991-01-01", right="1992-01-01")
df[started_in_window]

In [None]:
# Compare on time of day only. "Last Login Time" is parsed as a datetime column
# when the file is read, and a bare string such as "08:30AM" is converted to a
# timestamp with an implicit date, so calling .between() on the column directly
# only matches when both sides happen to carry the same date.
# Subtracting midnight leaves the elapsed time since the start of each day.
login_ts = df["Last Login Time"]
since_midnight = login_ts - login_ts.dt.normalize()

# 08:30 AM through 12:00 PM; rows with a missing login time never match.
morning_start = pd.Timedelta(hours=8, minutes=30)
morning_end = pd.Timedelta(hours=12)

df[since_midnight.between(morning_start, morning_end)]

## The `.duplicated()` Method

In [None]:
# Reload the roster and order it by first name so repeated names sit together.
# - "Senior Management" uses the nullable "boolean" dtype: a plain "bool" cast
#   would silently turn every missing value into True.
# - The sort is chained instead of applied in place, so the cell builds df in a
#   single expression and gives the same result every time it is re-run.
df = (
    pd.read_csv("employees.csv", parse_dates=["Start Date", "Last Login Time"])
    .astype({"Senior Management": "boolean", "Gender": "category"})
    .sort_values("First Name")
)
df.head(3)

In [None]:
# Keep only rows whose first name occurs exactly once: keep=False flags every
# member of a repeated group, and ~ inverts that flag.
name_is_repeated = df["First Name"].duplicated(keep=False)
df[~name_is_repeated]

## The `.drop_duplicates()` Method

In [None]:
# Reload the roster and order it by first name for the de-duplication examples.
# - "Senior Management" uses the nullable "boolean" dtype: a plain "bool" cast
#   would turn missing values into True, which could also make rows that
#   differ only in that column look like exact duplicates.
# - The sort is chained rather than done in place, so re-running the cell
#   always rebuilds df from the file in one expression.
df = (
    pd.read_csv("employees.csv", parse_dates=["Start Date", "Last Login Time"])
    .astype({"Senior Management": "boolean", "Gender": "category"})
    .sort_values("First Name")
)
df.head(3)

In [None]:
# Number of rows before any duplicates are removed.
len(df.index)

In [None]:
# Number of rows left once rows that match on every column are collapsed.
fully_deduplicated = df.drop_duplicates()
len(fully_deduplicated)

In [None]:
# Drop every row whose first name appears more than once
# (keep=False removes all copies rather than retaining one).
one_of_a_kind_names = df.drop_duplicates(subset=["First Name"], keep=False)
one_of_a_kind_names

In [None]:
# Collapse rows that share the same (First Name, Team) pair and rebind df
# to the de-duplicated frame.
df = df.drop_duplicates(subset=["First Name", "Team"])

In [None]:
# Preview the first two rows of the current frame.
df.head(n=2)

In [None]:
# Number of rows currently in df.
df.shape[0]

## The `.unique()` and `.nunique()` Methods

In [None]:
# Reload a fresh copy of the roster for the .unique() / .nunique() examples.
# The nullable "boolean" dtype keeps missing "Senior Management" values as
# <NA>; a plain "bool" cast would convert each of them to True.
df = pd.read_csv(
    "employees.csv",
    parse_dates=["Start Date", "Last Login Time"],
).astype({"Senior Management": "boolean", "Gender": "category"})
df.head(3)

In [None]:
# A cell only echoes its final expression, so both results are displayed
# explicitly; otherwise the Gender values are computed and silently discarded.
# `display` is provided by the IPython/Jupyter kernel.
display(df["Gender"].unique(), df["Team"].unique())

In [None]:
# Count the distinct Team values by measuring the array that .unique() returns.
distinct_teams = df["Team"].unique()
len(distinct_teams)

In [None]:
# Count the distinct Team values; dropna=False counts a missing team as a value too.
team_column = df["Team"]
team_column.nunique(dropna=False)