In [1]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values in row order.
people = dict(
    first_name=["John", "Jane", "James"],
    last_name=["Doe", "Doe Jr.", "Caldwin"],
    emails=[
        "johndoe@gmail.com",
        "janedoe@gmail.com",
        "jamescaldwin@gmail.com",
    ],
)


In [2]:
import pandas as pd

# Build a DataFrame from the dict: keys become column names and the list
# items become the rows.
df = pd.DataFrame(people)
# A bare expression on the last line of a cell is shown as the cell's output.
df

Unnamed: 0,first_name,last_name,emails
0,John,Doe,johndoe@gmail.com
1,Jane,Doe Jr.,janedoe@gmail.com
2,James,Caldwin,jamescaldwin@gmail.com


In [3]:
# Read a single value by row label and column name in one .loc lookup
# instead of chained indexing (df["first_name"][1]).
print(df.loc[1, "first_name"])


Jane


In [4]:
# .iloc selects by integer position: position 0 is the first row, returned as a Series.
df.iloc[0]

first_name                 John
last_name                   Doe
emails        johndoe@gmail.com
Name: 0, dtype: object

In [5]:
# Positional selection of rows 0 and 1 from the column at position 2 ("emails" here).
df.iloc[[0, 1], 2]

0    johndoe@gmail.com
1    janedoe@gmail.com
Name: emails, dtype: object

In [6]:
# .loc selects by index label; with the default integer index, label 0 is
# also the first row.
df.loc[0]

first_name                 John
last_name                   Doe
emails        johndoe@gmail.com
Name: 0, dtype: object

In [7]:
# Passing a list of labels returns the matching rows as a DataFrame.
df.loc[[0,1]]

Unnamed: 0,first_name,last_name,emails
0,John,Doe,johndoe@gmail.com
1,Jane,Doe Jr.,janedoe@gmail.com


In [8]:
# Use the "emails" column as the index.
# NOTE: A unique identifier makes a good index because rows can then be looked
# up directly by it. Set it early in the notebook so later cells agree on how
# rows are addressed.
# inplace=True modifies df itself instead of returning a new DataFrame.
df.set_index("emails", inplace=True)
df

Unnamed: 0_level_0,first_name,last_name
emails,Unnamed: 1_level_1,Unnamed: 2_level_1
johndoe@gmail.com,John,Doe
janedoe@gmail.com,Jane,Doe Jr.
jamescaldwin@gmail.com,James,Caldwin


In [9]:
# Look up a user by email with .loc[].
# NOTE: df.loc[0] no longer works here because the index labels are now the
# values of the "emails" column rather than integers.
df.loc["johndoe@gmail.com"]

first_name    John
last_name      Doe
Name: johndoe@gmail.com, dtype: object

In [10]:
# Restore the default integer index; "emails" goes back to being a regular
# column (it is inserted as the first column).
df.reset_index(inplace=True) 

In [13]:
# Build a boolean Series that is True where last_name is exactly "Doe".
# This is an equality test, so "Doe Jr." does not match.
df["last_name"] == "Doe"

0     True
1    False
2    False
Name: last_name, dtype: bool

In [14]:
# Keep the boolean mask in a variable ("filt") so it can be reused to pull
# out the actual rows it marks as True.
filt = (df["last_name"] == "Doe")

In [15]:
# Pass the boolean mask to the .loc[] indexer to keep only the rows where filt is True.
df.loc[filt]

Unnamed: 0,emails,first_name,last_name
0,johndoe@gmail.com,John,Doe


In [16]:
# Same row filter, but return only the "emails" column for the matching rows.
df.loc[filt, "emails"]

0    johndoe@gmail.com
Name: emails, dtype: object

In [25]:
# Combining and negating conditions with element-wise boolean operators:
#   &  - and
#   |  - or
#   ~  - negates the condition
#
# Each comparison is wrapped in parentheses because & and | bind more tightly
# than ==.
#
# Here we want the rows where last_name == "Doe" AND
# emails == "jamescaldwin@gmail.com".
filt2 = (df["last_name"] == "Doe") & (df["emails"] == "jamescaldwin@gmail.com")


In [26]:
# The result is an empty DataFrame because no row satisfies both conditions.
df.loc[filt2]

Unnamed: 0,emails,first_name,last_name


In [27]:
# Select rows where at least one condition holds: last_name is "Doe" OR
# emails is "jamescaldwin@gmail.com".
is_doe = df["last_name"] == "Doe"
is_james_email = df["emails"] == "jamescaldwin@gmail.com"
filt3 = is_doe | is_james_email

In [28]:
# Show the rows that satisfy either condition in filt3.
df.loc[filt3]

Unnamed: 0,emails,first_name,last_name
0,johndoe@gmail.com,John,Doe
2,jamescaldwin@gmail.com,James,Caldwin


In [29]:
# ~ inverts the mask, so this selects the rows that match neither condition in filt3.
df.loc[~filt3]

Unnamed: 0,emails,first_name,last_name
1,janedoe@gmail.com,Jane,Doe Jr.
