# PyTutorial 3.1 - Indexes

In this module, we will learn how to set, reset, and use indices in DataFrames.

In [69]:
import pandas as pd

# Build the example DataFrame from a dictionary: each key becomes a column
# name and each list supplies that column's values, one entry per row.
# None marks a missing value (some characters have no last name or email).
people = {
    "first": ["Han", "Luke", "Leia", "Ben", "Chewbacca", "R2-D2", "C-3PO"],
    "last": ["Solo", "Skywalker", "Organa", "Kenobi", None, None, None],
    "email": [
        "solo@sympatico.net",
        "luke.skywalker@rebel.org",
        "princess.leah@rogers.ca",
        "obiwan@gmail.com",
        None,
        "R2.D2@robo.net",
        "C.3PO@robo.net",
    ],
    "age": [29, 19, 19, 57, 200, 32, 112],
    "occupation": [
        "Smuggler",
        "Moisture Farmer",
        "Princess of Alderaan",
        "Jedi Master",
        "Smuggler",
        "Mechanic",
        "Translator",
    ],
    "race": ["Human", "Human", "Human", "Human", "Wookie", "Droid", "Droid"],
}

# No index is specified, so pandas assigns the default integer labels.
df1 = pd.DataFrame(people)
display(df1)

Unnamed: 0,first,last,email,age,occupation,race
0,Han,Solo,solo@sympatico.net,29,Smuggler,Human
1,Luke,Skywalker,luke.skywalker@rebel.org,19,Moisture Farmer,Human
2,Leia,Organa,princess.leah@rogers.ca,19,Princess of Alderaan,Human
3,Ben,Kenobi,obiwan@gmail.com,57,Jedi Master,Human
4,Chewbacca,,,200,Smuggler,Wookie
5,R2-D2,,R2.D2@robo.net,32,Mechanic,Droid
6,C-3PO,,C.3PO@robo.net,112,Translator,Droid


In [70]:
# The index of a DataFrame is shown in the far-left column. It is a sequence of labels that identify each row.
# By default the labels are consecutive integers starting at zero (0, 1, 2, ...), stored as a RangeIndex.
# Index labels can be integers, strings, or any other hashable type. pandas does not require the labels to be
# unique (indexing this data by "last" gives three rows the same label, None), but with a repeated label
# a lookup by that label matches more than one row, so unique labels are usually preferable.
print(df1.index)

RangeIndex(start=0, stop=7, step=1)


In [71]:
# The "set_index()" method replaces the default integer labels with the values of a column, which can describe the rows better.
# For example, we can use the last names stored in the column "last" as the row labels.
# The call returns a new DataFrame (stored here as "df2"); "df1" itself is left unchanged.
# Note that the rows without a last name (Chewbacca, R2-D2 and C-3PO) all receive the label None.
df2 = df1.set_index("last")
display(df2)

Unnamed: 0_level_0,first,email,age,occupation,race
last,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Solo,Han,solo@sympatico.net,29,Smuggler,Human
Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
,Chewbacca,,200,Smuggler,Wookie
,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
,C-3PO,C.3PO@robo.net,112,Translator,Droid


In [72]:
# To inspect the new index, we can use the "index" attribute again:
print(df2.index)

Index(['Solo', 'Skywalker', 'Organa', 'Kenobi', None, None, None], dtype='object', name='last')


In [73]:
# Note that by default "set_index()" returns a new DataFrame. It does not modify "df1" in place, as we can see by displaying it again:
display(df1)

# To modify "df1" in place, we need to specify the option "inplace=True":
df1.set_index("last", inplace=True)
display(df1)

# To restore the default integer index, use the "reset_index()" method.
# The old index is moved back into the DataFrame as a regular column, so "last" is now the first column of "df1":
df1.reset_index(inplace=True)
display(df1)

Unnamed: 0,first,last,email,age,occupation,race
0,Han,Solo,solo@sympatico.net,29,Smuggler,Human
1,Luke,Skywalker,luke.skywalker@rebel.org,19,Moisture Farmer,Human
2,Leia,Organa,princess.leah@rogers.ca,19,Princess of Alderaan,Human
3,Ben,Kenobi,obiwan@gmail.com,57,Jedi Master,Human
4,Chewbacca,,,200,Smuggler,Wookie
5,R2-D2,,R2.D2@robo.net,32,Mechanic,Droid
6,C-3PO,,C.3PO@robo.net,112,Translator,Droid


Unnamed: 0_level_0,first,email,age,occupation,race
last,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Solo,Han,solo@sympatico.net,29,Smuggler,Human
Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
,Chewbacca,,200,Smuggler,Wookie
,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
,C-3PO,C.3PO@robo.net,112,Translator,Droid


Unnamed: 0,last,first,email,age,occupation,race
0,Solo,Han,solo@sympatico.net,29,Smuggler,Human
1,Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
2,Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
3,Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
4,,Chewbacca,,200,Smuggler,Wookie
5,,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
6,,C-3PO,C.3PO@robo.net,112,Translator,Droid


In [74]:
# Now let's see how these two DataFrames (df1: default integer index, df2: indexed by last name) behave when we access elements from them.
# "iloc[integer position]" selects a row by its position (counting from 0), regardless of the index labels:
n = 0
display(df1.iloc[n])
display(df2.iloc[n])

# Notice that "last" does not appear among the values of the "df2" row: that column became the index, so the last name is shown as the "Name" of the row instead.

last                        Solo
first                        Han
email         solo@sympatico.net
age                           29
occupation              Smuggler
race                       Human
Name: 0, dtype: object

first                        Han
email         solo@sympatico.net
age                           29
occupation              Smuggler
race                       Human
Name: Solo, dtype: object

In [76]:
# "loc[label]" selects rows by their index label rather than by their position:
display(df1.loc[0])
display(df2.loc["Solo"])

# Because df2 is indexed by last name, its rows must be looked up by last name.
# The integer 0 is not one of df2's labels, so the following line (left commented out) would raise a KeyError:
# display(df2.loc[0])

last                        Solo
first                        Han
email         solo@sympatico.net
age                           29
occupation              Smuggler
race                       Human
Name: 0, dtype: object

first                        Han
email         solo@sympatico.net
age                           29
occupation              Smuggler
race                       Human
Name: Solo, dtype: object

In [77]:
# We can also use "loc" to get a single value from a DataFrame by giving both the row label and the column label:
print(df1.loc[0,"email"])
print(df2.loc["Solo","email"])

solo@sympatico.net
solo@sympatico.net


In [81]:
# Now let's see how we can sort the data in each DataFrame.
# The "sort_index()" method sorts a DataFrame by its labels along one axis: the row labels (the index) or the column labels.

# By default, the method sorts the rows by their index labels ("axis=0") in ascending order:
display(df1.sort_index())

# Setting "axis=1" instead reorders the columns by their column labels in ascending (alphabetical) order; the rows stay where they are:
display(df1.sort_index(axis=1))

# Note that "sort_index()" returns a sorted copy; the option "inplace=True" must be set to modify the DataFrame itself.

Unnamed: 0,last,first,email,age,occupation,race
0,Solo,Han,solo@sympatico.net,29,Smuggler,Human
1,Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
2,Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
3,Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
4,,Chewbacca,,200,Smuggler,Wookie
5,,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
6,,C-3PO,C.3PO@robo.net,112,Translator,Droid


Unnamed: 0,age,email,first,last,occupation,race
0,29,solo@sympatico.net,Han,Solo,Smuggler,Human
1,19,luke.skywalker@rebel.org,Luke,Skywalker,Moisture Farmer,Human
2,19,princess.leah@rogers.ca,Leia,Organa,Princess of Alderaan,Human
3,57,obiwan@gmail.com,Ben,Kenobi,Jedi Master,Human
4,200,,Chewbacca,,Smuggler,Wookie
5,32,R2.D2@robo.net,R2-D2,,Mechanic,Droid
6,112,C.3PO@robo.net,C-3PO,,Translator,Droid


In [82]:
# For df1, setting the option "ascending=False" will sort the integer labels in descending order:
display(df1.sort_index(ascending=False))

# For df2, setting the option "ascending=True" (the default) will sort the string labels in alphabetical order.
# Rows whose label is missing (None) are placed after all the labelled rows:
display(df2.sort_index(ascending=True))

Unnamed: 0,last,first,email,age,occupation,race
6,,C-3PO,C.3PO@robo.net,112,Translator,Droid
5,,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
4,,Chewbacca,,200,Smuggler,Wookie
3,Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
2,Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
1,Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
0,Solo,Han,solo@sympatico.net,29,Smuggler,Human


Unnamed: 0_level_0,first,email,age,occupation,race
last,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
Solo,Han,solo@sympatico.net,29,Smuggler,Human
,Chewbacca,,200,Smuggler,Wookie
,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
,C-3PO,C.3PO@robo.net,112,Translator,Droid


In [84]:
# The "sort_values()" method sorts a DataFrame by the values it contains rather than by its labels.
# Each row keeps its original index label, so the index column appears shuffled in the output.

# To sort the rows by the values in column "age" in ascending order:
display(df1.sort_values("age"))

# To sort the rows by the values in column "first" in alphabetical order:
display(df1.sort_values("first"))

Unnamed: 0,last,first,email,age,occupation,race
1,Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
2,Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
0,Solo,Han,solo@sympatico.net,29,Smuggler,Human
5,,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
3,Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
6,,C-3PO,C.3PO@robo.net,112,Translator,Droid
4,,Chewbacca,,200,Smuggler,Wookie


Unnamed: 0,last,first,email,age,occupation,race
3,Kenobi,Ben,obiwan@gmail.com,57,Jedi Master,Human
6,,C-3PO,C.3PO@robo.net,112,Translator,Droid
4,,Chewbacca,,200,Smuggler,Wookie
0,Solo,Han,solo@sympatico.net,29,Smuggler,Human
2,Organa,Leia,princess.leah@rogers.ca,19,Princess of Alderaan,Human
1,Skywalker,Luke,luke.skywalker@rebel.org,19,Moisture Farmer,Human
5,,R2-D2,R2.D2@robo.net,32,Mechanic,Droid
