# Explicit Indexes

Indexes are a fundamental pandas concept: they provide the labels that identify the rows of a data structure such as a Series or DataFrame. These labels enable efficient data access, alignment, and merging. Index labels are usually unique, although pandas does not require them to be. The pandas library is built on top of NumPy and provides powerful indexing capabilities, making it an essential tool for data analysis and manipulation.

In [2]:
import pandas as pd

In [4]:
# Load the example spreadsheet into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; change it to
# wherever database.xlsx lives on your machine before running the notebook.
path = r"C:\Users\Alysson\Documents\GitHub\Pandas-in-Python\database.xlsx"
data = pd.read_excel(path)

In [5]:
# Column labels of the DataFrame (pandas stores them as an Index object too)
data.columns

Index(['name', 'breed', 'color', 'height_cm', 'weight_kg', 'age'], dtype='object')

In [7]:
# Row index: no index column was chosen when loading, so pandas uses a default RangeIndex
data.index

RangeIndex(start=0, stop=5, step=1)

### Setting a column as the index

In [17]:
# set_index returns a new DataFrame in which "name" is moved from the columns
# into the row index; `data` itself is left unchanged.
data_ind = data.set_index("name")
data_ind

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,age
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Paçoca,Labrador,Brown,56,25,5
Ivo,Poodle,Black,43,22,4
Lola,Schnauzer,Gray,49,23,4
Maracatu,King Cavalier,Brown,43,21,3
Chantal,Labrador,Black,59,29,6


In [10]:
# Go back to the default integer index. drop=True discards the "name" labels
# instead of restoring them as a column. The result is only displayed, not assigned.
data_ind.reset_index(drop=True)

Unnamed: 0,breed,color,height_cm,weight_kg,age
0,Labrador,Brown,56,25,5
1,Poodle,Black,43,22,4
2,Schnauzer,Gray,49,23,4
3,King Cavalier,Brown,43,21,3
4,Labrador,Black,59,29,6


### Subsetting

In [11]:
# Without an index on "name": build a boolean mask with isin() and filter the rows with it
data[data["name"].isin(["Ivo", "Maracatu"])]

Unnamed: 0,name,breed,color,height_cm,weight_kg,age
1,Ivo,Poodle,Black,43,22,4
3,Maracatu,King Cavalier,Brown,43,21,3


In [13]:
# With "name" as the index, the .loc indexer selects the same rows directly by label
data_ind.loc[["Ivo", "Maracatu"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,age
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ivo,Poodle,Black,43,22,4
Maracatu,King Cavalier,Brown,43,21,3


### Multi-level indexes

In [16]:
# Passing a list of columns builds a multi-level index:
# "breed" becomes the outer level and "color" the inner level.
data_ind2 = data.set_index(["breed", "color"])
data_ind2

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4
Schnauzer,Gray,Lola,49,23,4
King Cavalier,Brown,Maracatu,43,21,3
Labrador,Black,Chantal,59,29,6


In [18]:
# A plain list of labels selects on the outer level ("breed") only
data_ind2.loc[["Labrador", "Poodle"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Labrador,Black,Chantal,59,29,6
Poodle,Black,Ivo,43,22,4


In [20]:
# A list of (breed, color) tuples selects exact combinations across both levels
data_ind2.loc[[("Labrador", "Brown"), ("Poodle", "Black")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4


### Sorting by index values

In [21]:
# Default sort: by the outer level ("breed") first, then the inner level ("color"), ascending
data_ind2.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
King Cavalier,Brown,Maracatu,43,21,3
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4
Schnauzer,Gray,Lola,49,23,4


In [30]:
# Sort by "color" ascending first, then by "breed" descending within each color
data_ind2.sort_index(
    level=["color", "breed"],
    ascending=[True, False],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Poodle,Black,Ivo,43,22,4
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
King Cavalier,Brown,Maracatu,43,21,3
Schnauzer,Gray,Lola,49,23,4


### Slicing and Subsetting with .loc and .iloc

In pandas, you can use the `.loc` and `.iloc` indexers to slice and subset data from a DataFrame or Series. `.loc` selects rows and columns by label, while `.iloc` selects them by integer position.